blob: 1c447ca24a1b5f2b8e6a86509d83514cbb7021b2 [file] [log] [blame] [edit]
// Copyright 2021 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! Implements hibernate suspend functionality.
use std::time::Duration;
use std::time::Instant;
use std::time::UNIX_EPOCH;
use anyhow::Context;
use anyhow::Result;
use libc::reboot;
use libc::RB_AUTOBOOT;
use libc::RB_POWER_OFF;
use log::debug;
use log::error;
use log::info;
use log::warn;
use crate::cookie::set_hibernate_cookie;
use crate::cookie::HibernateCookieValue;
use crate::hiberlog;
use crate::hiberlog::redirect_log;
use crate::hiberlog::replay_logs;
use crate::hiberlog::reset_log;
use crate::hiberlog::HiberlogOut;
use crate::hiberutil::path_to_stateful_block;
use crate::hiberutil::prealloc_mem;
use crate::hiberutil::HibernateError;
use crate::hiberutil::HibernateOptions;
use crate::hiberutil::HibernateStage;
use crate::hiberutil::TimestampFile;
use crate::metrics::log_hibernate_attempt;
use crate::metrics::read_and_send_metrics;
use crate::metrics::MetricsFile;
use crate::metrics::MetricsLogger;
use crate::snapdev::FrozenUserspaceTicket;
use crate::snapdev::SnapshotDevice;
use crate::snapdev::SnapshotMode;
use crate::update_engine::is_update_engine_idle;
use crate::volume::VolumeManager;
/// The SuspendConductor weaves a delicate baton to guide us through the
/// symphony of hibernation.
pub struct SuspendConductor {
options: HibernateOptions,
metrics: MetricsLogger,
volume_manager: VolumeManager,
timestamp_resumed: Option<Duration>,
}
impl SuspendConductor {
/// Create a new SuspendConductor in preparation for imminent hibernation.
pub fn new() -> Result<Self> {
Ok(SuspendConductor {
options: Default::default(),
metrics: MetricsLogger::new()?,
volume_manager: VolumeManager::new()?,
timestamp_resumed: None,
})
}
/// Public entry point that hibernates the system, and returns either upon
/// failure to hibernate or after the system has resumed from a successful
/// hibernation.
pub fn hibernate(&mut self, options: HibernateOptions) -> Result<()> {
self.options = options;
info!("Beginning hibernate");
self.volume_manager.setup_hibermeta_lv(true)?;
if let Err(e) = log_hibernate_attempt() {
warn!("Failed to log hibernate attempt: \n {}", e);
}
self.create_metrics_file()?;
// Don't hibernate if the update engine is up to something, as we would
// not want to hibernate if upon reboot the other slot gets booted.
// While an update is "pending reboot", the update engine might do
// further checks for updates it can apply. So no state except idle is
// safe.
if !is_update_engine_idle()? {
return Err(HibernateError::UpdateEngineBusyError()).context("Update engine is active");
}
// Stop logging to syslog, and divert instead to a file since the
// logging daemon's about to be frozen.
let log_file_path = hiberlog::LogFile::get_path(HibernateStage::Suspend);
let log_file = hiberlog::LogFile::create(log_file_path)?;
redirect_log(HiberlogOut::File(Box::new(log_file)));
debug!("Syncing filesystems");
// This is safe because sync() does not modify memory.
unsafe {
libc::sync();
}
prealloc_mem(&mut self.metrics).context("Failed to preallocate memory for hibernate")?;
let result = self.suspend_system();
self.volume_manager.mount_hibermeta()?;
// Now send any remaining logs and future logs to syslog.
redirect_log(HiberlogOut::Syslog);
// Replay logs first because they happened earlier.
replay_logs(
result.is_ok() && !self.options.dry_run,
!self.options.dry_run,
);
self.record_total_resume_time();
// Read the metrics files and send out the samples.
read_and_send_metrics();
self.volume_manager.unmount_hibermeta()?;
result
}
/// Inner helper function to actually take the snapshot, save it to disk,
/// and shut down. Returns upon a failure to hibernate, or after a
/// successful hibernation has resumed.
fn suspend_system(&mut self) -> Result<()> {
let mut snap_dev = SnapshotDevice::new(SnapshotMode::Read)?;
info!("Freezing userspace");
let frozen_userspace = snap_dev.freeze_userspace()?;
self.metrics.flush()?;
self.metrics.file = None;
redirect_log(HiberlogOut::BufferInMemory);
self.volume_manager.unmount_hibermeta()?;
self.snapshot_and_save(frozen_userspace)
}
/// Snapshot the system, write the result to disk, and power down. Returns
/// upon failure to hibernate, or after a hibernated system has successfully
/// resumed.
fn snapshot_and_save(&mut self, mut frozen_userspace: FrozenUserspaceTicket) -> Result<()> {
let block_path = path_to_stateful_block()?;
let dry_run = self.options.dry_run;
let snap_dev = frozen_userspace.as_mut();
// This is where the suspend path and resume path fork. On success,
// both halves of these conditions execute, just at different times.
if snap_dev.atomic_snapshot()? {
// Suspend path. Everything after this point is invisible to the
// hibernated kernel.
// Briefly remount 'hibermeta' to write logs and metrics.
self.volume_manager.mount_hibermeta()?;
let log_file_path = hiberlog::LogFile::get_path(HibernateStage::Suspend);
let log_file = hiberlog::LogFile::open(log_file_path)?;
redirect_log(HiberlogOut::File(Box::new(log_file)));
let start = Instant::now();
if let Err(e) = snap_dev.transfer_block_device() {
snap_dev.unfreeze_userspace()?;
return Err(e);
}
let io_duration = start.elapsed();
self.create_metrics_file()?;
self.metrics.metrics_send_io_sample(
"WriteHibernateImage",
snap_dev.get_image_size()?,
io_duration,
);
// Close the metrics file before unmounting hibermeta. The metrics will be
// sent on resume.
self.metrics.flush()?;
self.metrics.file = None;
// Set the hibernate cookie so the next boot knows to start in RO mode.
info!("Setting hibernate cookie at {}", block_path);
set_hibernate_cookie(Some(&block_path), HibernateCookieValue::ResumeReady)?;
if dry_run {
info!("Not powering off due to dry run");
} else {
info!("Powering off");
}
redirect_log(HiberlogOut::BufferInMemory);
self.volume_manager.unmount_hibermeta()?;
// Power the thing down.
if !dry_run {
if !self.options.reboot {
Self::power_off()?;
error!("Returned from power off");
} else {
Self::reboot()?;
error!("Returned from reboot");
}
}
} else {
self.timestamp_resumed = Some(UNIX_EPOCH.elapsed().unwrap());
// This is the resume path. First, forcefully reset the logger, which is some
// stale partial state that the suspend path ultimately flushed and closed.
// Keep logs in memory for now.
reset_log();
redirect_log(HiberlogOut::BufferInMemory);
info!("Resumed from hibernate");
}
// Unset the hibernate cookie.
info!("Clearing hibernate cookie at {}", block_path);
set_hibernate_cookie(Some(&block_path), HibernateCookieValue::NoResume)
.context("Failed to clear hibernate cookie")
}
/// Record the total resume time.
fn record_total_resume_time(&self) {
if self.timestamp_resumed.is_none() {
return;
}
let res = TimestampFile::read_timestamp("resume_start.ts");
if let Err(e) = res {
warn!("Failed to read resume start timestap: {e}");
return;
}
let resume_start = res.unwrap();
let resume_time = self.timestamp_resumed.unwrap() - resume_start;
debug!(
"Resume from hibernate took {}.{}.s",
resume_time.as_secs(),
resume_time.subsec_millis()
);
// TODO: log metric
}
/// Create the file for the suspend metrics and configure the metrics logger
/// to use it.
fn create_metrics_file(&mut self) -> Result<()> {
let metrics_file_path = MetricsFile::get_path(HibernateStage::Suspend);
let metrics_file = MetricsFile::create(metrics_file_path)?;
self.metrics.file = Some(metrics_file);
Ok(())
}
/// Utility function to power the system down immediately.
fn power_off() -> Result<()> {
// This is safe because the system either ceases to exist, or does
// nothing to memory.
unsafe {
// On success, we shouldn't be executing, so the return code can be
// ignored because we already know it's a failure.
let _ = reboot(RB_POWER_OFF);
Err(HibernateError::ShutdownError(
libchromeos::sys::Error::last(),
))
.context("Failed to shut down")
}
}
/// Utility function to reboot the system immediately.
fn reboot() -> Result<()> {
// This is safe because the system either ceases to exist, or does
// nothing to memory.
unsafe {
// On success, we shouldn't be executing, so the return code can be
// ignored because we already know it's a failure.
let _ = reboot(RB_AUTOBOOT);
Err(HibernateError::ShutdownError(
libchromeos::sys::Error::last(),
))
.context("Failed to reboot")
}
}
}