// Source blob: 59423885e2608baf85e4ce0bb61498aba220b3cd
// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! Implements a monitor that watches for a given set of dm-snapshots to become
//! nearly full, and kicks off an abort if a threshold is reached.
use anyhow::Result;
use log::error;
use log::info;
use log::warn;
use std::convert::TryInto;
use std::sync::mpsc::channel;
use std::sync::mpsc::Receiver;
use std::sync::mpsc::RecvTimeoutError;
use std::sync::mpsc::Sender;
use std::thread::JoinHandle;
use std::thread::{self};
use std::time::Duration;
use crate::hiberutil::emergency_reboot;
use crate::resume_dbus::send_abort;
use crate::volume::get_snapshot_size;
/// Threshold for how full the snapshot may get (as a percentage of its overall
/// space) before resume is aborted and the snapshot is merged.
///
/// Compared against the integer percentage computed by the monitor thread.
const SNAPSHOT_FULL_ABORT_PERCENT: i32 = 75;
/// Handle to a background thread that watches the space usage of one
/// dm-snapshot.
///
/// Created with `DmSnapshotSpaceMonitor::new` and shut down with
/// `DmSnapshotSpaceMonitor::stop`.
pub struct DmSnapshotSpaceMonitor {
    /// Sending half of the channel used to deliver messages (currently only
    /// `Stop`) to the monitor thread.
    channel_tx: Sender<SnapshotMonitorMessage>,
    /// Join handle of the monitor thread. Becomes `None` once `stop` has taken
    /// it.
    thread: Option<JoinHandle<()>>,
}
impl DmSnapshotSpaceMonitor {
    /// Spawns a thread that monitors the dm-snapshot called `name`.
    ///
    /// The thread starts with its first progress log threshold at 10% and with
    /// no abort requested. It keeps polling until `stop` is called.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`; the `Result` is kept
    /// for interface stability.
    pub fn new(name: &str) -> Result<Self> {
        let (channel_tx, channel_rx) = channel();
        let state = DmSnapshotSpaceMonitorState {
            name: name.to_string(),
            channel_rx,
            report_percent: 10,
            aborted: false,
        };
        // The state (including the receiving end of the channel) is owned by
        // the monitor thread from here on.
        let thread = Some(thread::spawn(move || snapshot_monitor_thread(state)));
        Ok(Self { channel_tx, thread })
    }

    /// Asks the monitor thread to exit and waits for it to finish.
    ///
    /// Calling this more than once is harmless: the join handle is taken on
    /// the first call, so later calls do nothing.
    pub fn stop(&mut self) {
        if let Some(thread) = self.thread.take() {
            // `send` on an unbounded channel only fails when the receiver has
            // been dropped, i.e. the monitor thread already exited (or
            // panicked). That is not a reason to panic here: log it and fall
            // through to the join, which will return promptly and report a
            // thread panic if there was one.
            if let Err(e) = self.channel_tx.send(SnapshotMonitorMessage::Stop) {
                warn!(
                    "Failed to send stop to dm-snapshot space monitor thread: {}",
                    e
                );
            }
            if let Err(e) = thread.join() {
                warn!("Failed to join dm-snapshot space monitor thread: {:?}", e);
            }
        }
    }
}
/// State owned by the monitor thread for the snapshot it is watching.
struct DmSnapshotSpaceMonitorState {
    /// Name of the snapshot, used to query its size and in log messages.
    name: String,
    /// Receiving half of the channel on which the owner sends messages.
    channel_rx: Receiver<SnapshotMonitorMessage>,
    /// Next fullness percentage at which a progress message is logged.
    report_percent: i32,
    /// Set once an abort of the resume has been requested, so that it is only
    /// requested once.
    aborted: bool,
}
/// Messages the owner can send to the monitor thread.
enum SnapshotMonitorMessage {
    /// Ask the monitor thread to leave its polling loop and exit.
    Stop,
}
/// Body of the monitor thread: polls the snapshot until told to stop.
///
/// Each pass checks the snapshot's fullness, then waits up to one second for
/// a message from the owner. The loop ends when `Stop` is received, or when
/// receiving fails with anything other than a timeout.
fn snapshot_monitor_thread(mut state: DmSnapshotSpaceMonitorState) {
    info!("Started watching snapshot {}", state.name);
    loop {
        check_snapshot_space(&mut state);

        // Wait for a bit, or receive a message from the main thread.
        match state.channel_rx.recv_timeout(Duration::from_secs(1)) {
            Ok(SnapshotMonitorMessage::Stop) => {
                info!("Stopped monitoring {}", state.name);
                break;
            }
            Err(RecvTimeoutError::Timeout) => {}
            Err(e) => {
                error!("Failed to recv in dm-snapshot monitor: {}", e);
                break;
            }
        }
    }
}

/// Performs a single poll of the snapshot's space usage and reacts to it.
///
/// * Logs a progress message each time usage crosses another 10% step.
/// * Requests an abort of the resume (once) when usage reaches
///   `SNAPSHOT_FULL_ABORT_PERCENT`; if the request fails, triggers an
///   emergency reboot.
/// * Triggers an emergency reboot when the snapshot is completely full.
///
/// Failures to read or interpret the snapshot size are logged and the poll is
/// skipped.
fn check_snapshot_space(state: &mut DmSnapshotSpaceMonitorState) {
    let (allocated, total) = match get_snapshot_size(&state.name) {
        Ok(sizes) => sizes,
        Err(e) => {
            warn!("Error getting snapshot size: {}", e);
            return;
        }
    };

    // Use checked arithmetic: a reported total of zero (or an absurdly large
    // allocation) must not panic the monitor thread.
    let percent_full = match allocated
        .checked_mul(100)
        .and_then(|scaled| scaled.checked_div(total))
    {
        Some(percent) => percent,
        None => {
            warn!(
                "Cannot compute fullness of snapshot {} from sizes {}/{}",
                state.name, allocated, total
            );
            return;
        }
    };
    // A value too large for i32 is treated as "as full as it gets".
    let percent_full: i32 = percent_full.try_into().unwrap_or(i32::MAX);

    // Print logs occasionally as the snapshot progresses towards being full.
    if percent_full >= state.report_percent {
        info!("Snapshot {} is {}% full", state.name, percent_full);
        // Move the threshold to the next multiple of 10 above the current
        // value. Saturating so an out-of-range percentage cannot overflow.
        state.report_percent = (percent_full / 10).saturating_add(1).saturating_mul(10);
    }

    // Abort resume if the snapshot becomes close enough to full to be
    // concerning, given that we only check it periodically.
    if !state.aborted && percent_full >= SNAPSHOT_FULL_ABORT_PERCENT {
        error!(
            "Snapshot {} is {}% full, aborting resume",
            state.name, percent_full
        );
        // Mark the abort as requested up front so it is never sent twice.
        state.aborted = true;
        let reason = format!(
            "Snapshot {} became >={}% full",
            state.name, SNAPSHOT_FULL_ABORT_PERCENT
        );
        if let Err(e) = send_abort(&reason) {
            error!("Attempting to abort returned: {}", e);
            emergency_reboot("Failed to abort from snapshot monitor thread");
        }
    }

    // If the snapshot is totally full, the kernel has deactivated it, and
    // it's in an inconsistent state. Don't try to sync it, we're better off
    // doing an emergency reboot to get back to a consistent state from
    // hibernate time.
    if percent_full >= 100 {
        error!("Snapshot {} is totally full, rebooting!", state.name);
        emergency_reboot("Snapshot filled completely");
    }
}