blob: 376ac606a4f62070ba0c709bfacc1f12868c3b37 [file] [log] [blame] [edit]
// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This program collects fine-grained memory stats around events of interest
// (such as browser tab discards) and saves them in a queue of "clip files",
// to be uploaded with other logs.
//
// The program has two modes: slow and fast poll. In slow-poll mode, the
// program occasionally checks (every 2 seconds right now) whether it should go
// into fast-poll mode, because interesting events become possible. When in
// fast-poll mode, the program collects memory stats frequently (every 0.1
// seconds right now) and stores them in a circular buffer. When "interesting"
// events occur, the stats around each event are saved into a "clip" file.
// These files are also maintained as a queue, so only the latest N clips are
// available (N = 20 right now).
extern crate chrono;
extern crate dbus;
extern crate env_logger;
extern crate libc;
#[macro_use]
extern crate log;
extern crate procfs;
extern crate syslog;
extern crate tempfile;
#[cfg(not(test))]
use dbus::ffidisp::{BusType, Connection, ConnectionItem, WatchEvent};
#[cfg(not(test))]
use protobuf::Message;
use std::collections::hash_map::HashMap;
use std::fmt;
use std::fs::{create_dir, File, OpenOptions};
use std::io::Read;
use std::io::Write;
use std::os::unix::fs::OpenOptionsExt;
use std::path::{Path, PathBuf};
use std::time::Duration;
use std::{io, str};
use chrono::DateTime;
use chrono::Local;
#[cfg(not(test))]
use nix::sys::select;
use procfs::LoadAverage;
#[cfg(not(test))]
use system_api::metrics_event;
use tempfile::TempDir;
#[cfg(test)]
mod test;
const LOG_DIRECTORY: &str = "/var/log/memd";
const STATIC_PARAMETERS_LOG: &str = "memd.parameters";
const MAX_CLIP_COUNT: usize = 20;
const COLLECTION_DELAY_MS: i64 = 5_000; // Wait after event of interest.
const CLIP_COLLECTION_SPAN_MS: i64 = 10_000; // ms worth of samples in a clip.
const SAMPLES_PER_SECOND: i64 = 10; // Rate of fast sample collection.
const SAMPLING_PERIOD_MS: i64 = 1000 / SAMPLES_PER_SECOND;
// Danger threshold. When the distance between "available" and "margin" is
// greater than LOW_MEM_DANGER_THRESHOLD_MB, we assume that there's no danger
// of "interesting" events (such as a discard) happening in the next
// SLOW_POLL_PERIOD_DURATION. In other words, we expect that an allocation of
// more than LOW_MEM_DANGER_THRESHOLD_MB in a SLOW_POLL_PERIOD_DURATION will be
// rare.
const LOW_MEM_DANGER_THRESHOLD_MB: u32 = 600; // Poll fast when closer to margin than this.
const SLOW_POLL_PERIOD_DURATION: Duration = Duration::from_secs(2); // Sleep in slow-poll mode.
const FAST_POLL_PERIOD_DURATION: Duration = Duration::from_millis(SAMPLING_PERIOD_MS as u64); // Sleep duration in fast-poll mode.
// Print a warning if the fast-poll select lasts a lot longer than expected
// (which might happen because of huge load average and swap activity).
const UNREASONABLY_LONG_SLEEP: i64 = 10 * SAMPLING_PERIOD_MS;
// Size of sample queue. The queue contains mostly timer events, in the amount
// determined by the clip span and the sampling rate. It also contains other
// events, such as OOM kills etc., whose amount is expected to be smaller than
// the former. Generously double the number of timer events to leave room for
// non-timer events.
const SAMPLE_QUEUE_LENGTH: usize =
(CLIP_COLLECTION_SPAN_MS / 1000 * SAMPLES_PER_SECOND * 2) as usize;
// The names of fields of interest in /proc/vmstat. They must be listed in
// the order in which they appear in /proc/vmstat. When parsing the file,
// if a mandatory field is missing, the program panics. A missing optional
// field (for instance, pgmajfault_f for some kernels) results in a value
// of 0. (BAD: This works only for the last field.)
//
// For fields with |accumulate| = true, use the name as a prefix, and add up
// all values with that prefix in consecutive lines.
const VMSTAT_VALUES_COUNT: usize = 5; // Number of vmstat values we're tracking.
#[rustfmt::skip]
const VMSTATS: [(&str, bool, bool); VMSTAT_VALUES_COUNT] = [
// name mandatory accumulate
("pswpin", true, false),
("pswpout", true, false),
("pgalloc", true, true), // pgalloc_dma, pgalloc_normal, etc.
("pgmajfault", true, false),
("pgmajfault_f", false, false),
];
// The only difference from x86_64 is pgalloc_dma vs. pgalloc_dma32.
// For resource manager D-Bus interface
#[cfg(not(test))]
const RESOURCED_SERVICE_NAME: &str = "org.chromium.ResourceManager";
#[cfg(not(test))]
const RESOURCED_PATH_NAME: &str = "/org/chromium/ResourceManager";
#[cfg(not(test))]
const RESOURCED_INTERFACE_NAME: &str = RESOURCED_SERVICE_NAME;
#[derive(Debug)]
pub enum Error {
LowMemFileError(Box<dyn std::error::Error>),
VmstatFileError(Box<std::io::Error>),
RunnablesFileError(Box<std::io::Error>),
AvailableFileError(Box<dyn std::error::Error>),
CreateLogDirError(Box<std::io::Error>),
StartingClipCounterMissingError(Box<dyn std::error::Error>),
LogStaticParametersError(Box<dyn std::error::Error>),
DbusWatchError(Box<dyn std::error::Error>),
LowMemFDWatchError(Box<dyn std::error::Error>),
LowMemWatcherError(Box<dyn std::error::Error>),
InitSyslogError(Box<dyn std::error::Error>),
}
impl std::error::Error for Error {
// This function is "soft-deprecated" so
// we use the fmt() function from Display below.
fn description(&self) -> &str {
"memd_error"
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::LowMemFileError(ref e) => write!(f, "cannot opening low-mem file: {}", e),
Error::VmstatFileError(ref e) => write!(f, "cannot open vmstat: {}", e),
Error::RunnablesFileError(ref e) => write!(f, "cannot open loadavg: {}", e),
Error::AvailableFileError(ref e) => write!(f, "cannot open available file: {}", e),
Error::CreateLogDirError(ref e) => write!(f, "cannot create log directory: {}", e),
Error::StartingClipCounterMissingError(ref e) => {
write!(f, "cannot find starting clip counter: {}", e)
}
Error::LogStaticParametersError(ref e) => {
write!(f, "cannot log static parameters: {}", e)
}
Error::DbusWatchError(ref e) => write!(f, "cannot watch dbus fd: {}", e),
Error::LowMemFDWatchError(ref e) => write!(f, "cannot watch low-mem fd: {}", e),
Error::LowMemWatcherError(ref e) => write!(f, "cannot set low-mem watcher: {}", e),
Error::InitSyslogError(ref e) => write!(f, "cannot init syslog: {}", e),
}
}
}
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
// Converts the result of an integer expression |e| to modulo |n|. |e| may be
// negative. This differs from plain "%" in that the result of this function
// is always be between 0 and n-1.
fn modulo(e: isize, n: usize) -> usize {
let nn = n as isize;
let x = e % nn;
(if x >= 0 { x } else { x + nn }) as usize
}
// Reads a string from the file named by |path|, representing a u32, and
// returns the value the strings it represents. If there are multiple ints
// in the file, then it returns the first one.
fn read_int(path: &Path) -> Result<u32> {
let mut file = File::open(path)?;
let mut content = String::new();
file.read_to_string(&mut content)?;
Ok(content
.split_whitespace()
.next()
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))?
.parse::<u32>()?)
}
// The Timer trait allows us to mock time for testing.
trait Timer {
// A wrapper for select() with only ready-to-read fds.
fn select(
&mut self,
dbus_receiver: &crossbeam_channel::Receiver<DbusEvent>,
timeout: Duration,
) -> Result<bool>;
fn now(&self) -> i64;
fn quit_request(&self) -> bool;
// For mocking available in test.
fn get_available_mb(&self) -> Result<u32>;
}
#[cfg(not(test))]
struct GenuineTimer {
dbus_connection: dbus::blocking::Connection,
}
#[cfg(not(test))]
impl Timer for GenuineTimer {
// Returns current uptime (active time since boot, in milliseconds)
fn now(&self) -> i64 {
let ts = nix::time::clock_gettime(nix::time::ClockId::CLOCK_MONOTONIC)
.expect("clock_gettime() failed!");
(ts.tv_sec() * 1000 + ts.tv_nsec() / 1_000_000).into()
}
// Always returns false, unless testing (see MockTimer).
fn quit_request(&self) -> bool {
false
}
fn select(
&mut self,
dbus_receiver: &crossbeam_channel::Receiver<DbusEvent>,
timeout: Duration,
) -> Result<bool> {
let mut channel_select = crossbeam_channel::Select::new();
let _operation = channel_select.recv(dbus_receiver);
match channel_select.ready_timeout(timeout) {
Ok(_) => Ok(true),
Err(_) => Ok(false),
}
}
fn get_available_mb(&self) -> Result<u32> {
let proxy = self.dbus_connection.with_proxy(
RESOURCED_INTERFACE_NAME,
RESOURCED_PATH_NAME,
Duration::from_millis(5000),
);
let (available,): (u64,) =
proxy.method_call(RESOURCED_SERVICE_NAME, "GetAvailableMemoryKB", ())?;
Ok((available / 1024) as u32)
}
}
#[derive(Clone, Copy, Default)]
struct Sample {
uptime: i64, // system uptime in ms
sample_type: SampleType,
info: Sysinfo,
runnables: u32, // number of runnable processes
available: u32, // available RAM from low-mem notifier
vmstat_values: [i64; VMSTAT_VALUES_COUNT],
}
impl Sample {
// Outputs a sample.
fn output(&self, out: &mut File) -> Result<()> {
writeln!(
out,
"{}.{:02} {:6} {} {} {} {} {} {} {} {} {} {} {}",
self.uptime / 1000,
self.uptime % 1000 / 10,
self.sample_type,
self.info.loads_1_minute,
self.info.freeram,
self.info.freeswap,
self.info.procs,
self.runnables,
self.available,
self.vmstat_values[0],
self.vmstat_values[1],
self.vmstat_values[2],
self.vmstat_values[3],
self.vmstat_values[4],
)?;
Ok(())
}
}
#[derive(Copy, Clone, Default)]
struct Sysinfo {
loads_1_minute: f64,
freeram: u64,
freeswap: u64,
procs: u16,
}
fn sysinfo() -> Result<Sysinfo> {
let sysinfo_result = nix::sys::sysinfo::sysinfo()?;
Ok(Sysinfo {
loads_1_minute: sysinfo_result.load_average().0,
freeram: sysinfo_result.ram_unused(),
freeswap: sysinfo_result.swap_free(),
procs: sysinfo_result.process_count(),
})
}
impl Sysinfo {
// Fakes sysinfo for testing.
fn fake_sysinfo() -> Result<Sysinfo> {
Ok(Sysinfo {
loads_1_minute: 5.0,
freeram: 42_000_000,
freeswap: 84_000_000,
procs: 1234,
})
}
}
struct SampleQueue {
samples: [Sample; SAMPLE_QUEUE_LENGTH],
head: usize, // points to latest entry
count: usize, // count of valid entries (min=0, max=SAMPLE_QUEUE_LENGTH)
}
impl SampleQueue {
fn new() -> SampleQueue {
let s: Sample = Default::default();
SampleQueue {
samples: [s; SAMPLE_QUEUE_LENGTH],
head: 0,
count: 0,
}
}
// Returns self.head as isize, to make index calculations behave correctly
// on underflow.
fn ihead(&self) -> isize {
self.head as isize
}
fn reset(&mut self) {
self.head = 0;
self.count = 0;
}
// Returns the next slot in the queue. Always succeeds, since on overflow
// it discards the LRU slot.
fn next_slot(&mut self) -> &mut Sample {
let slot = self.head;
self.head = modulo(self.ihead() + 1, SAMPLE_QUEUE_LENGTH);
if self.count < SAMPLE_QUEUE_LENGTH {
self.count += 1;
}
&mut self.samples[slot]
}
fn sample(&self, i: isize) -> &Sample {
assert!(i >= 0);
// Subtract 1 because head points to the next free slot.
assert!(
modulo(self.ihead() - 1 - i, SAMPLE_QUEUE_LENGTH) <= self.count,
"bad queue index: i {}, head {}, count {}, queue len {}",
i,
self.head,
self.count,
SAMPLE_QUEUE_LENGTH
);
&self.samples[i as usize]
}
// Outputs to file |f| samples from |start_time| to the head. Uses a start
// time rather than a start index because when we start a clip we have to
// do a time-based search anyway.
fn output_from_time(&self, f: &mut File, start_time: i64) -> Result<()> {
// For now just do a linear search. ;)
let mut start_index = modulo(self.ihead() - 1, SAMPLE_QUEUE_LENGTH);
debug!(
"output_from_time: start_time {}, head {}",
start_time, start_index
);
loop {
let sample = self.samples[start_index];
// Ignore samples from external sources because their time stamp may be off.
if sample.uptime <= start_time && sample.sample_type.has_internal_timestamp() {
break;
}
debug!(
"output_from_time: seeking uptime {}, index {}",
sample.uptime, start_index
);
start_index = modulo(start_index as isize - 1, SAMPLE_QUEUE_LENGTH);
if modulo(self.ihead() - 1 - start_index as isize, SAMPLE_QUEUE_LENGTH) > self.count {
warn!("too many events in requested interval");
break;
}
}
let mut index = modulo(start_index as isize + 1, SAMPLE_QUEUE_LENGTH) as isize;
while index != self.ihead() {
debug!("output_from_time: outputting index {}", index);
self.sample(index).output(f)?;
index = modulo(index + 1, SAMPLE_QUEUE_LENGTH) as isize;
}
Ok(())
}
}
// Returns the number of processes currently runnable (running or on ready queue).
// Rule of thumb:
// runnable / CPU_count < 3, CPU loading is not high.
// runnable / CPU_count > 5, CPU loading is very high.
fn get_runnables() -> Result<u32> {
Ok(parse_runnables(LoadAverage::new()?))
}
fn parse_runnables(load_average: LoadAverage) -> u32 {
load_average.cur
}
fn get_vmstats() -> Result<[i64; VMSTAT_VALUES_COUNT]> {
let vmstats = procfs::vmstat()?;
parse_vmstats(&vmstats)
}
fn parse_vmstats(vmstats: &HashMap<String, i64>) -> Result<[i64; VMSTAT_VALUES_COUNT]> {
let mut result = [0i64; VMSTAT_VALUES_COUNT];
for (i, &(field_name, mandatory, accumulate)) in VMSTATS.iter().enumerate() {
if accumulate {
for (sub_field_name, value) in vmstats {
if sub_field_name.starts_with(field_name) {
result[i] += value;
}
}
} else {
match vmstats.get(field_name) {
Some(value) => result[i] = *value,
None => {
if mandatory {
return Err(format!("vmstat: missing value: {}", field_name).into());
}
}
}
}
}
Ok(result)
}
struct Watermarks {
min: u32,
low: u32,
high: u32,
}
struct ZoneinfoFile(File);
impl ZoneinfoFile {
// Computes and returns the watermark values from /proc/zoneinfo.
fn read_watermarks(&mut self) -> Result<Watermarks> {
let mut min = 0;
let mut low = 0;
let mut high = 0;
let mut content = String::new();
self.0.read_to_string(&mut content)?;
for line in content.lines() {
let items = line.split_whitespace().collect::<Vec<_>>();
match items[0] {
"min" => min += items[1].parse::<u32>()?,
"low" => low += items[1].parse::<u32>()?,
"high" => high += items[1].parse::<u32>()?,
_ => {}
}
}
Ok(Watermarks { min, low, high })
}
}
enum DbusEvent {
TabDiscard { time: i64 },
OomKill { time: i64 },
OomKillKernel { time: i64 },
CriticalMemoryPressure,
}
// The main object.
struct Sampler<'a> {
always_poll_fast: bool, // When true, program stays in fast poll mode.
paths: &'a Paths, // Paths of files used by the program.
dbus_receiver: crossbeam_channel::Receiver<DbusEvent>,
low_mem_margin_mb: u32, // Low-memory margin, assumed to remain constant in a boot session.
sample_header: String, // The text at the beginning of each clip file.
clip_counter: usize, // Index of next clip file (also part of file name).
sample_queue: SampleQueue, // A running queue of samples of vm quantities.
current_available: u32, // Amount of "available" memory (in MB) at last reading.
current_time: i64, // Wall clock in ms at last reading.
collecting: bool, // True if we're in the middle of collecting a clip.
timer: Box<dyn Timer>, // Real or mock timer.
quit_request: bool, // Signals a quit-and-restart request when testing.
}
impl<'a> Sampler<'a> {
fn new(
always_poll_fast: bool,
paths: &'a Paths,
timer: Box<dyn Timer>,
dbus_receiver: crossbeam_channel::Receiver<DbusEvent>,
low_mem_margin_mb: u32,
) -> Result<Sampler> {
let mut low_mem_file_flags = OpenOptions::new();
low_mem_file_flags.custom_flags(libc::O_NONBLOCK);
low_mem_file_flags.read(true);
let sample_header = build_sample_header();
let mut sampler = Sampler {
always_poll_fast,
dbus_receiver,
low_mem_margin_mb,
paths,
sample_header,
sample_queue: SampleQueue::new(),
clip_counter: 0,
collecting: false,
current_available: 0,
current_time: 0,
timer,
quit_request: false,
};
sampler
.find_starting_clip_counter()
.map_err(Error::StartingClipCounterMissingError)?;
sampler
.log_static_parameters(low_mem_margin_mb)
.map_err(Error::LogStaticParametersError)?;
Ok(sampler)
}
// Refresh cached time. This should be called after system calls, which
// can potentially block, but not if current_time is unused before the next call.
fn refresh_time(&mut self) {
self.current_time = self.timer.now();
}
// Collect a sample using the latest time snapshot.
fn enqueue_sample(&mut self, sample_type: SampleType) -> Result<()> {
let time = self.current_time; // to pacify the borrow checker
self.enqueue_sample_at_time(sample_type, time)
}
// Collect a sample with an externally-generated time stamp.
fn enqueue_sample_external(&mut self, sample_type: SampleType, time: i64) -> Result<()> {
assert!(!sample_type.has_internal_timestamp());
self.enqueue_sample_at_time(sample_type, time)
}
// Collects a sample of memory manager stats and adds it to the sample
// queue, possibly overwriting an old value. |sample_type| indicates the
// type of sample, and |time| the system uptime at the time the sample was
// collected.
fn enqueue_sample_at_time(&mut self, sample_type: SampleType, time: i64) -> Result<()> {
{
let sample: &mut Sample = self.sample_queue.next_slot();
sample.uptime = time;
sample.sample_type = sample_type;
sample.available = self.current_available;
sample.runnables = get_runnables()?;
sample.info = if cfg!(test) {
Sysinfo::fake_sysinfo()?
} else {
sysinfo()?
};
sample.vmstat_values = get_vmstats()?;
}
self.refresh_time();
Ok(())
}
// Creates or overwrites a file in the memd log directory containing
// quantities of interest.
fn log_static_parameters(&self, low_mem_margin_mb: u32) -> Result<()> {
let out = &mut OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&self.paths.static_parameters)?;
fprint_datetime(out)?;
writeln!(out, "margin {}", low_mem_margin_mb)?;
let psv = &self.paths.procsysvm;
log_from_procfs(out, psv, "min_filelist_kbytes")?;
log_from_procfs(out, psv, "min_free_kbytes")?;
log_from_procfs(out, psv, "extra_free_kbytes")?;
let mut zoneinfo = ZoneinfoFile(File::open(&self.paths.zoneinfo)?);
let watermarks = zoneinfo.read_watermarks()?;
writeln!(out, "min_water_mark_kbytes {}", watermarks.min * 4)?;
writeln!(out, "low_water_mark_kbytes {}", watermarks.low * 4)?;
writeln!(out, "high_water_mark_kbytes {}", watermarks.high * 4)?;
Ok(())
}
// Returns true if the program should go back to slow-polling mode (or stay
// in that mode). Returns false otherwise. Relies on |self.collecting|
// and |self.current_available| being up-to-date.
fn should_poll_slowly(&self) -> bool {
!self.collecting
&& !self.always_poll_fast
&& self.low_mem_margin_mb > 0
&& self.current_available > self.low_mem_margin_mb + LOW_MEM_DANGER_THRESHOLD_MB
}
// Sits mostly idle and checks available RAM at low frequency. Returns
// when available memory gets "close enough" to the tab discard margin.
fn slow_poll(&mut self) -> Result<()> {
debug!("entering slow poll at {}", self.current_time);
// Idiom for do ... while.
while {
let event_ready = self
.timer
.select(&self.dbus_receiver, SLOW_POLL_PERIOD_DURATION)?;
debug!("event_ready: {} at {}", event_ready, self.timer.now());
self.quit_request = self.timer.quit_request();
self.current_available = self.timer.get_available_mb()?;
self.refresh_time();
self.should_poll_slowly() && !self.quit_request && !event_ready
} {}
Ok(())
}
// Collects timer samples at fast rate. Also collects event samples.
// Samples contain various system stats related to kernel memory
// management. The samples are placed in a circular buffer. When
// something "interesting" happens, (for instance a tab discard, or a
// kernel OOM-kill) the samples around that event are saved into a "clip
// file".
fn fast_poll(&mut self) -> Result<()> {
let mut earliest_start_time = self.current_time;
debug!("entering fast poll at {}", earliest_start_time);
// Collect the first timer sample immediately upon entering.
self.enqueue_sample(SampleType::Timer)?;
// Keep track if we're in a low-mem state. Initially assume we are
// not.
let mut was_in_low_mem = false;
// |clip_{start,end}_time| are the collection start and end time for
// the current clip. Their value is valid only when |self.collecting|
// is true.
let mut clip_start_time = self.current_time;
let mut clip_end_time = self.current_time;
// |final_collection_time| indicates when we should stop collecting
// samples for any clip (either the current one, or the next one). Its
// value is valid only when |self.collecting| is true.
let mut final_collection_time = self.current_time;
// |self.collecting| is true when we're in the middle of collecting a clip
// because something interesting has happened.
self.collecting = false;
// Poll/select loop.
loop {
// Assume event is not interesting (since most events
// aren't). Change this to true for some event types.
let mut event_is_interesting = false;
let watch_start_time = self.current_time;
let event_ready = self
.timer
.select(&self.dbus_receiver, FAST_POLL_PERIOD_DURATION)?;
// Check for dbus events.
while let Ok(dbus_event) = self.dbus_receiver.try_recv() {
let (sample_type, sample_time) = match dbus_event {
DbusEvent::TabDiscard { time } => (SampleType::TabDiscard, time),
DbusEvent::OomKill { time } => (SampleType::OomKillBrowser, time),
DbusEvent::OomKillKernel { time } => (SampleType::OomKillKernel, time),
DbusEvent::CriticalMemoryPressure => (SampleType::Uninitialized, 0),
};
if sample_type != SampleType::Uninitialized {
debug!("enqueue {:?}, {:?}", sample_type, sample_time);
self.enqueue_sample_external(sample_type, sample_time)?;
}
event_is_interesting = true;
}
self.quit_request = self.timer.quit_request();
self.current_available = self.timer.get_available_mb()?;
let in_low_mem = self.current_available < self.low_mem_margin_mb;
self.refresh_time();
// Record a sample when we sleep too long. Such samples are
// somewhat redundant as they could be deduced from the log, but we
// wish to make it easier to detect such (rare, we hope)
// occurrences.
if watch_start_time > self.current_time + UNREASONABLY_LONG_SLEEP {
warn!(
"woke up at {} after unreasonable {} sleep",
self.current_time,
self.current_time - watch_start_time
);
self.enqueue_sample(SampleType::Sleeper)?;
}
if was_in_low_mem && !in_low_mem {
// Refresh time since we may have blocked. (That should
// not happen often because currently the run times between
// sleeps are well below the minimum timeslice.)
self.current_time = self.timer.now();
debug!("leaving low mem at {}", self.current_time);
was_in_low_mem = false;
self.enqueue_sample(SampleType::LeaveLowMem)?;
}
if !event_ready {
// Timer event.
self.enqueue_sample(SampleType::Timer)?;
} else {
// See comment above about watching low_mem.
let low_mem_has_fired = !was_in_low_mem && in_low_mem;
if low_mem_has_fired {
debug!("entering low mem at {}", self.current_time);
was_in_low_mem = true;
self.enqueue_sample(SampleType::EnterLowMem)?;
// Make this interesting at least until chrome events are
// plumbed, maybe permanently.
event_is_interesting = true;
}
}
// Arrange future saving of samples around interesting events.
if event_is_interesting {
// Update the time intervals to ensure we include all samples
// of interest in a clip. If we're not in the middle of
// collecting a clip, start one. If we're in the middle of
// collecting a clip which can be extended, do that.
final_collection_time = self.current_time + COLLECTION_DELAY_MS;
if self.collecting {
// Check if the current clip can be extended.
if clip_end_time < clip_start_time + CLIP_COLLECTION_SPAN_MS {
clip_end_time =
final_collection_time.min(clip_start_time + CLIP_COLLECTION_SPAN_MS);
debug!("extending clip to {}", clip_end_time);
}
} else {
// Start the clip collection.
self.collecting = true;
clip_start_time =
earliest_start_time.max(self.current_time - COLLECTION_DELAY_MS);
clip_end_time = self.current_time + COLLECTION_DELAY_MS;
debug!(
"starting new clip from {} to {}",
clip_start_time, clip_end_time
);
}
}
// Check if it is time to save the samples into a file.
if self.collecting && self.current_time > clip_end_time - SAMPLING_PERIOD_MS {
// Save the clip to disk.
debug!(
"[{}] saving clip: ({} ... {}), final {}",
self.current_time, clip_start_time, clip_end_time, final_collection_time
);
let res = self.save_clip(clip_start_time);
// Don't panic if there's an error writing to disk, log it instead.
if res.is_err() {
warn!("Error saving clip: {:?}", res);
}
self.collecting = false;
earliest_start_time = clip_end_time;
// Need to schedule another collection?
if final_collection_time > clip_end_time {
// Continue collecting by starting a new clip. Note that
// the clip length may be less than CLIP_COLLECTION_SPAN.
// This happens when event spans overlap, and also if we
// started fast sample collection just recently.
assert!(final_collection_time <= clip_end_time + CLIP_COLLECTION_SPAN_MS);
clip_start_time = clip_end_time;
clip_end_time = final_collection_time;
self.collecting = true;
debug!(
"continue collecting with new clip ({} {})",
clip_start_time, clip_end_time
);
// If we got stuck in the select() for a very long time
// because of system slowdown, it may be time to collect
// this second clip as well. But we don't bother, since
// this is unlikely, and we can collect next time around.
if self.current_time > clip_end_time {
debug!(
"heavy slowdown: postponing collection of ({}, {})",
clip_start_time, clip_end_time
);
}
}
}
if self.should_poll_slowly() || (self.quit_request && !self.collecting) {
break;
}
}
Ok(())
}
// Returns the clip file pathname to be used after the current one,
// and advances the clip counter.
fn next_clip_path(&mut self) -> PathBuf {
let name = format!("memd.clip{:03}.log", self.clip_counter);
self.clip_counter = modulo(self.clip_counter as isize + 1, MAX_CLIP_COUNT);
self.paths.log_directory.join(name)
}
// Finds and sets the starting value for the clip counter in this session.
// The goal is to preserve the most recent clips (if any) from previous
// sessions.
fn find_starting_clip_counter(&mut self) -> Result<()> {
self.clip_counter = 0;
let mut previous_time = std::time::UNIX_EPOCH;
loop {
let path = self.next_clip_path();
if !path.exists() {
break;
}
let md = std::fs::metadata(path)?;
let time = md.modified()?;
if time < previous_time {
break;
} else {
previous_time = time;
}
}
// We found the starting point, but the counter has already advanced so we
// need to backtrack one step.
self.clip_counter = modulo(self.clip_counter as isize - 1, MAX_CLIP_COUNT);
Ok(())
}
// Stores samples of interest in a new clip log, and remove those samples
// from the queue.
fn save_clip(&mut self, start_time: i64) -> Result<()> {
let path = self.next_clip_path();
let out = &mut OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(path)?;
// Print courtesy header. The first line is the current time. The
// second line lists the names of the variables in the following lines,
// in the same order.
fprint_datetime(out)?;
out.write_all(self.sample_header.as_bytes())?;
// Output samples from |start_time| to the head.
self.sample_queue.output_from_time(out, start_time)?;
// The queue is now empty.
self.sample_queue.reset();
Ok(())
}
}
// Prints |name| and value of entry /pros/sys/vm/|name| (or 0, if the entry is
// missing) to file |out|.
fn log_from_procfs(out: &mut File, dir: &Path, name: &str) -> Result<()> {
let procfs_path = dir.join(name);
let value = read_int(&procfs_path).unwrap_or(0);
writeln!(out, "{} {}", name, value)?;
Ok(())
}
// Outputs readable date and time to file |out|.
fn fprint_datetime(out: &mut File) -> Result<()> {
let local: DateTime<Local> = Local::now();
writeln!(out, "{}", local)?;
Ok(())
}
#[derive(Copy, Clone, Debug, Default, PartialEq)]
enum SampleType {
EnterLowMem, // Entering low-memory state, from the kernel low-mem notifier.
LeaveLowMem, // Leaving low-memory state, from the kernel low-mem notifier.
OomKillBrowser, // Chrome browser letting us know it detected OOM kill.
OomKillKernel, // Anomaly detector letting us know it detected OOM kill.
Sleeper, // Memd was not running for a long time.
TabDiscard, // Chrome browser letting us know about a tab discard.
Timer, // Event was produced after FAST_POLL_PERIOD_DURATION with no other events.
#[default]
Uninitialized, // Internal use.
}
impl fmt::Display for SampleType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.name())
}
}
impl SampleType {
// Returns true if the timestamp for this sample type is generated
// internally. This knowledge is used in outputting samples to clip files,
// because the timestamps of samples generated externally may be skewed.
fn has_internal_timestamp(&self) -> bool {
!matches!(
self,
&SampleType::TabDiscard | &SampleType::OomKillBrowser | &SampleType::OomKillKernel
)
}
}
impl SampleType {
// Returns the 6-character(max) identifier for a sample type.
fn name(&self) -> &'static str {
match *self {
SampleType::EnterLowMem => "lowmem",
SampleType::LeaveLowMem => "lealow",
SampleType::OomKillBrowser => "oomkll", // OOM from chrome
SampleType::OomKillKernel => "keroom", // OOM from kernel
SampleType::Sleeper => "sleepr",
SampleType::TabDiscard => "discrd",
SampleType::Timer => "timer",
SampleType::Uninitialized => "UNINIT",
}
}
}
// Path names of various system files, mostly in /proc, /sys, and /dev. They
// are collected into this struct because they get special values when testing.
#[derive(Clone)]
pub struct Paths {
log_directory: PathBuf,
static_parameters: PathBuf,
zoneinfo: PathBuf,
procsysvm: PathBuf,
testing_root: PathBuf,
}
// Returns a file name that replaces |name| when testing.
fn test_filename(testing: bool, testing_root: &str, name: &str) -> String {
if testing {
testing_root.to_string() + name
} else {
name.to_owned()
}
}
// This macro constructs a "normal" Paths object when |testing| is false, and
// one that mimics a root filesystem in a temporary directory when |testing| is
// true.
macro_rules! make_paths {
($testing:expr, $root:expr,
$($name:ident : $e:expr,)*
) => (
Paths {
testing_root: PathBuf::from($root),
$($name: PathBuf::from(test_filename($testing, $root, &($e).to_string()))),*
}
)
}
fn build_sample_header() -> String {
let mut s = "uptime type load freeram freeswap procs runnables available".to_string();
for vmstat in VMSTATS {
s = s + " " + vmstat.0;
}
s + "\n"
}
fn main() -> Result<()> {
let mut always_poll_fast = false;
let mut debug_log = false;
libchromeos::panic_handler::install_memfd_handler();
let args: Vec<String> = std::env::args().collect();
for arg in &args[1..] {
match arg.as_ref() {
"always-poll-fast" => always_poll_fast = true,
"debug-log" => debug_log = true,
_ => panic!("usage: memd [always-poll-fast|debug-log]*"),
}
}
let log_level = if debug_log {
log::LevelFilter::Debug
} else {
log::LevelFilter::Warn
};
syslog::init_unix(syslog::Facility::LOG_USER, log_level)
.map_err(|e| Error::InitSyslogError(Box::new(e)))?;
// Unlike log!(), warn!() etc., panic!() is not redirected by the syslog
// facility, instead always goes to stderr, which can get lost. Here we
// provide our own panic hook to make sure that the panic message goes to
// the syslog as well.
let default_panic = std::panic::take_hook();
std::panic::set_hook(Box::new(move |panic_info| {
error!("memd: {}", panic_info);
default_panic(panic_info);
}));
warn!("memd started");
run_memory_daemon(always_poll_fast)
}
// Creates a directory for testing, if testing. Otherwise
// returns None
fn make_testing_dir() -> Option<TempDir> {
if cfg!(test) {
Some(TempDir::new().expect("cannot create temp dir"))
} else {
None
}
}
fn get_paths(root: Option<TempDir>) -> Paths {
// make_paths! returns a Paths object initializer with these fields.
let testing_root = match root {
Some(tmpdir) => tmpdir.path().to_str().unwrap().to_string(),
None => "/".to_string(),
};
make_paths!(
cfg!(test),
&testing_root,
log_directory: LOG_DIRECTORY,
static_parameters: LOG_DIRECTORY.to_string() + "/" + STATIC_PARAMETERS_LOG,
zoneinfo: "/proc/zoneinfo",
procsysvm: "/proc/sys/vm",
)
}
// Receive D-Bus events and resend via channel.
#[cfg(not(test))]
fn receive_dbus_events(sender: crossbeam_channel::Sender<DbusEvent>) -> Result<()> {
let connection = Connection::get_private(BusType::System)?;
let _m = connection.add_match(concat!(
"type='signal',",
"interface='org.chromium.AnomalyEventServiceInterface',",
"member='AnomalyEvent'"
));
let _m = connection.add_match(concat!(
"type='signal',",
"interface='org.chromium.MetricsEventServiceInterface',",
"member='ChromeEvent'"
));
let _m = connection.add_match(concat!(
"type='signal',",
"interface='org.chromium.ResourceManager',",
"member='MemoryPressureChrome'"
));
let mut watch_fdset = select::FdSet::new();
for fd in connection.watch_fds() {
watch_fdset.insert(fd.fd());
}
let highest = watch_fdset.highest().ok_or("The fd set is empty")?;
loop {
let mut inout_fdset = watch_fdset;
let _n = select::select(highest + 1, &mut inout_fdset, None, None, None)?;
for fd in inout_fdset.fds(None) {
let handle = connection.watch_handle(fd, WatchEvent::Readable as libc::c_uint);
for connection_item in handle {
// Only consider signals.
if let ConnectionItem::Signal(ref message) = connection_item {
// Only consider signals with "ChromeEvent" or "AnomalyEvent" members.
if let Some(member) = message.member() {
if &*member == "ChromeEvent" || &*member == "AnomalyEvent" {
// Read first item in signal message as byte blob and
// parse blob into protobuf.
let raw_buffer: Vec<u8> = message.read1()?;
let mut protobuf = metrics_event::Event::new();
protobuf.merge_from_bytes(&raw_buffer)?;
let event_type = protobuf.type_.enum_value_or_default();
let time_stamp = protobuf.timestamp;
match event_type {
metrics_event::event::Type::TAB_DISCARD => {
sender.send(DbusEvent::TabDiscard { time: time_stamp })?;
}
metrics_event::event::Type::OOM_KILL => {
sender.send(DbusEvent::OomKill { time: time_stamp })?;
}
metrics_event::event::Type::OOM_KILL_KERNEL => {
sender.send(DbusEvent::OomKillKernel { time: time_stamp })?;
}
_ => {
warn!("unknown event type {:?}", event_type);
}
}
} else if &*member == "MemoryPressureChrome" {
let pressure_level: u8 = message.read1()?;
const PRESSURE_LEVEL_CHROME_CRITICAL: u8 = 2;
if pressure_level == PRESSURE_LEVEL_CHROME_CRITICAL {
sender.send(DbusEvent::CriticalMemoryPressure)?;
}
} else if &*member != "NameAcquired" {
// Do not report spurious "NameAcquired" signal to avoid spam.
warn!("unexpected dbus signal member {}", &*member);
}
}
}
}
}
}
}
// Get the memory margin by calling resource manager D-Bus method.
#[cfg(not(test))]
fn get_memory_margin_mb() -> Result<u32> {
let conn = dbus::blocking::Connection::new_system()?;
let proxy = conn.with_proxy(
RESOURCED_INTERFACE_NAME,
RESOURCED_PATH_NAME,
Duration::from_millis(5000),
);
let (critical, _moderate): (u64, u64) =
proxy.method_call(RESOURCED_SERVICE_NAME, "GetMemoryMarginsKB", ())?;
Ok((critical / 1024) as u32)
}
fn run_memory_daemon(always_poll_fast: bool) -> Result<()> {
let test_dir_option = make_testing_dir();
let paths = get_paths(test_dir_option);
debug!("Using root: {}", paths.testing_root.display());
#[cfg(test)]
{
test::setup_test_environment(&paths);
let var_log = &paths.log_directory.parent().unwrap();
std::fs::create_dir_all(var_log).map_err(|e| Error::CreateLogDirError(Box::new(e)))?;
}
// Make sure /var/log/memd exists. Create it if not. Assume /var/log
// exists. Panic on errors.
if !paths.log_directory.exists() {
create_dir(&paths.log_directory).map_err(|e| Error::CreateLogDirError(Box::new(e)))?
}
#[cfg(test)]
{
test::test_loop(always_poll_fast, &paths);
test::teardown_test_environment(&paths);
Ok(())
}
#[cfg(not(test))]
{
let timer = Box::new(GenuineTimer {
dbus_connection: dbus::blocking::Connection::new_system()?,
});
let (send, recv) = crossbeam_channel::unbounded();
let _sender_thread = std::thread::spawn(move || {
let _ = receive_dbus_events(send);
});
let mut sampler = Sampler::new(
always_poll_fast,
&paths,
timer,
recv,
get_memory_margin_mb()?,
)?;
loop {
// Run forever, alternating between slow and fast poll.
sampler.slow_poll()?;
sampler.fast_poll()?;
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_vmstats() {
let vmstats: HashMap<String, i64> = HashMap::from([
("noop".to_string(), 600),
("pswpin".to_string(), 100),
("pswpout".to_string(), 200),
("pgalloc_dma".to_string(), 1000),
("pgalloc_dma32".to_string(), 2000),
("pgalloc_normal".to_string(), 3000),
("pgalloc_movable".to_string(), 4000),
("pgmajfault".to_string(), 1500),
("pgmajfault_f".to_string(), 550),
]);
let result = parse_vmstats(&vmstats).unwrap();
assert_eq!(result[0], 100); // pswpin
assert_eq!(result[1], 200); // pswpout
assert_eq!(result[2], 10000); // paalloc
assert_eq!(result[3], 1500); // pgmajfault
assert_eq!(result[4], 550); // pgmajfault_f
}
#[test]
fn test_parse_runnables() {
// See also https://docs.kernel.org/filesystems/proc.html for field values of
// /proc/loadavg.
let load_average =
LoadAverage::from_reader("3.15 2.15 1.15 5/990 1270".as_bytes()).unwrap();
let runnables = parse_runnables(load_average);
assert_eq!(runnables, 5);
}
/// Regression test for https://crbug.com/1058463. Ensures that output_from_time doesn't read
/// samples outside of the valid range.
#[test]
fn queue_loop_test() {
test::queue_loop();
}
#[test]
fn memory_daemon_test() {
env_logger::init();
run_memory_daemon(false).expect("run_memory_daemon error");
}
}