metrics/memd/src/main.rs - third_party/platform2 - Git at Google

 // Copyright 2018 The ChromiumOS Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
 // This program collects fine-grained memory stats around events of interest
 // (such as browser tab discards) and saves them in a queue of "clip files",
 // to be uploaded with other logs.
 //
 // The program has two modes: slow and fast poll.  In slow-poll mode, the
 // program occasionally checks (every 2 seconds right now) whether it should go
 // into fast-poll mode, because interesting events become possible.  When in
 // fast-poll mode, the program collects memory stats frequently (every 0.1
 // seconds right now) and stores them in a circular buffer.  When "interesting"
 // events occur, the stats around each event are saved into a "clip" file.
 // These files are also maintained as a queue, so only the latest N clips are
 // available (N = 20 right now).

 extern crate chrono;
 extern crate dbus;
 extern crate env_logger;
 extern crate libc;
 #[macro_use]
 extern crate log;
 extern crate procfs;
 extern crate syslog;
 extern crate tempfile;

 #[cfg(not(test))]
 use dbus::ffidisp::{BusType, Connection, ConnectionItem, WatchEvent};
 #[cfg(not(test))]
 use protobuf::Message;

 use std::collections::hash_map::HashMap;
 use std::fmt;
 use std::fs::{create_dir, File, OpenOptions};
 use std::io::Read;
 use std::io::Write;
 use std::os::unix::fs::OpenOptionsExt;
 use std::path::{Path, PathBuf};
 use std::time::Duration;
 use std::{io, str};

 use chrono::DateTime;
 use chrono::Local;
 #[cfg(not(test))]
 use nix::sys::select;
 use procfs::LoadAverage;
 #[cfg(not(test))]
 use system_api::metrics_event;
 use tempfile::TempDir;

 #[cfg(test)]
 mod test;

 const LOG_DIRECTORY: &str = "/var/log/memd";
 const STATIC_PARAMETERS_LOG: &str = "memd.parameters";
 const MAX_CLIP_COUNT: usize = 20;

 const COLLECTION_DELAY_MS: i64 = 5_000; // Wait after event of interest.
 const CLIP_COLLECTION_SPAN_MS: i64 = 10_000; // ms worth of samples in a clip.
 const SAMPLES_PER_SECOND: i64 = 10; // Rate of fast sample collection.
 const SAMPLING_PERIOD_MS: i64 = 1000 / SAMPLES_PER_SECOND;
 // Danger threshold.  When the distance between "available" and "margin" is
 // greater than LOW_MEM_DANGER_THRESHOLD_MB, we assume that there's no danger
 // of "interesting" events (such as a discard) happening in the next
 // SLOW_POLL_PERIOD_DURATION.  In other words, we expect that an allocation of
 // more than LOW_MEM_DANGER_THRESHOLD_MB in a SLOW_POLL_PERIOD_DURATION will be
 // rare.
 const LOW_MEM_DANGER_THRESHOLD_MB: u32 = 600; // Poll fast when closer to margin than this.
 const SLOW_POLL_PERIOD_DURATION: Duration = Duration::from_secs(2); // Sleep in slow-poll mode.
 const FAST_POLL_PERIOD_DURATION: Duration = Duration::from_millis(SAMPLING_PERIOD_MS as u64); // Sleep duration in fast-poll mode.

 // Print a warning if the fast-poll select lasts a lot longer than expected
 // (which might happen because of huge load average and swap activity).
 const UNREASONABLY_LONG_SLEEP: i64 = 10 * SAMPLING_PERIOD_MS;

 // Size of sample queue.  The queue contains mostly timer events, in the amount
 // determined by the clip span and the sampling rate.  It also contains other
 // events, such as OOM kills etc., whose amount is expected to be smaller than
 // the former.  Generously double the number of timer events to leave room for
 // non-timer events.
 const SAMPLE_QUEUE_LENGTH: usize =
     (CLIP_COLLECTION_SPAN_MS / 1000 * SAMPLES_PER_SECOND * 2) as usize;

 // The names of fields of interest in /proc/vmstat.  They must be listed in
 // the order in which they appear in /proc/vmstat.  When parsing the file,
 // if a mandatory field is missing, the program panics.  A missing optional
 // field (for instance, pgmajfault_f for some kernels) results in a value
 // of 0. (BAD: This works only for the last field.)
 //
 // For fields with |accumulate| = true, use the name as a prefix, and add up
 // all values with that prefix in consecutive lines.
 const VMSTAT_VALUES_COUNT: usize = 5; // Number of vmstat values we're tracking.
 #[rustfmt::skip]
 const VMSTATS: [(&str, bool, bool); VMSTAT_VALUES_COUNT] = [
     // name                 mandatory   accumulate
     ("pswpin",              true,       false),
     ("pswpout",             true,       false),
     ("pgalloc",             true,       true),   // pgalloc_dma, pgalloc_normal, etc.
     ("pgmajfault",          true,       false),
     ("pgmajfault_f",        false,      false),
 ];
 // The only difference from x86_64 is pgalloc_dma vs. pgalloc_dma32.

 // For resource manager D-Bus interface
 #[cfg(not(test))]
 const RESOURCED_SERVICE_NAME: &str = "org.chromium.ResourceManager";
 #[cfg(not(test))]
 const RESOURCED_PATH_NAME: &str = "/org/chromium/ResourceManager";
 #[cfg(not(test))]
 const RESOURCED_INTERFACE_NAME: &str = RESOURCED_SERVICE_NAME;

 #[derive(Debug)]
 pub enum Error {
     LowMemFileError(Box<dyn std::error::Error>),
     VmstatFileError(Box<std::io::Error>),
     RunnablesFileError(Box<std::io::Error>),
     AvailableFileError(Box<dyn std::error::Error>),
     CreateLogDirError(Box<std::io::Error>),
     StartingClipCounterMissingError(Box<dyn std::error::Error>),
     LogStaticParametersError(Box<dyn std::error::Error>),
     DbusWatchError(Box<dyn std::error::Error>),
     LowMemFDWatchError(Box<dyn std::error::Error>),
     LowMemWatcherError(Box<dyn std::error::Error>),
     InitSyslogError(Box<dyn std::error::Error>),
 }

 impl std::error::Error for Error {
     // This function is "soft-deprecated" so
     // we use the fmt() function from Display below.
     fn description(&self) -> &str {
         "memd_error"
     }
 }

 impl fmt::Display for Error {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match *self {
             Error::LowMemFileError(ref e) => write!(f, "cannot opening low-mem file: {}", e),
             Error::VmstatFileError(ref e) => write!(f, "cannot open vmstat: {}", e),
             Error::RunnablesFileError(ref e) => write!(f, "cannot open loadavg: {}", e),
             Error::AvailableFileError(ref e) => write!(f, "cannot open available file: {}", e),
             Error::CreateLogDirError(ref e) => write!(f, "cannot create log directory: {}", e),
             Error::StartingClipCounterMissingError(ref e) => {
                 write!(f, "cannot find starting clip counter: {}", e)
             }
             Error::LogStaticParametersError(ref e) => {
                 write!(f, "cannot log static parameters: {}", e)
             }
             Error::DbusWatchError(ref e) => write!(f, "cannot watch dbus fd: {}", e),
             Error::LowMemFDWatchError(ref e) => write!(f, "cannot watch low-mem fd: {}", e),
             Error::LowMemWatcherError(ref e) => write!(f, "cannot set low-mem watcher: {}", e),
             Error::InitSyslogError(ref e) => write!(f, "cannot init syslog: {}", e),
         }
     }
 }

 type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;

 // Converts the result of an integer expression |e| to modulo |n|. |e| may be
 // negative. This differs from plain "%" in that the result of this function
 // is always be between 0 and n-1.
 fn modulo(e: isize, n: usize) -> usize {
     let nn = n as isize;
     let x = e % nn;
     (if x >= 0 { x } else { x + nn }) as usize
 }

 // Reads a string from the file named by |path|, representing a u32, and
 // returns the value the strings it represents. If there are multiple ints
 // in the file, then it returns the first one.
 fn read_int(path: &Path) -> Result<u32> {
     let mut file = File::open(path)?;
     let mut content = String::new();
     file.read_to_string(&mut content)?;
     Ok(content
         .split_whitespace()
         .next()
         .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))?
         .parse::<u32>()?)
 }

 // The Timer trait allows us to mock time for testing.
 trait Timer {
     // A wrapper for select() with only ready-to-read fds.
     fn select(
         &mut self,
         dbus_receiver: &crossbeam_channel::Receiver<DbusEvent>,
         timeout: Duration,
     ) -> Result<bool>;
     fn now(&self) -> i64;
     fn quit_request(&self) -> bool;
     // For mocking available in test.
     fn get_available_mb(&self) -> Result<u32>;
 }

 #[cfg(not(test))]
 struct GenuineTimer {
     dbus_connection: dbus::blocking::Connection,
 }

 #[cfg(not(test))]
 impl Timer for GenuineTimer {
     // Returns current uptime (active time since boot, in milliseconds)
     fn now(&self) -> i64 {
         let ts = nix::time::clock_gettime(nix::time::ClockId::CLOCK_MONOTONIC)
             .expect("clock_gettime() failed!");
         (ts.tv_sec() * 1000 + ts.tv_nsec() / 1_000_000).into()
     }

     // Always returns false, unless testing (see MockTimer).
     fn quit_request(&self) -> bool {
         false
     }

     fn select(
         &mut self,
         dbus_receiver: &crossbeam_channel::Receiver<DbusEvent>,
         timeout: Duration,
     ) -> Result<bool> {
         let mut channel_select = crossbeam_channel::Select::new();
         let _operation = channel_select.recv(dbus_receiver);
         match channel_select.ready_timeout(timeout) {
             Ok(_) => Ok(true),
             Err(_) => Ok(false),
         }
     }

     fn get_available_mb(&self) -> Result<u32> {
         let proxy = self.dbus_connection.with_proxy(
             RESOURCED_INTERFACE_NAME,
             RESOURCED_PATH_NAME,
             Duration::from_millis(5000),
         );
         let (available,): (u64,) =
             proxy.method_call(RESOURCED_SERVICE_NAME, "GetAvailableMemoryKB", ())?;
         Ok((available / 1024) as u32)
     }
 }

 #[derive(Clone, Copy, Default)]
 struct Sample {
     uptime: i64, // system uptime in ms
     sample_type: SampleType,
     info: Sysinfo,
     runnables: u32, // number of runnable processes
     available: u32, // available RAM from low-mem notifier
     vmstat_values: [i64; VMSTAT_VALUES_COUNT],
 }

 impl Sample {
     // Outputs a sample.
     fn output(&self, out: &mut File) -> Result<()> {
         writeln!(
             out,
             "{}.{:02} {:6} {} {} {} {} {} {} {} {} {} {} {}",
             self.uptime / 1000,
             self.uptime % 1000 / 10,
             self.sample_type,
             self.info.loads_1_minute,
             self.info.freeram,
             self.info.freeswap,
             self.info.procs,
             self.runnables,
             self.available,
             self.vmstat_values[0],
             self.vmstat_values[1],
             self.vmstat_values[2],
             self.vmstat_values[3],
             self.vmstat_values[4],
         )?;
         Ok(())
     }
 }

 #[derive(Copy, Clone, Default)]
 struct Sysinfo {
     loads_1_minute: f64,
     freeram: u64,
     freeswap: u64,
     procs: u16,
 }

 fn sysinfo() -> Result<Sysinfo> {
     let sysinfo_result = nix::sys::sysinfo::sysinfo()?;
     Ok(Sysinfo {
         loads_1_minute: sysinfo_result.load_average().0,
         freeram: sysinfo_result.ram_unused(),
         freeswap: sysinfo_result.swap_free(),
         procs: sysinfo_result.process_count(),
     })
 }

 impl Sysinfo {
     // Fakes sysinfo for testing.
     fn fake_sysinfo() -> Result<Sysinfo> {
         Ok(Sysinfo {
             loads_1_minute: 5.0,
             freeram: 42_000_000,
             freeswap: 84_000_000,
             procs: 1234,
         })
     }
 }

 struct SampleQueue {
     samples: [Sample; SAMPLE_QUEUE_LENGTH],
     head: usize,  // points to latest entry
     count: usize, // count of valid entries (min=0, max=SAMPLE_QUEUE_LENGTH)
 }

 impl SampleQueue {
     fn new() -> SampleQueue {
         let s: Sample = Default::default();
         SampleQueue {
             samples: [s; SAMPLE_QUEUE_LENGTH],
             head: 0,
             count: 0,
         }
     }

     // Returns self.head as isize, to make index calculations behave correctly
     // on underflow.
     fn ihead(&self) -> isize {
         self.head as isize
     }

     fn reset(&mut self) {
         self.head = 0;
         self.count = 0;
     }

     // Returns the next slot in the queue.  Always succeeds, since on overflow
     // it discards the LRU slot.
     fn next_slot(&mut self) -> &mut Sample {
         let slot = self.head;
         self.head = modulo(self.ihead() + 1, SAMPLE_QUEUE_LENGTH);
         if self.count < SAMPLE_QUEUE_LENGTH {
             self.count += 1;
         }
         &mut self.samples[slot]
     }

     fn sample(&self, i: isize) -> &Sample {
         assert!(i >= 0);
         // Subtract 1 because head points to the next free slot.
         assert!(
             modulo(self.ihead() - 1 - i, SAMPLE_QUEUE_LENGTH) <= self.count,
             "bad queue index: i {}, head {}, count {}, queue len {}",
             i,
             self.head,
             self.count,
             SAMPLE_QUEUE_LENGTH
         );
         &self.samples[i as usize]
     }

     // Outputs to file |f| samples from |start_time| to the head.  Uses a start
     // time rather than a start index because when we start a clip we have to
     // do a time-based search anyway.
     fn output_from_time(&self, f: &mut File, start_time: i64) -> Result<()> {
         // For now just do a linear search. ;)
         let mut start_index = modulo(self.ihead() - 1, SAMPLE_QUEUE_LENGTH);
         debug!(
             "output_from_time: start_time {}, head {}",
             start_time, start_index
         );
         loop {
             let sample = self.samples[start_index];
             // Ignore samples from external sources because their time stamp may be off.
             if sample.uptime <= start_time && sample.sample_type.has_internal_timestamp() {
                 break;
             }
             debug!(
                 "output_from_time: seeking uptime {}, index {}",
                 sample.uptime, start_index
             );
             start_index = modulo(start_index as isize - 1, SAMPLE_QUEUE_LENGTH);
             if modulo(self.ihead() - 1 - start_index as isize, SAMPLE_QUEUE_LENGTH) > self.count {
                 warn!("too many events in requested interval");
                 break;
             }
         }

         let mut index = modulo(start_index as isize + 1, SAMPLE_QUEUE_LENGTH) as isize;
         while index != self.ihead() {
             debug!("output_from_time: outputting index {}", index);
             self.sample(index).output(f)?;
             index = modulo(index + 1, SAMPLE_QUEUE_LENGTH) as isize;
         }
         Ok(())
     }
 }

 // Returns the number of processes currently runnable (running or on ready queue).
 // Rule of thumb:
 // runnable / CPU_count < 3, CPU loading is not high.
 // runnable / CPU_count > 5, CPU loading is very high.
 fn get_runnables() -> Result<u32> {
     Ok(parse_runnables(LoadAverage::new()?))
 }

 fn parse_runnables(load_average: LoadAverage) -> u32 {
     load_average.cur
 }

 fn get_vmstats() -> Result<[i64; VMSTAT_VALUES_COUNT]> {
     let vmstats = procfs::vmstat()?;
     parse_vmstats(&vmstats)
 }

 fn parse_vmstats(vmstats: &HashMap<String, i64>) -> Result<[i64; VMSTAT_VALUES_COUNT]> {
     let mut result = [0i64; VMSTAT_VALUES_COUNT];
     for (i, &(field_name, mandatory, accumulate)) in VMSTATS.iter().enumerate() {
         if accumulate {
             for (sub_field_name, value) in vmstats {
                 if sub_field_name.starts_with(field_name) {
                     result[i] += value;
                 }
             }
         } else {
             match vmstats.get(field_name) {
                 Some(value) => result[i] = *value,
                 None => {
                     if mandatory {
                         return Err(format!("vmstat: missing value: {}", field_name).into());
                     }
                 }
             }
         }
     }
     Ok(result)
 }

 struct Watermarks {
     min: u32,
     low: u32,
     high: u32,
 }

 struct ZoneinfoFile(File);

 impl ZoneinfoFile {
     // Computes and returns the watermark values from /proc/zoneinfo.
     fn read_watermarks(&mut self) -> Result<Watermarks> {
         let mut min = 0;
         let mut low = 0;
         let mut high = 0;
         let mut content = String::new();
         self.0.read_to_string(&mut content)?;
         for line in content.lines() {
             let items = line.split_whitespace().collect::<Vec<_>>();
             match items[0] {
                 "min" => min += items[1].parse::<u32>()?,
                 "low" => low += items[1].parse::<u32>()?,
                 "high" => high += items[1].parse::<u32>()?,
                 _ => {}
             }
         }
         Ok(Watermarks { min, low, high })
     }
 }

 enum DbusEvent {
     TabDiscard { time: i64 },
     OomKill { time: i64 },
     OomKillKernel { time: i64 },
     CriticalMemoryPressure,
 }

 // The main object.
 struct Sampler<'a> {
     always_poll_fast: bool, // When true, program stays in fast poll mode.
     paths: &'a Paths,       // Paths of files used by the program.
     dbus_receiver: crossbeam_channel::Receiver<DbusEvent>,
     low_mem_margin_mb: u32, // Low-memory margin, assumed to remain constant in a boot session.
     sample_header: String,  // The text at the beginning of each clip file.
     clip_counter: usize,    // Index of next clip file (also part of file name).
     sample_queue: SampleQueue, // A running queue of samples of vm quantities.
     current_available: u32, // Amount of "available" memory (in MB) at last reading.
     current_time: i64,      // Wall clock in ms at last reading.
     collecting: bool,       // True if we're in the middle of collecting a clip.
     timer: Box<dyn Timer>,  // Real or mock timer.
     quit_request: bool,     // Signals a quit-and-restart request when testing.
 }

 impl<'a> Sampler<'a> {
     fn new(
         always_poll_fast: bool,
         paths: &'a Paths,
         timer: Box<dyn Timer>,
         dbus_receiver: crossbeam_channel::Receiver<DbusEvent>,
         low_mem_margin_mb: u32,
     ) -> Result<Sampler> {
         let mut low_mem_file_flags = OpenOptions::new();
         low_mem_file_flags.custom_flags(libc::O_NONBLOCK);
         low_mem_file_flags.read(true);

         let sample_header = build_sample_header();

         let mut sampler = Sampler {
             always_poll_fast,
             dbus_receiver,
             low_mem_margin_mb,
             paths,
             sample_header,
             sample_queue: SampleQueue::new(),
             clip_counter: 0,
             collecting: false,
             current_available: 0,
             current_time: 0,
             timer,
             quit_request: false,
         };
         sampler
             .find_starting_clip_counter()
             .map_err(Error::StartingClipCounterMissingError)?;
         sampler
             .log_static_parameters(low_mem_margin_mb)
             .map_err(Error::LogStaticParametersError)?;
         Ok(sampler)
     }

     // Refresh cached time.  This should be called after system calls, which
     // can potentially block, but not if current_time is unused before the next call.
     fn refresh_time(&mut self) {
         self.current_time = self.timer.now();
     }

     // Collect a sample using the latest time snapshot.
     fn enqueue_sample(&mut self, sample_type: SampleType) -> Result<()> {
         let time = self.current_time; // to pacify the borrow checker
         self.enqueue_sample_at_time(sample_type, time)
     }

     // Collect a sample with an externally-generated time stamp.
     fn enqueue_sample_external(&mut self, sample_type: SampleType, time: i64) -> Result<()> {
         assert!(!sample_type.has_internal_timestamp());
         self.enqueue_sample_at_time(sample_type, time)
     }

     // Collects a sample of memory manager stats and adds it to the sample
     // queue, possibly overwriting an old value.  |sample_type| indicates the
     // type of sample, and |time| the system uptime at the time the sample was
     // collected.
     fn enqueue_sample_at_time(&mut self, sample_type: SampleType, time: i64) -> Result<()> {
         {
             let sample: &mut Sample = self.sample_queue.next_slot();
             sample.uptime = time;
             sample.sample_type = sample_type;
             sample.available = self.current_available;
             sample.runnables = get_runnables()?;
             sample.info = if cfg!(test) {
                 Sysinfo::fake_sysinfo()?
             } else {
                 sysinfo()?
             };
             sample.vmstat_values = get_vmstats()?;
         }
         self.refresh_time();
         Ok(())
     }

     // Creates or overwrites a file in the memd log directory containing
     // quantities of interest.
     fn log_static_parameters(&self, low_mem_margin_mb: u32) -> Result<()> {
         let out = &mut OpenOptions::new()
             .write(true)
             .create(true)
             .truncate(true)
             .open(&self.paths.static_parameters)?;
         fprint_datetime(out)?;
         writeln!(out, "margin {}", low_mem_margin_mb)?;

         let psv = &self.paths.procsysvm;
         log_from_procfs(out, psv, "min_filelist_kbytes")?;
         log_from_procfs(out, psv, "min_free_kbytes")?;
         log_from_procfs(out, psv, "extra_free_kbytes")?;

         let mut zoneinfo = ZoneinfoFile(File::open(&self.paths.zoneinfo)?);
         let watermarks = zoneinfo.read_watermarks()?;
         writeln!(out, "min_water_mark_kbytes {}", watermarks.min * 4)?;
         writeln!(out, "low_water_mark_kbytes {}", watermarks.low * 4)?;
         writeln!(out, "high_water_mark_kbytes {}", watermarks.high * 4)?;
         Ok(())
     }

     // Returns true if the program should go back to slow-polling mode (or stay
     // in that mode).  Returns false otherwise.  Relies on |self.collecting|
     // and |self.current_available| being up-to-date.
     fn should_poll_slowly(&self) -> bool {
         !self.collecting
             && !self.always_poll_fast
             && self.low_mem_margin_mb > 0
             && self.current_available > self.low_mem_margin_mb + LOW_MEM_DANGER_THRESHOLD_MB
     }

     // Sits mostly idle and checks available RAM at low frequency.  Returns
     // when available memory gets "close enough" to the tab discard margin.
     fn slow_poll(&mut self) -> Result<()> {
         debug!("entering slow poll at {}", self.current_time);
         // Idiom for do ... while.
         while {
             let event_ready = self
                 .timer
                 .select(&self.dbus_receiver, SLOW_POLL_PERIOD_DURATION)?;
             debug!("event_ready: {} at {}", event_ready, self.timer.now());
             self.quit_request = self.timer.quit_request();
             self.current_available = self.timer.get_available_mb()?;
             self.refresh_time();
             self.should_poll_slowly() && !self.quit_request && !event_ready
         } {}
         Ok(())
     }

     // Collects timer samples at fast rate.  Also collects event samples.
     // Samples contain various system stats related to kernel memory
     // management.  The samples are placed in a circular buffer.  When
     // something "interesting" happens, (for instance a tab discard, or a
     // kernel OOM-kill) the samples around that event are saved into a "clip
     // file".
     fn fast_poll(&mut self) -> Result<()> {
         let mut earliest_start_time = self.current_time;
         debug!("entering fast poll at {}", earliest_start_time);

         // Collect the first timer sample immediately upon entering.
         self.enqueue_sample(SampleType::Timer)?;
         // Keep track if we're in a low-mem state.  Initially assume we are
         // not.
         let mut was_in_low_mem = false;
         // |clip_{start,end}_time| are the collection start and end time for
         // the current clip.  Their value is valid only when |self.collecting|
         // is true.
         let mut clip_start_time = self.current_time;
         let mut clip_end_time = self.current_time;
         // |final_collection_time| indicates when we should stop collecting
         // samples for any clip (either the current one, or the next one).  Its
         // value is valid only when |self.collecting| is true.
         let mut final_collection_time = self.current_time;

         // |self.collecting| is true when we're in the middle of collecting a clip
         // because something interesting has happened.
         self.collecting = false;

         // Poll/select loop.
         loop {
             // Assume event is not interesting (since most events
             // aren't). Change this to true for some event types.
             let mut event_is_interesting = false;

             let watch_start_time = self.current_time;
             let event_ready = self
                 .timer
                 .select(&self.dbus_receiver, FAST_POLL_PERIOD_DURATION)?;
             // Check for dbus events.
             while let Ok(dbus_event) = self.dbus_receiver.try_recv() {
                 let (sample_type, sample_time) = match dbus_event {
                     DbusEvent::TabDiscard { time } => (SampleType::TabDiscard, time),
                     DbusEvent::OomKill { time } => (SampleType::OomKillBrowser, time),
                     DbusEvent::OomKillKernel { time } => (SampleType::OomKillKernel, time),
                     DbusEvent::CriticalMemoryPressure => (SampleType::Uninitialized, 0),
                 };
                 if sample_type != SampleType::Uninitialized {
                     debug!("enqueue {:?}, {:?}", sample_type, sample_time);
                     self.enqueue_sample_external(sample_type, sample_time)?;
                 }
                 event_is_interesting = true;
             }
             self.quit_request = self.timer.quit_request();
             self.current_available = self.timer.get_available_mb()?;
             let in_low_mem = self.current_available < self.low_mem_margin_mb;
             self.refresh_time();

             // Record a sample when we sleep too long.  Such samples are
             // somewhat redundant as they could be deduced from the log, but we
             // wish to make it easier to detect such (rare, we hope)
             // occurrences.
             if watch_start_time > self.current_time + UNREASONABLY_LONG_SLEEP {
                 warn!(
                     "woke up at {} after unreasonable {} sleep",
                     self.current_time,
                     self.current_time - watch_start_time
                 );
                 self.enqueue_sample(SampleType::Sleeper)?;
             }

             if was_in_low_mem && !in_low_mem {
                 // Refresh time since we may have blocked.  (That should
                 // not happen often because currently the run times between
                 // sleeps are well below the minimum timeslice.)
                 self.current_time = self.timer.now();
                 debug!("leaving low mem at {}", self.current_time);
                 was_in_low_mem = false;
                 self.enqueue_sample(SampleType::LeaveLowMem)?;
             }

             if !event_ready {
                 // Timer event.
                 self.enqueue_sample(SampleType::Timer)?;
             } else {
                 // See comment above about watching low_mem.
                 let low_mem_has_fired = !was_in_low_mem && in_low_mem;
                 if low_mem_has_fired {
                     debug!("entering low mem at {}", self.current_time);
                     was_in_low_mem = true;
                     self.enqueue_sample(SampleType::EnterLowMem)?;
                     // Make this interesting at least until chrome events are
                     // plumbed, maybe permanently.
                     event_is_interesting = true;
                 }
             }

             // Arrange future saving of samples around interesting events.
             if event_is_interesting {
                 // Update the time intervals to ensure we include all samples
                 // of interest in a clip.  If we're not in the middle of
                 // collecting a clip, start one.  If we're in the middle of
                 // collecting a clip which can be extended, do that.
                 final_collection_time = self.current_time + COLLECTION_DELAY_MS;
                 if self.collecting {
                     // Check if the current clip can be extended.
                     if clip_end_time < clip_start_time + CLIP_COLLECTION_SPAN_MS {
                         clip_end_time =
                             final_collection_time.min(clip_start_time + CLIP_COLLECTION_SPAN_MS);
                         debug!("extending clip to {}", clip_end_time);
                     }
                 } else {
                     // Start the clip collection.
                     self.collecting = true;
                     clip_start_time =
                         earliest_start_time.max(self.current_time - COLLECTION_DELAY_MS);
                     clip_end_time = self.current_time + COLLECTION_DELAY_MS;
                     debug!(
                         "starting new clip from {} to {}",
                         clip_start_time, clip_end_time
                     );
                 }
             }

             // Check if it is time to save the samples into a file.
             if self.collecting && self.current_time > clip_end_time - SAMPLING_PERIOD_MS {
                 // Save the clip to disk.
                 debug!(
                     "[{}] saving clip: ({} ... {}), final {}",
                     self.current_time, clip_start_time, clip_end_time, final_collection_time
                 );
                 let res = self.save_clip(clip_start_time);
                 // Don't panic if there's an error writing to disk, log it instead.
                 if res.is_err() {
                     warn!("Error saving clip: {:?}", res);
                 }
                 self.collecting = false;
                 earliest_start_time = clip_end_time;
                 // Need to schedule another collection?
                 if final_collection_time > clip_end_time {
                     // Continue collecting by starting a new clip.  Note that
                     // the clip length may be less than CLIP_COLLECTION_SPAN.
                     // This happens when event spans overlap, and also if we
                     // started fast sample collection just recently.
                     assert!(final_collection_time <= clip_end_time + CLIP_COLLECTION_SPAN_MS);
                     clip_start_time = clip_end_time;
                     clip_end_time = final_collection_time;
                     self.collecting = true;
                     debug!(
                         "continue collecting with new clip ({} {})",
                         clip_start_time, clip_end_time
                     );
                     // If we got stuck in the select() for a very long time
                     // because of system slowdown, it may be time to collect
                     // this second clip as well.  But we don't bother, since
                     // this is unlikely, and we can collect next time around.
                     if self.current_time > clip_end_time {
                         debug!(
                             "heavy slowdown: postponing collection of ({}, {})",
                             clip_start_time, clip_end_time
                         );
                     }
                 }
             }
             if self.should_poll_slowly() || (self.quit_request && !self.collecting) {
                 break;
             }
         }
         Ok(())
     }

     // Returns the clip file pathname to be used after the current one,
     // and advances the clip counter.
     fn next_clip_path(&mut self) -> PathBuf {
         let name = format!("memd.clip{:03}.log", self.clip_counter);
         self.clip_counter = modulo(self.clip_counter as isize + 1, MAX_CLIP_COUNT);
         self.paths.log_directory.join(name)
     }

     // Finds and sets the starting value for the clip counter in this session.
     // The goal is to preserve the most recent clips (if any) from previous
     // sessions.
     fn find_starting_clip_counter(&mut self) -> Result<()> {
         self.clip_counter = 0;
         let mut previous_time = std::time::UNIX_EPOCH;
         loop {
             let path = self.next_clip_path();
             if !path.exists() {
                 break;
             }
             let md = std::fs::metadata(path)?;
             let time = md.modified()?;
             if time < previous_time {
                 break;
             } else {
                 previous_time = time;
             }
         }
         // We found the starting point, but the counter has already advanced so we
         // need to backtrack one step.
         self.clip_counter = modulo(self.clip_counter as isize - 1, MAX_CLIP_COUNT);
         Ok(())
     }

     // Stores samples of interest in a new clip log, and remove those samples
     // from the queue.
     fn save_clip(&mut self, start_time: i64) -> Result<()> {
         let path = self.next_clip_path();
         let out = &mut OpenOptions::new()
             .write(true)
             .create(true)
             .truncate(true)
             .open(path)?;

         // Print courtesy header.  The first line is the current time.  The
         // second line lists the names of the variables in the following lines,
         // in the same order.
         fprint_datetime(out)?;
         out.write_all(self.sample_header.as_bytes())?;
         // Output samples from |start_time| to the head.
         self.sample_queue.output_from_time(out, start_time)?;
         // The queue is now empty.
         self.sample_queue.reset();
         Ok(())
     }
 }

 // Prints |name| and value of entry /pros/sys/vm/|name| (or 0, if the entry is
 // missing) to file |out|.
 fn log_from_procfs(out: &mut File, dir: &Path, name: &str) -> Result<()> {
     let procfs_path = dir.join(name);
     let value = read_int(&procfs_path).unwrap_or(0);
     writeln!(out, "{} {}", name, value)?;
     Ok(())
 }

 // Outputs readable date and time to file |out|.
 fn fprint_datetime(out: &mut File) -> Result<()> {
     let local: DateTime<Local> = Local::now();
     writeln!(out, "{}", local)?;
     Ok(())
 }

 #[derive(Copy, Clone, Debug, Default, PartialEq)]
 enum SampleType {
     EnterLowMem,    // Entering low-memory state, from the kernel low-mem notifier.
     LeaveLowMem,    // Leaving low-memory state, from the kernel low-mem notifier.
     OomKillBrowser, // Chrome browser letting us know it detected OOM kill.
     OomKillKernel,  // Anomaly detector letting us know it detected OOM kill.
     Sleeper,        // Memd was not running for a long time.
     TabDiscard,     // Chrome browser letting us know about a tab discard.
     Timer,          // Event was produced after FAST_POLL_PERIOD_DURATION with no other events.
     #[default]
     Uninitialized, // Internal use.
 }

 impl fmt::Display for SampleType {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{}", self.name())
     }
 }

 impl SampleType {
     // Returns true if the timestamp for this sample type is generated
     // internally.  This knowledge is used in outputting samples to clip files,
     // because the timestamps of samples generated externally may be skewed.
     fn has_internal_timestamp(&self) -> bool {
         !matches!(
             self,
             &SampleType::TabDiscard | &SampleType::OomKillBrowser | &SampleType::OomKillKernel
         )
     }
 }

 impl SampleType {
     // Returns the 6-character(max) identifier for a sample type.
     fn name(&self) -> &'static str {
         match *self {
             SampleType::EnterLowMem => "lowmem",
             SampleType::LeaveLowMem => "lealow",
             SampleType::OomKillBrowser => "oomkll", // OOM from chrome
             SampleType::OomKillKernel => "keroom",  // OOM from kernel
             SampleType::Sleeper => "sleepr",
             SampleType::TabDiscard => "discrd",
             SampleType::Timer => "timer",
             SampleType::Uninitialized => "UNINIT",
         }
     }
 }

 // Path names of various system files, mostly in /proc, /sys, and /dev.  They
 // are collected into this struct because they get special values when testing.
 #[derive(Clone)]
 pub struct Paths {
     log_directory: PathBuf,
     static_parameters: PathBuf,
     zoneinfo: PathBuf,
     procsysvm: PathBuf,
     testing_root: PathBuf,
 }

 // Returns a file name that replaces |name| when testing.
 fn test_filename(testing: bool, testing_root: &str, name: &str) -> String {
     if testing {
         testing_root.to_string() + name
     } else {
         name.to_owned()
     }
 }

 // This macro constructs a "normal" Paths object when |testing| is false, and
 // one that mimics a root filesystem in a temporary directory when |testing| is
 // true.
 macro_rules! make_paths {
     ($testing:expr, $root:expr,
      $($name:ident : $e:expr,)*
     ) => (
         Paths {
             testing_root: PathBuf::from($root),
             $($name: PathBuf::from(test_filename($testing, $root, &($e).to_string()))),*
         }
     )
 }

 fn build_sample_header() -> String {
     let mut s = "uptime type load freeram freeswap procs runnables available".to_string();
     for vmstat in VMSTATS {
         s = s + " " + vmstat.0;
     }
     s + "\n"
 }

 fn main() -> Result<()> {
     let mut always_poll_fast = false;
     let mut debug_log = false;

     libchromeos::panic_handler::install_memfd_handler();
     let args: Vec<String> = std::env::args().collect();
     for arg in &args[1..] {
         match arg.as_ref() {
             "always-poll-fast" => always_poll_fast = true,
             "debug-log" => debug_log = true,
             _ => panic!("usage: memd [always-poll-fast|debug-log]*"),
         }
     }

     let log_level = if debug_log {
         log::LevelFilter::Debug
     } else {
         log::LevelFilter::Warn
     };
     syslog::init_unix(syslog::Facility::LOG_USER, log_level)
         .map_err(|e| Error::InitSyslogError(Box::new(e)))?;

     // Unlike log!(), warn!() etc., panic!() is not redirected by the syslog
     // facility, instead always goes to stderr, which can get lost.  Here we
     // provide our own panic hook to make sure that the panic message goes to
     // the syslog as well.
     let default_panic = std::panic::take_hook();
     std::panic::set_hook(Box::new(move |panic_info| {
         error!("memd: {}", panic_info);
         default_panic(panic_info);
     }));

     warn!("memd started");
     run_memory_daemon(always_poll_fast)
 }

 // Creates a directory for testing, if testing.  Otherwise
 // returns None
 fn make_testing_dir() -> Option<TempDir> {
     if cfg!(test) {
         Some(TempDir::new().expect("cannot create temp dir"))
     } else {
         None
     }
 }

 fn get_paths(root: Option<TempDir>) -> Paths {
     // make_paths! returns a Paths object initializer with these fields.
     let testing_root = match root {
         Some(tmpdir) => tmpdir.path().to_str().unwrap().to_string(),
         None => "/".to_string(),
     };
     make_paths!(
         cfg!(test),
         &testing_root,
         log_directory:     LOG_DIRECTORY,
         static_parameters: LOG_DIRECTORY.to_string() + "/" + STATIC_PARAMETERS_LOG,
         zoneinfo:          "/proc/zoneinfo",
         procsysvm:         "/proc/sys/vm",
     )
 }

 // Receive D-Bus events and resend via channel.
 #[cfg(not(test))]
 fn receive_dbus_events(sender: crossbeam_channel::Sender<DbusEvent>) -> Result<()> {
     let connection = Connection::get_private(BusType::System)?;
     let _m = connection.add_match(concat!(
         "type='signal',",
         "interface='org.chromium.AnomalyEventServiceInterface',",
         "member='AnomalyEvent'"
     ));
     let _m = connection.add_match(concat!(
         "type='signal',",
         "interface='org.chromium.MetricsEventServiceInterface',",
         "member='ChromeEvent'"
     ));
     let _m = connection.add_match(concat!(
         "type='signal',",
         "interface='org.chromium.ResourceManager',",
         "member='MemoryPressureChrome'"
     ));

     let mut watch_fdset = select::FdSet::new();
     for fd in connection.watch_fds() {
         watch_fdset.insert(fd.fd());
     }
     let highest = watch_fdset.highest().ok_or("The fd set is empty")?;

     loop {
         let mut inout_fdset = watch_fdset;
         let _n = select::select(highest + 1, &mut inout_fdset, None, None, None)?;

         for fd in inout_fdset.fds(None) {
             let handle = connection.watch_handle(fd, WatchEvent::Readable as libc::c_uint);
             for connection_item in handle {
                 // Only consider signals.
                 if let ConnectionItem::Signal(ref message) = connection_item {
                     // Only consider signals with "ChromeEvent" or "AnomalyEvent" members.
                     if let Some(member) = message.member() {
                         if &*member == "ChromeEvent" || &*member == "AnomalyEvent" {
                             // Read first item in signal message as byte blob and
                             // parse blob into protobuf.
                             let raw_buffer: Vec<u8> = message.read1()?;
                             let mut protobuf = metrics_event::Event::new();
                             protobuf.merge_from_bytes(&raw_buffer)?;

                             let event_type = protobuf.type_.enum_value_or_default();
                             let time_stamp = protobuf.timestamp;
                             match event_type {
                                 metrics_event::event::Type::TAB_DISCARD => {
                                     sender.send(DbusEvent::TabDiscard { time: time_stamp })?;
                                 }
                                 metrics_event::event::Type::OOM_KILL => {
                                     sender.send(DbusEvent::OomKill { time: time_stamp })?;
                                 }
                                 metrics_event::event::Type::OOM_KILL_KERNEL => {
                                     sender.send(DbusEvent::OomKillKernel { time: time_stamp })?;
                                 }
                                 _ => {
                                     warn!("unknown event type {:?}", event_type);
                                 }
                             }
                         } else if &*member == "MemoryPressureChrome" {
                             let pressure_level: u8 = message.read1()?;
                             const PRESSURE_LEVEL_CHROME_CRITICAL: u8 = 2;
                             if pressure_level == PRESSURE_LEVEL_CHROME_CRITICAL {
                                 sender.send(DbusEvent::CriticalMemoryPressure)?;
                             }
                         } else if &*member != "NameAcquired" {
                             // Do not report spurious "NameAcquired" signal to avoid spam.
                             warn!("unexpected dbus signal member {}", &*member);
                         }
                     }
                 }
             }
         }
     }
 }

 // Get the memory margin by calling resource manager D-Bus method.
 #[cfg(not(test))]
 fn get_memory_margin_mb() -> Result<u32> {
     let conn = dbus::blocking::Connection::new_system()?;
     let proxy = conn.with_proxy(
         RESOURCED_INTERFACE_NAME,
         RESOURCED_PATH_NAME,
         Duration::from_millis(5000),
     );
     let (critical, _moderate): (u64, u64) =
         proxy.method_call(RESOURCED_SERVICE_NAME, "GetMemoryMarginsKB", ())?;
     Ok((critical / 1024) as u32)
 }

 fn run_memory_daemon(always_poll_fast: bool) -> Result<()> {
     let test_dir_option = make_testing_dir();
     let paths = get_paths(test_dir_option);
     debug!("Using root: {}", paths.testing_root.display());

     #[cfg(test)]
     {
         test::setup_test_environment(&paths);
         let var_log = &paths.log_directory.parent().unwrap();
         std::fs::create_dir_all(var_log).map_err(|e| Error::CreateLogDirError(Box::new(e)))?;
     }

     // Make sure /var/log/memd exists.  Create it if not.  Assume /var/log
     // exists.  Panic on errors.
     if !paths.log_directory.exists() {
         create_dir(&paths.log_directory).map_err(|e| Error::CreateLogDirError(Box::new(e)))?
     }

     #[cfg(test)]
     {
         test::test_loop(always_poll_fast, &paths);
         test::teardown_test_environment(&paths);
         Ok(())
     }

     #[cfg(not(test))]
     {
         let timer = Box::new(GenuineTimer {
             dbus_connection: dbus::blocking::Connection::new_system()?,
         });
         let (send, recv) = crossbeam_channel::unbounded();
         let _sender_thread = std::thread::spawn(move || {
             let _ = receive_dbus_events(send);
         });
         let mut sampler = Sampler::new(
             always_poll_fast,
             &paths,
             timer,
             recv,
             get_memory_margin_mb()?,
         )?;
         loop {
             // Run forever, alternating between slow and fast poll.
             sampler.slow_poll()?;
             sampler.fast_poll()?;
         }
     }
 }

 #[cfg(test)]
 mod tests {
     use super::*;

     #[test]
     fn test_parse_vmstats() {
         let vmstats: HashMap<String, i64> = HashMap::from([
             ("noop".to_string(), 600),
             ("pswpin".to_string(), 100),
             ("pswpout".to_string(), 200),
             ("pgalloc_dma".to_string(), 1000),
             ("pgalloc_dma32".to_string(), 2000),
             ("pgalloc_normal".to_string(), 3000),
             ("pgalloc_movable".to_string(), 4000),
             ("pgmajfault".to_string(), 1500),
             ("pgmajfault_f".to_string(), 550),
         ]);
         let result = parse_vmstats(&vmstats).unwrap();
         assert_eq!(result[0], 100); // pswpin
         assert_eq!(result[1], 200); // pswpout
         assert_eq!(result[2], 10000); // paalloc
         assert_eq!(result[3], 1500); // pgmajfault
         assert_eq!(result[4], 550); // pgmajfault_f
     }

     #[test]
     fn test_parse_runnables() {
         // See also https://docs.kernel.org/filesystems/proc.html for field values of
         // /proc/loadavg.
         let load_average =
             LoadAverage::from_reader("3.15 2.15 1.15 5/990 1270".as_bytes()).unwrap();
         let runnables = parse_runnables(load_average);
         assert_eq!(runnables, 5);
     }

     /// Regression test for https://crbug.com/1058463. Ensures that output_from_time doesn't read
     /// samples outside of the valid range.
     #[test]
     fn queue_loop_test() {
         test::queue_loop();
     }

     #[test]
     fn memory_daemon_test() {
         env_logger::init();
         run_memory_daemon(false).expect("run_memory_daemon error");
     }
 }