| # Copyright 2016 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """System metrics.""" |
| |
| import errno |
| import logging |
| import os |
| import time |
| |
| import psutil # pylint: disable=import-error |
| |
| from chromite.lib import metrics |
| |
| |
| logger = logging.getLogger(__name__) |
| |
| |
# CPU metrics.
_cpu_count_metric = metrics.GaugeMetric(
    "dev/cpu/count",
    description="Number of CPU cores.",
)
_cpu_time_metric = metrics.FloatMetric(
    "dev/cpu/time",
    description="percentage of time spent by the CPU in different states.",
)

# Disk space metrics.
_disk_free_metric = metrics.GaugeMetric(
    "dev/disk/free",
    description="Available bytes on disk partition.",
)
_disk_total_metric = metrics.GaugeMetric(
    "dev/disk/total",
    description="Total bytes on disk partition.",
)

# Inode metrics.
_inodes_free_metric = metrics.GaugeMetric(
    "dev/inodes/free",
    description="Number of available inodes on disk partition (unix only).",
)
_inodes_total_metric = metrics.GaugeMetric(
    "dev/inodes/total",
    description="Number of possible inodes on disk partition (unix only)",
)

# Memory metrics.
_mem_free_metric = metrics.GaugeMetric(
    "dev/mem/free",
    description=(
        "Amount of memory available to a process (in Bytes). "
        "Buffers are considered free memory."
    ),
)
_mem_total_metric = metrics.GaugeMetric(
    "dev/mem/total",
    description="Total physical memory in Bytes.",
)
| |
# Sampled once at import time.  Serves as the start time of the cumulative
# disk I/O counters below and as the reference point for the uptime metric.
_BOOT_TIME = psutil.boot_time()

# Disk I/O counters.
_disk_read_metric = metrics.CounterMetric(
    "dev/disk/read",
    description="Number of Bytes read on disk.",
    start_time=_BOOT_TIME,
)
_disk_write_metric = metrics.CounterMetric(
    "dev/disk/write",
    description="Number of Bytes written on disk.",
    start_time=_BOOT_TIME,
)

_uptime_metric = metrics.GaugeMetric(
    "dev/uptime",
    description="Machine uptime, in seconds.",
)

_load_average_metric = metrics.FloatMetric(
    "dev/proc/load_average",
    description="Number of processes currently in the system run queue.",
)
| |
# The ts_mon pipeline stamps metric points using backend clocks.  Comparing a
# point's timestamp with its value (the time according to this machine's
# local clock) can potentially reveal anomalies such as clock drift, an
# unusually high metrics pipeline delay, or completely wrong clocks.
#
# It is important to gather this metric right before the flush.
_unix_time_metric = metrics.GaugeMetric(
    "dev/unix_time",
    description=(
        "Number of milliseconds since epoch based on local machine clock."
    ),
)
| |
# Operating system and Python runtime description metrics.
_os_name_metric = metrics.StringMetric(
    "proc/os/name",
    description="OS name on the machine",
)
_os_version_metric = metrics.StringMetric(
    "proc/os/version",
    description="OS version on the machine",
)
_os_arch_metric = metrics.StringMetric(
    "proc/os/arch",
    description="OS architecture on this machine",
)
_python_arch_metric = metrics.StringMetric(
    "proc/python/arch",
    description="python userland architecture on this machine",
)
| |
| |
def collect_uptime() -> None:
    """Report how many whole seconds have elapsed since the machine booted."""
    seconds_since_boot = time.time() - _BOOT_TIME
    _uptime_metric.set(int(seconds_since_boot))
| |
| |
def collect_cpu_info() -> None:
    """Report the CPU count and the user/system/idle CPU time percentages."""
    core_count = psutil.cpu_count()
    _cpu_count_metric.set(core_count)

    percentages = psutil.cpu_times_percent()
    for state in ("user", "system", "idle"):
        share = getattr(percentages, state)
        # Each CPU state is reported as its own "mode" field value.
        _cpu_time_metric.set(share, {"mode": state})
| |
| |
def collect_disk_info(mountpoints=None) -> None:
    """Report disk usage, inode, and disk I/O metrics.

    Args:
        mountpoints: Iterable of mountpoint paths to report on.  When None,
            the mountpoint of every partition returned by
            psutil.disk_partitions() is used.
    """
    if mountpoints is None:
        mountpoints = [disk.mountpoint for disk in psutil.disk_partitions()]
    for mountpoint in mountpoints:
        # _collect_disk_info_single() already reports inode counts behind an
        # os.name == "posix" check.  Calling _collect_fs_inode_info() here as
        # well would repeat the statvfs() call, bypass that platform check
        # (os.statvfs is Unix-only), and re-raise for a mountpoint whose
        # ENOENT the helper deliberately ignored.
        _collect_disk_info_single(mountpoint)
    _collect_disk_io_info()
| |
| |
def _collect_disk_info_single(mountpoint) -> None:
    """Report free/total bytes (and, on POSIX, inode counts) for a mountpoint.

    Args:
        mountpoint: Path of the mounted filesystem to inspect.

    Raises:
        OSError: If psutil.disk_usage() fails with anything other than
            ENOENT.  Errors from the inode query are not caught here either.
    """
    try:
        usage = psutil.disk_usage(mountpoint)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        # This happens on Windows when querying a removable drive that
        # doesn't have any media inserted right now.  Nothing can be
        # reported for such a path, so the inode query is skipped as well:
        # statvfs() on it would only raise the error that was just ignored.
        return

    fields = {"path": mountpoint}
    _disk_free_metric.set(usage.free, fields=fields)
    _disk_total_metric.set(usage.total, fields=fields)

    # inode counts are only available on Unix.
    if os.name == "posix":
        _collect_fs_inode_info(mountpoint)
| |
| |
def _collect_fs_inode_info(mountpoint) -> None:
    """Report free and total inode counts for the filesystem at a mountpoint.

    Args:
        mountpoint: Path handed to os.statvfs() and reported as the "path"
            field of both inode metrics.
    """
    vfs = os.statvfs(mountpoint)
    path_fields = {"path": mountpoint}
    _inodes_free_metric.set(vfs.f_favail, fields=path_fields)
    _inodes_total_metric.set(vfs.f_files, fields=path_fields)
| |
| |
def _collect_disk_io_info() -> None:
    """Report the read and written byte counters of each disk.

    Raises:
        RuntimeError: If psutil fails for a reason other than being unable
            to find any physical disk.
    """
    try:
        per_disk = psutil.disk_io_counters(perdisk=True).items()
    except RuntimeError as ex:
        if "couldn't find any physical disk" not in str(ex):
            raise
        # Disk performance counters aren't enabled on Windows.
        return

    for name, io_stats in per_disk:
        labels = {"disk": name}
        _disk_read_metric.set(io_stats.read_bytes, fields=labels)
        _disk_write_metric.set(io_stats.write_bytes, fields=labels)
| |
| |
def collect_mem_info() -> None:
    """Report the available and total memory sizes obtained from psutil."""
    # Used memory is deliberately not reported: because of virtual memory
    # that figure is not useful.
    vm_stats = psutil.virtual_memory()
    _mem_free_metric.set(vm_stats.available)
    _mem_total_metric.set(vm_stats.total)
| |
| |
def collect_load_avg() -> None:
    """Report the 1, 5, and 15 minute load averages, when obtainable."""
    try:
        averages = os.getloadavg()
    except OSError:
        # The load average could not be obtained; report nothing.
        return

    # os.getloadavg() yields the averages in 1, 5, 15 minute order.
    for minutes, average in zip((1, 5, 15), averages):
        _load_average_metric.set(average, fields={"minutes": minutes})
| |
| |
def collect_unix_time() -> None:
    """Report the local clock's current time as milliseconds since epoch."""
    now_ms = int(time.time() * 1000)
    _unix_time_metric.set(now_ms)