blob: bc68e621e0948798f5ff65c20c5f9dd9bead936d [file] [log] [blame]
// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "ml/metrics.h"
#include <algorithm>
#include <string>
#include <vector>
#include <base/bind.h>
#include <base/check_op.h>
#include <base/files/file_path.h>
#include <base/logging.h>
#include <base/system/sys_info.h>
#include <base/time/time.h>
#include "ml/process.h"
#include "ml/request_metrics.h"
#include "ml/util.h"
namespace ml {
namespace {
// Names of the UMA histograms reported from this file.
constexpr char kCpuUsageMetricName[] =
    "MachineLearningService.CpuUsageMilliPercent";
constexpr char kMojoConnectionEventMetricName[] =
    "MachineLearningService.MojoConnectionEvent";
constexpr char kTotalMemoryMetricName[] =
    "MachineLearningService.TotalMemoryKb";
constexpr char kPeakTotalMemoryMetricName[] =
    "MachineLearningService.PeakTotalMemoryKb";
constexpr char kNumWorkerProcessMetricName[] =
    "MachineLearningService.NumWorkerProcess";

// Histogram bounds and bucket counts handed to SendToUMA.
// CPU usage, in milli-percent (one unit is 0.001%).
constexpr int kCpuUsageMinMilliPercent = 1;       // 0.001%
constexpr int kCpuUsageMaxMilliPercent = 100000;  // 100%
constexpr int kCpuUsageBuckets = 25;
// Memory usage, in KB. Shared by the current and the peak memory histograms.
constexpr int kMemoryUsageMinKb = 10;         // 10 KB
constexpr int kMemoryUsageMaxKb = 100000000;  // 100 GB
constexpr int kMemoryUsageBuckets = 100;
// Count of worker processes.
constexpr int kNumWorkerProcessMin = 0;
constexpr int kNumWorkerProcessMax = 1000;
constexpr int kNumWorkerProcessBuckets = 100;

// Settings for the chromeos_metrics::CumulativeMetrics instance.
// Backing directory given to CumulativeMetrics.
constexpr char kCumulativeMetricsBackingDir[] = "/var/lib/ml_service/metrics";
// Name of the cumulative stat that tracks the peak total memory.
constexpr char kPeakTotalMemoryCumulativeStatName[] = "peak_total_memory_kb";
// Period given to CumulativeMetrics for its update callback.
constexpr base::TimeDelta kCumulativeMetricsUpdatePeriod =
    base::TimeDelta::FromMinutes(5);
// Period given to CumulativeMetrics for its report callback.
constexpr base::TimeDelta kCumulativeMetricsReportPeriod =
    base::TimeDelta::FromDays(1);
// Reads the peak-total-memory stat out of `cumulative_metrics` and sends it to
// UMA through `metrics_library`, using the shared memory-usage histogram
// range. Bound as the report callback in
// Metrics::StartCollectingProcessMetrics().
void RecordCumulativeMetrics(
    MetricsLibrary* const metrics_library,
    chromeos_metrics::CumulativeMetrics* const cumulative_metrics) {
  const auto peak_total_memory_kb =
      cumulative_metrics->Get(kPeakTotalMemoryCumulativeStatName);
  metrics_library->SendToUMA(kPeakTotalMemoryMetricName, peak_total_memory_kb,
                             kMemoryUsageMinKb, kMemoryUsageMaxKb,
                             kMemoryUsageBuckets);
}
// Returns true if getting the RAM of control process succeeds. Otherwise
// returns false in which case the value of `total_mem_usage` should be
// ignored.
// Here we ignore the return status of getting worker processes's RAM usage
// because there may be a case that the worker process has disappeared but it
// has not been removed from Process::GetWorkerPidInfoMap(). We do not want this
// to block the overall metric report. In the future, we may implement some
// dedicated metrics to report such cases.
bool GetControlAndWorkerProcessMemoryUsage(size_t* total_mem_usage) {
DCHECK(total_mem_usage != nullptr);
*total_mem_usage = 0;
MemoryUsage usage;
// Collect RAM usage for worker processes.
// Do not crash if `GetProcessMemoryUsage` fails for worker processes because
// maybe some worker process terminates before it is unregistered.
for (const auto& pid_info : Process::GetInstance()->GetWorkerPidInfoMap()) {
if (GetProcessMemoryUsage(&usage, pid_info.first)) {
*total_mem_usage += usage.VmRSSKb + usage.VmSwapKb;
} else {
RecordProcessErrorEvent(ProcessError::kGetWorkerProcessMemoryUsageFailed);
}
}
// Collect RAM usage for control processes.
if (GetProcessMemoryUsage(&usage)) {
*total_mem_usage += usage.VmRSSKb + usage.VmSwapKb;
return true;
} else {
return false;
}
}
} // namespace
// Initializes `process_metrics_` from
// base::ProcessMetrics::CreateCurrentProcessMetrics(); it is queried later for
// the CPU usage figures reported by this class.
Metrics::Metrics()
    : process_metrics_(base::ProcessMetrics::CreateCurrentProcessMetrics()) {
}
void Metrics::StartCollectingProcessMetrics() {
if (cumulative_metrics_) {
LOG(WARNING) << "Multiple calls to StartCollectingProcessMetrics";
return;
}
// Baseline the CPU usage counter in `process_metrics_` to be zero as of now.
const double initial_cpu_usage =
process_metrics_->GetPlatformIndependentCPUUsage();
DCHECK_EQ(initial_cpu_usage, 0);
cumulative_metrics_ = std::make_unique<chromeos_metrics::CumulativeMetrics>(
base::FilePath(kCumulativeMetricsBackingDir),
std::vector<std::string>{kPeakTotalMemoryCumulativeStatName},
kCumulativeMetricsUpdatePeriod,
base::Bind(&Metrics::UpdateAndRecordMetrics, base::Unretained(this),
true /*record_current_metrics*/),
kCumulativeMetricsReportPeriod,
base::Bind(&RecordCumulativeMetrics,
base::Unretained(&metrics_library_)));
}
// Refreshes the cumulative stats immediately, without recording the current
// (non-cumulative) metrics. Does nothing if `cumulative_metrics_` has not been
// created yet.
void Metrics::UpdateCumulativeMetricsNow() {
  if (cumulative_metrics_) {
    UpdateAndRecordMetrics(false /*record_current_metrics*/,
                           cumulative_metrics_.get());
  }
}
// Updates the peak-total-memory stat in `cumulative_metrics` with the current
// combined memory usage of the control and worker processes. When
// `record_current_metrics` is true, additionally sends the current CPU usage,
// total memory usage and worker process count to UMA.
//
// If the memory usage cannot be obtained, logs at DFATAL and returns without
// updating or recording anything.
void Metrics::UpdateAndRecordMetrics(
    const bool record_current_metrics,
    chromeos_metrics::CumulativeMetrics* const cumulative_metrics) {
  size_t total_memory_kb = 0;
  if (!GetControlAndWorkerProcessMemoryUsage(&total_memory_kb)) {
    LOG(DFATAL) << "Getting process memory usage failed";
    return;
  }

  // Keep the running maximum of the total memory usage.
  cumulative_metrics->Max(kPeakTotalMemoryCumulativeStatName,
                          static_cast<int64_t>(total_memory_kb));

  if (!record_current_metrics) {
    return;
  }

  const auto& worker_pids = Process::GetInstance()->GetWorkerPidInfoMap();

  // CPU usage: start from the control process, then add every worker process.
  auto total_cpu_usage = process_metrics_->GetPlatformIndependentCPUUsage();
  for (const auto& worker : worker_pids) {
    total_cpu_usage +=
        worker.second.process_metrics->GetPlatformIndependentCPUUsage();
  }
  // Reported in milli-percent (i.e. units of 0.001%), divided by the number of
  // processors.
  const int cpu_usage_milli_percent = static_cast<int>(
      1000. * total_cpu_usage / base::SysInfo::NumberOfProcessors());
  metrics_library_.SendToUMA(kCpuUsageMetricName, cpu_usage_milli_percent,
                             kCpuUsageMinMilliPercent,
                             kCpuUsageMaxMilliPercent, kCpuUsageBuckets);

  // Current total memory usage.
  metrics_library_.SendToUMA(kTotalMemoryMetricName, total_memory_kb,
                             kMemoryUsageMinKb, kMemoryUsageMaxKb,
                             kMemoryUsageBuckets);

  // Current number of worker processes.
  metrics_library_.SendToUMA(kNumWorkerProcessMetricName, worker_pids.size(),
                             kNumWorkerProcessMin, kNumWorkerProcessMax,
                             kNumWorkerProcessBuckets);
}
// Sends `event` as an enum sample to the Mojo connection event UMA histogram.
void Metrics::RecordMojoConnectionEvent(const MojoConnectionEvent event) {
  metrics_library_.SendEnumToUMA(
      kMojoConnectionEventMetricName,
      event);
}
} // namespace ml