| // Copyright 2018 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "ml/metrics.h" |
| |
| #include <algorithm> |
| #include <string> |
| #include <vector> |
| |
| #include <base/bind.h> |
| #include <base/check_op.h> |
| #include <base/files/file_path.h> |
| #include <base/logging.h> |
| #include <base/system/sys_info.h> |
| #include <base/time/time.h> |
| |
| #include "ml/process.h" |
| #include "ml/request_metrics.h" |
| #include "ml/util.h" |
| |
| namespace ml { |
| |
| namespace { |
| |
| // UMA metric names: |
| constexpr char kCpuUsageMetricName[] = |
| "MachineLearningService.CpuUsageMilliPercent"; |
| constexpr char kMojoConnectionEventMetricName[] = |
| "MachineLearningService.MojoConnectionEvent"; |
| constexpr char kTotalMemoryMetricName[] = |
| "MachineLearningService.TotalMemoryKb"; |
| constexpr char kPeakTotalMemoryMetricName[] = |
| "MachineLearningService.PeakTotalMemoryKb"; |
| constexpr char kNumWorkerProcessMetricName[] = |
| "MachineLearningService.NumWorkerProcess"; |
| |
| // UMA histogram ranges: |
| constexpr int kCpuUsageMinMilliPercent = 1; // 0.001% |
| constexpr int kCpuUsageMaxMilliPercent = 100000; // 100% |
| constexpr int kCpuUsageBuckets = 25; |
| constexpr int kMemoryUsageMinKb = 10; // 10 KB |
| constexpr int kMemoryUsageMaxKb = 100000000; // 100 GB |
| constexpr int kMemoryUsageBuckets = 100; |
| constexpr int kNumWorkerProcessMin = 0; |
| constexpr int kNumWorkerProcessMax = 1000; |
| constexpr int kNumWorkerProcessBuckets = 100; |
| |
| // chromeos_metrics::CumulativeMetrics constants: |
| constexpr char kCumulativeMetricsBackingDir[] = "/var/lib/ml_service/metrics"; |
| constexpr char kPeakTotalMemoryCumulativeStatName[] = "peak_total_memory_kb"; |
| |
| constexpr base::TimeDelta kCumulativeMetricsUpdatePeriod = |
| base::TimeDelta::FromMinutes(5); |
| constexpr base::TimeDelta kCumulativeMetricsReportPeriod = |
| base::TimeDelta::FromDays(1); |
| |
| void RecordCumulativeMetrics( |
| MetricsLibrary* const metrics_library, |
| chromeos_metrics::CumulativeMetrics* const cumulative_metrics) { |
| metrics_library->SendToUMA( |
| kPeakTotalMemoryMetricName, |
| cumulative_metrics->Get(kPeakTotalMemoryCumulativeStatName), |
| kMemoryUsageMinKb, kMemoryUsageMaxKb, kMemoryUsageBuckets); |
| } |
| |
| // Returns true if getting the RAM of control process succeeds. Otherwise |
| // returns false in which case the value of `total_mem_usage` should be |
| // ignored. |
| // Here we ignore the return status of getting worker processes's RAM usage |
| // because there may be a case that the worker process has disappeared but it |
| // has not been removed from Process::GetWorkerPidInfoMap(). We do not want this |
| // to block the overall metric report. In the future, we may implement some |
| // dedicated metrics to report such cases. |
| bool GetControlAndWorkerProcessMemoryUsage(size_t* total_mem_usage) { |
| DCHECK(total_mem_usage != nullptr); |
| *total_mem_usage = 0; |
| MemoryUsage usage; |
| // Collect RAM usage for worker processes. |
| // Do not crash if `GetProcessMemoryUsage` fails for worker processes because |
| // maybe some worker process terminates before it is unregistered. |
| for (const auto& pid_info : Process::GetInstance()->GetWorkerPidInfoMap()) { |
| if (GetProcessMemoryUsage(&usage, pid_info.first)) { |
| *total_mem_usage += usage.VmRSSKb + usage.VmSwapKb; |
| } else { |
| RecordProcessErrorEvent(ProcessError::kGetWorkerProcessMemoryUsageFailed); |
| } |
| } |
| // Collect RAM usage for control processes. |
| if (GetProcessMemoryUsage(&usage)) { |
| *total_mem_usage += usage.VmRSSKb + usage.VmSwapKb; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| } // namespace |
| |
| Metrics::Metrics() |
| : process_metrics_(base::ProcessMetrics::CreateCurrentProcessMetrics()) {} |
| |
| void Metrics::StartCollectingProcessMetrics() { |
| if (cumulative_metrics_) { |
| LOG(WARNING) << "Multiple calls to StartCollectingProcessMetrics"; |
| return; |
| } |
| |
| // Baseline the CPU usage counter in `process_metrics_` to be zero as of now. |
| const double initial_cpu_usage = |
| process_metrics_->GetPlatformIndependentCPUUsage(); |
| DCHECK_EQ(initial_cpu_usage, 0); |
| |
| cumulative_metrics_ = std::make_unique<chromeos_metrics::CumulativeMetrics>( |
| base::FilePath(kCumulativeMetricsBackingDir), |
| std::vector<std::string>{kPeakTotalMemoryCumulativeStatName}, |
| kCumulativeMetricsUpdatePeriod, |
| base::BindRepeating(&Metrics::UpdateAndRecordMetrics, |
| base::Unretained(this), |
| true /*record_current_metrics*/), |
| kCumulativeMetricsReportPeriod, |
| base::BindRepeating(&RecordCumulativeMetrics, |
| base::Unretained(&metrics_library_))); |
| } |
| |
| void Metrics::UpdateCumulativeMetricsNow() { |
| if (!cumulative_metrics_) { |
| return; |
| } |
| UpdateAndRecordMetrics(false /*record_current_metrics*/, |
| cumulative_metrics_.get()); |
| } |
| |
| void Metrics::UpdateAndRecordMetrics( |
| const bool record_current_metrics, |
| chromeos_metrics::CumulativeMetrics* const cumulative_metrics) { |
| size_t usage = 0; |
| if (!GetControlAndWorkerProcessMemoryUsage(&usage)) { |
| LOG(DFATAL) << "Getting process memory usage failed"; |
| return; |
| } |
| |
| // Update max memory stats. |
| cumulative_metrics->Max(kPeakTotalMemoryCumulativeStatName, |
| static_cast<int64_t>(usage)); |
| |
| if (record_current_metrics) { |
| // Record CPU usage (units = milli-percent i.e. 0.001%): |
| // First get the CPU usage of the control process. |
| auto cpu_usage = process_metrics_->GetPlatformIndependentCPUUsage(); |
| // Then get the CPU usages of the worker processes. |
| for (const auto& pid_info : Process::GetInstance()->GetWorkerPidInfoMap()) { |
| cpu_usage += |
| pid_info.second.process_metrics->GetPlatformIndependentCPUUsage(); |
| } |
| |
| const int cpu_usage_milli_percent = static_cast<int>( |
| 1000. * cpu_usage / base::SysInfo::NumberOfProcessors()); |
| metrics_library_.SendToUMA(kCpuUsageMetricName, cpu_usage_milli_percent, |
| kCpuUsageMinMilliPercent, |
| kCpuUsageMaxMilliPercent, kCpuUsageBuckets); |
| // Record memory usage: |
| metrics_library_.SendToUMA(kTotalMemoryMetricName, usage, kMemoryUsageMinKb, |
| kMemoryUsageMaxKb, kMemoryUsageBuckets); |
| |
| // Record how many worker processes. |
| metrics_library_.SendToUMA( |
| kNumWorkerProcessMetricName, |
| Process::GetInstance()->GetWorkerPidInfoMap().size(), |
| kNumWorkerProcessMin, kNumWorkerProcessMax, kNumWorkerProcessBuckets); |
| } |
| } |
| |
| void Metrics::RecordMojoConnectionEvent(const MojoConnectionEvent event) { |
| metrics_library_.SendEnumToUMA(kMojoConnectionEventMetricName, event); |
| } |
| |
| } // namespace ml |