ml_benchmark: add memory sampler to estimate peak rss+swap usage
BUG=b:174827149
TEST=FEATURES=test emerge-betty ml-benchmark
cros deploy <dut> ml-benchmark
(DUT) $ ml_benchmark --config_file_name=soda-scenario-1.config
Change-Id: I80e854e8217fed4ad9bb610d50621db57a04edb3
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform2/+/2582021
Tested-by: Jim Pollock <jmpollock@chromium.org>
Reviewed-by: Michael Pishchagin <mblsha@google.com>
Commit-Queue: Jim Pollock <jmpollock@chromium.org>
Auto-Submit: Jim Pollock <jmpollock@chromium.org>
diff --git a/ml_benchmark/BUILD.gn b/ml_benchmark/BUILD.gn
index cb4367f..10a0a9e 100644
--- a/ml_benchmark/BUILD.gn
+++ b/ml_benchmark/BUILD.gn
@@ -38,6 +38,8 @@
"benchmark_functions.h",
"json_serializer.cc",
"json_serializer.h",
+ "memory_sampler.cc",
+ "memory_sampler.h",
"shared_library_benchmark.cc",
"shared_library_benchmark.h",
"shared_library_benchmark_functions.cc",
@@ -64,6 +66,7 @@
executable("ml_benchmark_test") {
sources = [
"json_serializer_test.cc",
+ "memory_sampler_test.cc",
"shared_library_benchmark_test.cc",
"sysmetrics_test.cc",
]
diff --git a/ml_benchmark/main.cc b/ml_benchmark/main.cc
index a743e84..c1d64a5 100644
--- a/ml_benchmark/main.cc
+++ b/ml_benchmark/main.cc
@@ -6,12 +6,14 @@
#include <base/files/file_util.h>
#include <base/json/json_writer.h>
#include <base/logging.h>
+#include <base/task/thread_pool/thread_pool_instance.h>
#include <base/values.h>
#include <brillo/flag_helper.h>
#include <string>
#include "ml_benchmark/json_serializer.h"
+#include "ml_benchmark/memory_sampler.h"
#include "ml_benchmark/shared_library_benchmark.h"
#include "ml_benchmark/shared_library_benchmark_functions.h"
#include "ml_benchmark/sysmetrics.h"
@@ -20,27 +22,21 @@
using chrome::ml_benchmark::BenchmarkResults;
using chrome::ml_benchmark::CrOSBenchmarkConfig;
using chrome::ml_benchmark::Metric;
+using ml_benchmark::PeakMemorySampler;
using ml_benchmark::SharedLibraryBenchmark;
using ml_benchmark::SharedLibraryBenchmarkFunctions;
namespace {
-void AddMemoryMetrics(const int64_t initial_memsize,
- const int64_t final_peaksize,
- BenchmarkResults* results) {
- auto& initial_mem = *results->add_metrics();
- initial_mem.set_name("initial_vmsize");
- initial_mem.set_units(Metric::BYTES);
- initial_mem.set_direction(Metric::SMALLER_IS_BETTER);
- initial_mem.set_cardinality(Metric::SINGLE);
- initial_mem.add_values(initial_memsize);
-
- auto& final_mem = *results->add_metrics();
- final_mem.set_name("final_vmpeak");
- final_mem.set_units(Metric::BYTES);
- final_mem.set_direction(Metric::SMALLER_IS_BETTER);
- final_mem.set_cardinality(Metric::SINGLE);
- final_mem.add_values(final_peaksize);
+// Appends one metric called |metric_name| holding the single sample |value|
+// to |results|. The metric is tagged as a byte count where smaller is better.
+void AddMemoryMetric(const std::string& metric_name,
+                     const int64_t value,
+                     BenchmarkResults* results) {
+  auto* const entry = results->add_metrics();
+  entry->set_name(metric_name);
+  entry->set_units(Metric::BYTES);
+  entry->set_direction(Metric::SMALLER_IS_BETTER);
+  entry->set_cardinality(Metric::SINGLE);
+  entry->add_values(value);
}
void PrintMetrics(const BenchmarkResults& results) {
@@ -73,6 +69,10 @@
}
const int64_t initial_memsize = ml_benchmark::GetVMSizeBytes();
+ const int64_t initial_rss_swap = ml_benchmark::GetSwapAndRSSBytes();
+
+ scoped_refptr<PeakMemorySampler> mem_sampler = new PeakMemorySampler();
+ PeakMemorySampler::StartSampling(mem_sampler);
LOG(INFO) << "Starting the " << driver_name << " benchmark";
SharedLibraryBenchmark benchmark(std::move(functions));
@@ -83,11 +83,18 @@
return;
}
+ PeakMemorySampler::StopSampling(mem_sampler);
+
if (results.status() == chrome::ml_benchmark::OK) {
LOG(INFO) << driver_name << " finished";
- const int64_t final_peaksize = ml_benchmark::GetVMPeakBytes();
- AddMemoryMetrics(initial_memsize, final_peaksize, &results);
+ const int64_t final_vmpeaksize = ml_benchmark::GetVMPeakBytes();
+ const int64_t peak_rss_swap = mem_sampler->GetMaxSample();
+
+ AddMemoryMetric("initial_vmsize", initial_memsize, &results);
+ AddMemoryMetric("final_vmpeak", final_vmpeaksize, &results);
+ AddMemoryMetric("initial_rss_swap", initial_rss_swap, &results);
+ AddMemoryMetric("peak_rss_swap", peak_rss_swap, &results);
PrintMetrics(results);
@@ -135,8 +142,9 @@
}
}
- base::FilePath driver_library(FLAGS_driver_library_path);
+ base::ThreadPoolInstance::CreateAndStartWithDefaultParams("ml_benchmark");
+ base::FilePath driver_library(FLAGS_driver_library_path);
BenchmarkAndReportResults(FLAGS_driver_library_path, driver_library,
benchmark_config, output_file_path);
diff --git a/ml_benchmark/memory_sampler.cc b/ml_benchmark/memory_sampler.cc
new file mode 100644
index 0000000..71c4936
--- /dev/null
+++ b/ml_benchmark/memory_sampler.cc
@@ -0,0 +1,60 @@
+// Copyright 2020 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "ml_benchmark/memory_sampler.h"
+
+#include <base/bind.h>
+#include <base/task/task_traits.h>
+#include <base/task/thread_pool.h>
+
+#include <algorithm>
+
+#include "ml_benchmark/sysmetrics.h"
+
+namespace ml_benchmark {
+
+// Remembers the construction site as the location used when posting tasks and
+// creates the sequenced thread-pool task runner that sampling tasks run on
+// (may block, best-effort priority, skipped on shutdown).
+PeakMemorySampler::PeakMemorySampler()
+    : from_here_(FROM_HERE),
+      task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
+          {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
+           base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN})) {}
+
+// Clears the running flag (under |lock_|) as the sampler goes away.
+PeakMemorySampler::~PeakMemorySampler() {
+  base::AutoLock guard(lock_);
+  running_ = false;
+}
+
+// Updates the running flag while holding |lock_|, the same lock the sampling
+// task takes before it reads the flag.
+void PeakMemorySampler::SetRunning(bool is_running) {
+  base::AutoLock guard(lock_);
+  running_ = is_running;
+}
+
+// Starts periodic sampling on |sampler|: one sample is taken immediately and
+// SampleMemory() then reschedules itself every |sampling_interval_|.
+//
+// Calling this while the sampler is already running is a no-op. Without that
+// guard every extra call would start another self-rescheduling chain, which
+// multiplies the sampling rate (and |sample_counter_|).
+// NOTE(review): restarting right after StopSampling(), before the previously
+// queued task has had a chance to run and bail out, can still leave two
+// chains alive; callers should let one interval elapse between stop and start.
+void PeakMemorySampler::StartSampling(
+    scoped_refptr<PeakMemorySampler> sampler) {
+  {
+    base::AutoLock auto_lock(sampler->lock_);
+    if (sampler->running_)
+      return;
+    sampler->running_ = true;
+  }
+  // SampleMemory() takes |lock_| itself, so it must be called unlocked.
+  SampleMemory(sampler);
+}
+
+// Clears the running flag of |sampler|. A sampling task that is already
+// queued sees the cleared flag when it runs and returns without sampling or
+// rescheduling.
+void PeakMemorySampler::StopSampling(scoped_refptr<PeakMemorySampler> sampler) {
+  PeakMemorySampler* const target = sampler.get();
+  target->SetRunning(false);
+}
+
+// Returns the largest sample recorded so far (0 if none has been taken).
+int64_t PeakMemorySampler::GetMaxSample() {
+  int64_t peak = 0;
+  {
+    // SampleMemory() updates max_sample_ under this same lock.
+    base::AutoLock guard(lock_);
+    peak = max_sample_;
+  }
+  return peak;
+}
+
+// Takes one sample and queues the next one.
+//
+// Does nothing if |sampler| is not running; this is how a task that was
+// queued before StopSampling() ends the chain. Otherwise it bumps the sample
+// counter, folds GetSwapAndRSSBytes() into the running maximum and posts
+// itself again after |sampling_interval_|.
+//
+// |lock_| is held for the whole call, so once StopSampling() has returned no
+// further update of |max_sample_| can happen.
+void PeakMemorySampler::SampleMemory(scoped_refptr<PeakMemorySampler> sampler) {
+  base::AutoLock auto_lock(sampler->lock_);
+  if (!sampler->running_)
+    return;
+
+  sampler->sample_counter_++;
+  sampler->max_sample_ = std::max(sampler->max_sample_, GetSwapAndRSSBytes());
+  // The posted task runs at most once, so bind a OnceCallback rather than the
+  // deprecated base::Bind(). The bound scoped_refptr keeps |sampler| alive
+  // until the task has run or been dropped.
+  sampler->task_runner_->PostDelayedTask(
+      sampler->from_here_,
+      base::BindOnce(&PeakMemorySampler::SampleMemory, sampler),
+      sampler->sampling_interval_);
+}
+
+} // namespace ml_benchmark
diff --git a/ml_benchmark/memory_sampler.h b/ml_benchmark/memory_sampler.h
new file mode 100644
index 0000000..2ad296b
--- /dev/null
+++ b/ml_benchmark/memory_sampler.h
@@ -0,0 +1,45 @@
+// Copyright 2020 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef ML_BENCHMARK_MEMORY_SAMPLER_H_
+#define ML_BENCHMARK_MEMORY_SAMPLER_H_
+
+#include <base/memory/ref_counted.h>
+#include <base/synchronization/lock.h>
+#include <base/task/thread_pool.h>
+#include <base/time/time.h>
+#include <gtest/gtest_prod.h> // for FRIEND_TEST
+
+namespace ml_benchmark {
+
+// Periodically records the value of GetSwapAndRSSBytes() on a thread-pool
+// sequence and remembers the largest value seen.
+//
+// Instances are ref-counted. Every queued sampling task holds its own
+// reference, so the object stays alive for as long as a task is pending.
+class PeakMemorySampler : public base::RefCountedThreadSafe<PeakMemorySampler> {
+ public:
+ // Creates a stopped sampler and the sequenced base::ThreadPool task runner
+ // its samples are posted to.
+ // NOTE(review): presumably requires a ThreadPoolInstance (or a test
+ // TaskEnvironment) to exist already - confirm.
+ PeakMemorySampler();
+
+ // Marks |sampler| as running, takes one sample right away and schedules the
+ // following ones every sampling_interval_.
+ static void StartSampling(scoped_refptr<PeakMemorySampler> sampler);
+ // Marks |sampler| as stopped. A task that is already queued returns without
+ // sampling or rescheduling when it eventually runs.
+ static void StopSampling(scoped_refptr<PeakMemorySampler> sampler);
+ // Returns the largest sample recorded so far, or 0 if none was taken.
+ int64_t GetMaxSample();
+
+ protected:
+ // Non-public: destruction goes through RefCountedThreadSafe, which is
+ // befriended below.
+ ~PeakMemorySampler();
+ friend class base::RefCountedThreadSafe<PeakMemorySampler>;
+
+ private:
+ // Body of the periodic task: samples once and re-posts itself while
+ // |sampler| is running.
+ static void SampleMemory(scoped_refptr<PeakMemorySampler> sampler);
+ // Sets running_ while holding lock_.
+ void SetRunning(bool is_running);
+
+ // Location passed to PostDelayedTask(); captured in the constructor.
+ base::Location from_here_;
+ // True between StartSampling() and StopSampling(). Guarded by lock_.
+ bool running_ = false;
+ // Guards running_ and max_sample_; also held while sample_counter_ is
+ // incremented.
+ base::Lock lock_;
+ // Delay between two consecutive samples.
+ base::TimeDelta sampling_interval_ = base::TimeDelta::FromSeconds(1);
+ // Largest sample seen so far. Guarded by lock_.
+ int64_t max_sample_ = 0;
+ // Sequence on which the delayed sampling tasks are posted.
+ scoped_refptr<base::SequencedTaskRunner> task_runner_;
+
+ // For testing purposes: number of samples taken so far. The test reads it
+ // directly through the FRIEND_TEST declaration below.
+ int sample_counter_ = 0;
+ FRIEND_TEST(PeakMemorySamplerTest, BasicFunctions);
+};
+} // namespace ml_benchmark
+
+#endif // ML_BENCHMARK_MEMORY_SAMPLER_H_
diff --git a/ml_benchmark/memory_sampler_test.cc b/ml_benchmark/memory_sampler_test.cc
new file mode 100644
index 0000000..2a0a83b
--- /dev/null
+++ b/ml_benchmark/memory_sampler_test.cc
@@ -0,0 +1,96 @@
+// Copyright 2020 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "ml_benchmark/memory_sampler.h"
+
+#include <base/test/task_environment.h>
+#include <gtest/gtest.h>
+
+namespace ml_benchmark {
+
+// Fixture for the sampler tests. It owns a mock-time task environment so the
+// tests move the clock themselves with FastForwardBy().
+class PeakMemorySamplerTest : public ::testing::Test {
+ protected:
+  // Must be declared first so it exists before any sampler is created.
+  base::test::TaskEnvironment task_environment_{
+      base::test::TaskEnvironment::TimeSource::MOCK_TIME};
+  // Same value as the sampler's own default sampling interval.
+  base::TimeDelta sampling_interval_ = base::TimeDelta::FromSeconds(1);
+};
+
+// Walks the sampler through start / sample / stop / restart and checks both
+// the recorded peak and the number of samples taken at each step.
+// NOTE(review): memset() is used below but <string.h> is not included by this
+// file; it is presumably picked up transitively - consider including it.
+TEST_F(PeakMemorySamplerTest, BasicFunctions) {
+ scoped_refptr<PeakMemorySampler> sampler = new PeakMemorySampler();
+
+ // No samples have been taken yet, so both the peak and the counter are zero.
+ EXPECT_EQ(sampler->GetMaxSample(), 0);
+ EXPECT_EQ(sampler->sample_counter_, 0);
+
+ PeakMemorySampler::StartSampling(sampler);
+ task_environment_.FastForwardBy(sampling_interval_);
+ const int64_t initial_peak = sampler->GetMaxSample();
+ // StartSampling takes one sample immediately and the elapsed interval adds
+ // another one, which makes two samples.
+ EXPECT_EQ(sampler->sample_counter_, 2);
+
+ // Sample a few more times (counter reaches 4) and make sure the peak hasn't
+ // changed.
+ task_environment_.FastForwardBy(sampling_interval_ * 2);
+ EXPECT_EQ(initial_peak, sampler->GetMaxSample());
+
+ // Allocate 10MB
+ int ten_mb_bytes = 1024 * 1024 * 10;
+ char* allocate = new char[ten_mb_bytes];
+ // Zero it out and read so the compiler doesn't optimize the variable away.
+ memset(allocate, 0, ten_mb_bytes);
+ EXPECT_EQ(allocate[ten_mb_bytes - 1], 0);
+
+ // The next sample (the fifth) is taken while the buffer is still allocated,
+ // so the peak is expected to grow.
+ task_environment_.FastForwardBy(sampling_interval_);
+ const int64_t higher_peak = sampler->GetMaxSample();
+ EXPECT_GT(higher_peak, initial_peak);
+
+ // Free the memory and make sure the peak doesn't drop (sixth sample).
+ delete[] allocate;
+ task_environment_.FastForwardBy(sampling_interval_);
+ EXPECT_EQ(higher_peak, sampler->GetMaxSample());
+
+ // Stop sampling, allocate a bunch more memory
+ PeakMemorySampler::StopSampling(sampler);
+ EXPECT_EQ(sampler->sample_counter_, 6);
+
+ // Allocate 20MB
+ int twenty_mb_bytes = 1024 * 1024 * 20;
+ allocate = new char[twenty_mb_bytes];
+ // Zero it out and read so the compiler doesn't optimize the variable away.
+ memset(allocate, 0, twenty_mb_bytes);
+ EXPECT_EQ(allocate[twenty_mb_bytes - 1], 0);
+
+ // We're not sampling so the peak should stay the same. The task that was
+ // still queued when sampling stopped runs during this window and returns
+ // without sampling, so the counter stays at 6.
+ task_environment_.FastForwardBy(sampling_interval_ * 2);
+ EXPECT_EQ(higher_peak, sampler->GetMaxSample());
+ EXPECT_EQ(sampler->sample_counter_, 6);
+
+ // Start sampling again and check it grows
+ PeakMemorySampler::StartSampling(sampler);
+ task_environment_.FastForwardBy(sampling_interval_);
+ EXPECT_GT(sampler->GetMaxSample(), higher_peak);
+
+ // As above, restarting takes one sample immediately and the elapsed interval
+ // adds another: 6 + 2 = 8.
+ EXPECT_EQ(sampler->sample_counter_, 8);
+
+ delete[] allocate;
+}
+
+// Dropping the caller's reference while a sampling task is still queued must
+// not crash.
+TEST_F(PeakMemorySamplerTest, LifeCycle) {
+ scoped_refptr<PeakMemorySampler> sampler = new PeakMemorySampler();
+
+ PeakMemorySampler::StartSampling(sampler);
+ task_environment_.FastForwardBy(sampling_interval_);
+ EXPECT_GT(sampler->GetMaxSample(), 0);
+
+ // At this point another task has been scheduled in t+1, so
+ // drop our reference and move forward in time. The queued task holds its
+ // own scoped_refptr (bound in SampleMemory), so we expect this
+ // to 'just work' and not crash due to some dangling pointer.
+ // NOTE(review): the sampler is never stopped, so after reset() the task
+ // appears to keep rescheduling itself and the object is not actually
+ // destroyed here - confirm this is the intended coverage.
+ sampler.reset();
+ task_environment_.FastForwardBy(sampling_interval_ * 2);
+}
+
+} // namespace ml_benchmark