crosperf: introduce option "compress_results" for result directory

Perf data and report file can occupy large disk space when we copy and
store them in the crosperf results directory. This patch introduces a
new field for crosperf so that we create a tarball for it to save space.

By default compress_result is set to True.

BUG=chromium:1079048
TEST=Passed unittest, tested with simple examples.

Change-Id: I70db6f3f70dc33bacfccd8833a4327c494b4d6b5
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/2212432
Reviewed-by: George Burgess <gbiv@chromium.org>
Commit-Queue: Zhizhou Yang <zhizhouy@google.com>
Tested-by: Zhizhou Yang <zhizhouy@google.com>
diff --git a/crosperf/crosperf_unittest.py b/crosperf/crosperf_unittest.py
index ffd964a..9c7d52a 100755
--- a/crosperf/crosperf_unittest.py
+++ b/crosperf/crosperf_unittest.py
@@ -68,7 +68,7 @@
     settings = crosperf.ConvertOptionsToSettings(options)
     self.assertIsNotNone(settings)
     self.assertIsInstance(settings, settings_factory.GlobalSettings)
-    self.assertEqual(len(settings.fields), 38)
+    self.assertEqual(len(settings.fields), 39)
     self.assertTrue(settings.GetField('rerun'))
     argv = ['crosperf/crosperf.py', 'temp.exp']
     options, _ = parser.parse_known_args(argv)
diff --git a/crosperf/experiment.py b/crosperf/experiment.py
index 45a028a..6e2efd4 100644
--- a/crosperf/experiment.py
+++ b/crosperf/experiment.py
@@ -28,8 +28,8 @@
   def __init__(self, name, remote, working_directory, chromeos_root,
                cache_conditions, labels, benchmarks, experiment_file, email_to,
                acquire_timeout, log_dir, log_level, share_cache,
-               results_directory, locks_directory, cwp_dso, ignore_min_max,
-               skylab, dut_config):
+               results_directory, compress_results, locks_directory, cwp_dso,
+               ignore_min_max, skylab, dut_config):
     self.name = name
     self.working_directory = working_directory
     self.remote = remote
@@ -42,6 +42,7 @@
                                             self.name + '_results')
     else:
       self.results_directory = misc.CanonicalizePath(results_directory)
+    self.compress_results = compress_results
     self.log_dir = log_dir
     self.log_level = log_level
     self.labels = labels
diff --git a/crosperf/experiment_factory.py b/crosperf/experiment_factory.py
index 0188fff..332f035 100644
--- a/crosperf/experiment_factory.py
+++ b/crosperf/experiment_factory.py
@@ -145,6 +145,7 @@
     config.AddConfig('no_email', global_settings.GetField('no_email'))
     share_cache = global_settings.GetField('share_cache')
     results_dir = global_settings.GetField('results_dir')
+    compress_results = global_settings.GetField('compress_results')
     # Warn user that option use_file_locks is deprecated.
     use_file_locks = global_settings.GetField('use_file_locks')
     if use_file_locks:
@@ -438,8 +439,8 @@
                             chromeos_root, cache_conditions, labels, benchmarks,
                             experiment_file.Canonicalize(), email,
                             acquire_timeout, log_dir, log_level, share_cache,
-                            results_dir, locks_dir, cwp_dso, ignore_min_max,
-                            skylab, dut_config)
+                            results_dir, compress_results, locks_dir, cwp_dso,
+                            ignore_min_max, skylab, dut_config)
 
     return experiment
 
diff --git a/crosperf/experiment_runner.py b/crosperf/experiment_runner.py
index 3e91e09..e2bd50d 100644
--- a/crosperf/experiment_runner.py
+++ b/crosperf/experiment_runner.py
@@ -273,22 +273,42 @@
 
     has_failure = False
     all_failed = True
-    self.l.LogOutput('Storing results of each benchmark run.')
-    for benchmark_run in experiment.benchmark_runs:
-      if benchmark_run.result:
-        if benchmark_run.result.retval:
-          has_failure = True
-        else:
-          all_failed = False
-        benchmark_run_name = ''.join(
-            ch for ch in benchmark_run.name if ch.isalnum())
-        benchmark_run_path = os.path.join(results_directory, benchmark_run_name)
-        benchmark_run.result.CopyResultsTo(benchmark_run_path)
-        benchmark_run.result.CleanUp(benchmark_run.benchmark.rm_chroot_tmp)
+
+    topstats_file = os.path.join(results_directory, 'topstats.log')
+    self.l.LogOutput('Storing top5 statistics of each benchmark run into %s.' %
+                     topstats_file)
+    with open(topstats_file, 'w') as top_fd:
+      for benchmark_run in experiment.benchmark_runs:
+        if benchmark_run.result:
+          # FIXME: Pylint has a bug suggesting the following change, which
+          # should be fixed in pylint 2.0. Resolve this after pylint >= 2.0.
+          # Bug: https://github.com/PyCQA/pylint/issues/1984
+          # pylint: disable=simplifiable-if-statement
+          if benchmark_run.result.retval:
+            has_failure = True
+          else:
+            all_failed = False
+          # Header with benchmark run name.
+          top_fd.write('%s\n' % str(benchmark_run))
+          # Formatted string with top statistics.
+          top_fd.write(benchmark_run.result.FormatStringTop5())
+          top_fd.write('\n\n')
 
     if all_failed:
       return self.ALL_FAILED
 
+    self.l.LogOutput('Storing results of each benchmark run.')
+    for benchmark_run in experiment.benchmark_runs:
+      if benchmark_run.result:
+        benchmark_run_name = ''.join(
+            ch for ch in benchmark_run.name if ch.isalnum())
+        benchmark_run_path = os.path.join(results_directory, benchmark_run_name)
+        if experiment.compress_results:
+          benchmark_run.result.CompressResultsTo(benchmark_run_path)
+        else:
+          benchmark_run.result.CopyResultsTo(benchmark_run_path)
+        benchmark_run.result.CleanUp(benchmark_run.benchmark.rm_chroot_tmp)
+
     self.l.LogOutput('Storing results report in %s.' % results_directory)
     results_table_path = os.path.join(results_directory, 'results.html')
     report = HTMLResultsReport.FromExperiment(experiment).GetReport()
@@ -307,18 +327,6 @@
     msg_body = "<pre style='font-size: 13px'>%s</pre>" % text_report
     FileUtils().WriteFile(msg_file_path, msg_body)
 
-    topstats_file = os.path.join(results_directory, 'topstats.log')
-    self.l.LogOutput('Storing top5 statistics of each benchmark run into %s.' %
-                     topstats_file)
-    with open(topstats_file, 'w') as top_fd:
-      for benchmark_run in experiment.benchmark_runs:
-        if benchmark_run.result:
-          # Header with benchmark run name.
-          top_fd.write('%s\n' % str(benchmark_run))
-          # Formatted string with top statistics.
-          top_fd.write(benchmark_run.result.FormatStringTop5())
-          top_fd.write('\n\n')
-
     return self.SUCCEEDED if not has_failure else self.HAS_FAILURE
 
   def Run(self):
diff --git a/crosperf/experiment_runner_unittest.py b/crosperf/experiment_runner_unittest.py
index fb58424..0bbab29 100755
--- a/crosperf/experiment_runner_unittest.py
+++ b/crosperf/experiment_runner_unittest.py
@@ -406,13 +406,14 @@
   @mock.patch.object(FileUtils, 'WriteFile')
   @mock.patch.object(HTMLResultsReport, 'FromExperiment')
   @mock.patch.object(TextResultsReport, 'FromExperiment')
+  @mock.patch.object(Result, 'CompressResultsTo')
   @mock.patch.object(Result, 'CopyResultsTo')
   @mock.patch.object(Result, 'CleanUp')
   @mock.patch.object(Result, 'FormatStringTop5')
   @mock.patch('builtins.open', new_callable=mock.mock_open)
   def test_store_results(self, mock_open, mock_top5, mock_cleanup, mock_copy,
-                         _mock_text_report, mock_report, mock_writefile,
-                         mock_mkdir, mock_rmdir):
+                         mock_compress, _mock_text_report, mock_report,
+                         mock_writefile, mock_mkdir, mock_rmdir):
 
     self.mock_logger.Reset()
     self.exp.results_directory = '/usr/local/crosperf-results'
@@ -433,6 +434,7 @@
     er._StoreResults(self.exp)
     self.assertEqual(mock_cleanup.call_count, 0)
     self.assertEqual(mock_copy.call_count, 0)
+    self.assertEqual(mock_compress.call_count, 0)
     self.assertEqual(mock_report.call_count, 0)
     self.assertEqual(mock_writefile.call_count, 0)
     self.assertEqual(mock_mkdir.call_count, 0)
@@ -447,6 +449,7 @@
     for r in self.exp.benchmark_runs:
       r.result = fake_result
     er._terminated = False
+    self.exp.compress_results = False
     er._StoreResults(self.exp)
     self.assertEqual(mock_cleanup.call_count, 6)
     mock_cleanup.assert_called_with(bench_run.benchmark.rm_chroot_tmp)
@@ -467,11 +470,11 @@
     self.assertEqual(self.mock_logger.LogOutputCount, 5)
     self.assertEqual(self.mock_logger.output_msgs, [
         'Storing experiment file in /usr/local/crosperf-results.',
+        'Storing top5 statistics of each benchmark run into'
+        ' /usr/local/crosperf-results/topstats.log.',
         'Storing results of each benchmark run.',
         'Storing results report in /usr/local/crosperf-results.',
         'Storing email message body in /usr/local/crosperf-results.',
-        'Storing top5 statistics of each benchmark run into'
-        ' /usr/local/crosperf-results/topstats.log.',
     ])
     self.assertEqual(mock_open.call_count, 1)
     # Check write to a topstats.log file.
@@ -483,6 +486,16 @@
     top5calls = [mock.call()] * 6
     self.assertEqual(mock_top5.call_args_list, top5calls)
 
+    # Test 3. Test compress_results.
+    self.exp.compress_results = True
+    mock_copy.call_count = 0
+    mock_compress.call_count = 0
+    er._StoreResults(self.exp)
+    self.assertEqual(mock_copy.call_count, 0)
+    mock_copy.assert_called_with(bench_path)
+    self.assertEqual(mock_compress.call_count, 6)
+    mock_compress.assert_called_with(bench_path)
+
 
 if __name__ == '__main__':
   unittest.main()
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py
index 85c3c6e..08a5d1b 100644
--- a/crosperf/results_cache.py
+++ b/crosperf/results_cache.py
@@ -30,6 +30,7 @@
 RESULTS_FILE = 'results.txt'
 MACHINE_FILE = 'machine.txt'
 AUTOTEST_TARBALL = 'autotest.tbz2'
+RESULTS_TARBALL = 'results.tbz2'
 PERF_RESULTS_FILE = 'perf-results.txt'
 CACHE_KEYS_FILE = 'cache_keys.txt'
 
@@ -115,6 +116,14 @@
     if self.results_file or self.perf_data_files or self.perf_report_files:
       self._logger.LogOutput('Results files stored in %s.' % dest_dir)
 
+  def CompressResultsTo(self, dest_dir):
+    tarball = os.path.join(self.results_dir, RESULTS_TARBALL)
+    results_dir = self.FindFilesInResultsDir('-name results').split('\n')[0]
+    self.CreateTarball(results_dir, tarball)
+    self.CopyFilesTo(dest_dir, [tarball])
+    if results_dir:
+      self._logger.LogOutput('Results files compressed into %s.' % dest_dir)
+
   def GetNewKeyvals(self, keyvals_dict):
     # Initialize 'units' dictionary.
     units_dict = {}
@@ -906,6 +915,15 @@
       command = 'rm -rf %s' % self.temp_dir
       self.ce.RunCommand(command)
 
+  def CreateTarball(self, results_dir, tarball):
+    ret = self.ce.RunCommand('cd %s && '
+                             'tar '
+                             '--exclude=var/spool '
+                             '--exclude=var/log '
+                             '-cjf %s .' % (results_dir, tarball))
+    if ret:
+      raise RuntimeError("Couldn't compress test output directory.")
+
   def StoreToCacheDir(self, cache_dir, machine_manager, key_list):
     # Create the dir if it doesn't exist.
     temp_dir = tempfile.mkdtemp()
@@ -927,14 +945,8 @@
 
     if self.results_dir:
       tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
-      command = ('cd %s && '
-                 'tar '
-                 '--exclude=var/spool '
-                 '--exclude=var/log '
-                 '-cjf %s .' % (self.results_dir, tarball))
-      ret = self.ce.RunCommand(command)
-      if ret:
-        raise RuntimeError("Couldn't store autotest output directory.")
+      self.CreateTarball(self.results_dir, tarball)
+
     # Store machine info.
     # TODO(asharif): Make machine_manager a singleton, and don't pass it into
     # this function.
diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py
index 16e21a7..7033a3e 100644
--- a/crosperf/settings_factory.py
+++ b/crosperf/settings_factory.py
@@ -291,6 +291,12 @@
     self.AddField(
         TextField('results_dir', default='', description='The results dir.'))
     self.AddField(
+        BooleanField(
+            'compress_results',
+            default=True,
+            description='Whether to compress all test results other than '
+            'reports into a tarball to save disk space.'))
+    self.AddField(
         TextField(
             'locks_dir',
             default='',
diff --git a/crosperf/settings_factory_unittest.py b/crosperf/settings_factory_unittest.py
index f190095..bc10711 100755
--- a/crosperf/settings_factory_unittest.py
+++ b/crosperf/settings_factory_unittest.py
@@ -50,7 +50,7 @@
   def test_init(self):
     res = settings_factory.GlobalSettings('g_settings')
     self.assertIsNotNone(res)
-    self.assertEqual(len(res.fields), 38)
+    self.assertEqual(len(res.fields), 39)
     self.assertEqual(res.GetField('name'), '')
     self.assertEqual(res.GetField('board'), '')
     self.assertEqual(res.GetField('skylab'), False)
@@ -73,6 +73,7 @@
     self.assertEqual(res.GetField('show_all_results'), False)
     self.assertEqual(res.GetField('share_cache'), '')
     self.assertEqual(res.GetField('results_dir'), '')
+    self.assertEqual(res.GetField('compress_results'), True)
     self.assertEqual(res.GetField('chrome_src'), '')
     self.assertEqual(res.GetField('cwp_dso'), '')
     self.assertEqual(res.GetField('enable_aslr'), False)
@@ -107,7 +108,7 @@
     g_settings = settings_factory.SettingsFactory().GetSettings(
         'global', 'global')
     self.assertIsInstance(g_settings, settings_factory.GlobalSettings)
-    self.assertEqual(len(g_settings.fields), 38)
+    self.assertEqual(len(g_settings.fields), 39)
 
 
 if __name__ == '__main__':