# Lint as: python2, python3
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
import json
import logging
import numbers
import os
import tempfile
import six
import numpy
import common
from autotest_lib.client.common_lib import error, utils
from autotest_lib.server.cros import telemetry_setup
TELEMETRY_RUN_BENCHMARKS_SCRIPT = 'tools/perf/run_benchmark'
TELEMETRY_RUN_TESTS_SCRIPT = 'tools/telemetry/run_tests'
TELEMETRY_RUN_GPU_TESTS_SCRIPT = 'content/test/gpu/run_gpu_integration_test.py'
TELEMETRY_TIMEOUT_MINS = 150
DUT_CHROME_ROOT = '/usr/local/telemetry/src'
CHART_JSON_RESULT = 'results-chart.json'
HISTOGRAM_SET_RESULT = 'histograms.json'
PROFILE_ARTIFACTS = 'artifacts'
# Result Statuses
SUCCESS_STATUS = 'SUCCESS'
WARNING_STATUS = 'WARNING'
FAILED_STATUS = 'FAILED'
# A list of telemetry tests that cannot run on dut.
ON_DUT_BLOCKLIST = [
'loading.desktop', # crbug/882299
'rendering.desktop', # crbug/882291
]
class TelemetryResult(object):
"""Class to represent the results of a telemetry run.
This class represents the results of a telemetry run, whether it ran
successfully, failed, or had warnings.
"""
def __init__(self, exit_code=0, stdout='', stderr=''):
"""Initializes this TelemetryResultObject instance.
@param status: Status of the telemtry run.
@param stdout: Stdout of the telemetry run.
@param stderr: Stderr of the telemetry run.
"""
if exit_code == 0:
self.status = SUCCESS_STATUS
else:
self.status = FAILED_STATUS
self._stdout = stdout
self._stderr = stderr
self.output = '\n'.join([stdout, stderr])
class TelemetryRunnerFactory(object):
"""A factory class to determine TelemetryRunner subclass to be used.
The TelemetryRunner class currently has several ways to execute a telemetry
test. The test can be run locally (using a tool like test_that) or in the
lab environment, where either the drone or the devserver can be used.
This factory class offloads that determination from the clients. Users of
the TelemetryRunner class are strongly encouraged to obtain the correct
TelemetryRunner subclass through this factory.
"""
def get_runner(self, host, local=False, telemetry_on_dut=True):
"""Method to determine which TelemetryRunner subclass to use."""
if local:
return LocalTelemetryRunner(host, telemetry_on_dut)
else:
return DroneTelemetryRunner(host, telemetry_on_dut)
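# Hedged usage sketch for TelemetryRunnerFactory above. The host object, the
# job object and the benchmark name are assumptions for illustration only,
# not values defined in this module:
#
#   factory = TelemetryRunnerFactory()
#   with factory.get_runner(host, local=False, telemetry_on_dut=True) as runner:
#       result = runner.run_telemetry_benchmark('octane', perf_value_writer=job)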
class TelemetryRunner(six.with_metaclass(abc.ABCMeta, object)):
"""Class responsible for telemetry for a given build.
This class will extract and install telemetry environment and is
responsible for executing the telemetry benchmarks and returning their
output to the caller.
"""
def __init__(self, host, telemetry_on_dut=True):
"""Initializes this telemetry runner instance.
If telemetry is not installed for this build, it will be.
@param host: Host where the test will be run.
@param telemetry_on_dut: If set, telemetry itself (the test harness)
will run on the DUT; this determines whether
--browser=system or --browser=cros-chrome is used.
"""
self._host = host
self._telemetry_path = None
self._perf_value_writer = None
self._setup_telemetry()
self._telemetry_on_dut = telemetry_on_dut
self._benchmark_deps = None
logging.debug('Telemetry Path: %s', self._telemetry_path)
def __enter__(self):
"""Called while entering context manager; does nothing."""
return self
def __exit__(self, exc_type, exc_value, traceback):
"""Called while exiting context manager."""
@abc.abstractmethod
def _setup_telemetry(self):
"""Set up telemetry environment."""
def _get_telemetry_cmd(self, script, test_or_benchmark, output_format,
*args, **kwargs):
"""Build command to execute telemetry based on script and benchmark.
@param script: Telemetry script we want to run. For example:
[path_to_telemetry_src]/src/tools/telemetry/run_tests.
@param test_or_benchmark: Name of the test or benchmark we want to run,
with the page_set (if required) as part of
the string.
@param output_format: Format of the json result file: histogram or
chart-json.
@param args: additional list of arguments to pass to the script.
@param kwargs: additional list of keyword arguments to pass to the
script.
@returns Full telemetry command to execute the script.
"""
telemetry_cmd = []
no_verbose = kwargs.get('no_verbose', False)
output_dir = (DUT_CHROME_ROOT
if self._telemetry_on_dut else self._telemetry_path)
# Use a temp directory to hold the output of a single test run.
if self._perf_value_writer:
output_dir = os.path.join(
output_dir, self._perf_value_writer.tmpdir.strip('/'))
if self._telemetry_on_dut:
telemetry_cmd.extend([
self._host.ssh_command(
alive_interval=900, connection_attempts=4),
'python2',
script,
'--output-format=%s' % output_format,
'--output-dir=%s' % output_dir,
'--browser=system',
])
else:
telemetry_cmd.extend([
'python2',
script,
'--browser=cros-chrome',
'--output-format=%s' % output_format,
'--output-dir=%s' % output_dir,
'--remote=%s' % self._host.hostname,
])
if self._host.host_port != self._host.hostname:
# If the user specifies a different port for the DUT, we need an
# extra telemetry argument to set it up.
#
# e.g. When running experiments with ssh port forwarding, the user
# specifies the remote as 127.0.0.1:2222. Then host_port is
# 127.0.0.1:2222, hostname is 127.0.0.1 and port is 2222.
telemetry_cmd.append('--remote-ssh-port=%s' % self._host.port)
if not no_verbose:
telemetry_cmd.append('--verbose')
telemetry_cmd.extend(args)
telemetry_cmd.append(test_or_benchmark)
return ' '.join(telemetry_cmd)
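# Hedged sketch of the command _get_telemetry_cmd above can produce; the
# paths and hostname are placeholders, not real values. With
# telemetry_on_dut=False the joined string looks roughly like:
#   python2 <telemetry_path>/tools/perf/run_benchmark --browser=cros-chrome
#       --output-format=histograms --output-dir=<output_dir>
#       --remote=<dut-hostname> --verbose <benchmark>
# With telemetry_on_dut=True the same invocation is prefixed with the host's
# ssh command and uses --browser=system with DUT-local paths instead.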
def _scp_telemetry_results_cmd(self, perf_results_dir, output_format,
artifacts):
"""Build command to copy the telemetry results from the work directory.
@param perf_results_dir: directory path where test output is to be
collected.
@param output_format: Format of the json result file: histogram or
chart-json.
@param artifacts: Whether we want to copy the artifacts directory.
@returns SCP command to copy the results json to the specified
directory.
"""
if not perf_results_dir:
return ''
output_filename = CHART_JSON_RESULT
if output_format == 'histograms':
output_filename = HISTOGRAM_SET_RESULT
scp_cmd = []
if self._telemetry_on_dut:
scp_cmd.extend(['scp', '-r'])
scp_cmd.append(
self._host.make_ssh_options(
alive_interval=900, connection_attempts=4))
if not self._host.is_default_port:
scp_cmd.append('-P %d' % self._host.port)
src = 'root@%s:%s' % (self._host.hostname, DUT_CHROME_ROOT)
else:
# Use rsync --remove-source-files to move rather than copy from the
# work dir. Each run generates artifacts that are not removed
# afterwards, so the result size would otherwise keep growing.
# We don't do this for results on the DUT because 1) rsync doesn't
# work there and 2) the DUT is reflashed frequently, so result size
# is not a concern.
scp_cmd.extend(['rsync', '-avz', '--remove-source-files'])
src = self._telemetry_path
if self._perf_value_writer:
src = os.path.join(src, self._perf_value_writer.tmpdir.strip('/'))
scp_cmd.append(os.path.join(src, output_filename))
# Copy artifacts back to result directory if needed.
if artifacts:
scp_cmd.append(os.path.join(src, PROFILE_ARTIFACTS))
scp_cmd.append(perf_results_dir)
return ' '.join(scp_cmd)
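# Hedged sketch of the copy command _scp_telemetry_results_cmd above builds;
# the paths are placeholders. When telemetry ran on the drone, results are
# moved with rsync, roughly:
#   rsync -avz --remove-source-files <src>/histograms.json <perf_results_dir>
# When telemetry ran on the DUT, scp -r with the host's ssh options is used
# against root@<dut-hostname>:/usr/local/telemetry/src instead.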
def _run_cmd(self, cmd):
"""Execute an command in a external shell and capture the output.
@param cmd: String of is a valid shell command.
@returns The standard out, standard error and the integer exit code of
the executed command.
"""
logging.debug('Running: %s', cmd)
output = six.StringIO()
error_output = six.StringIO()
exit_code = 0
try:
result = utils.run(
cmd,
stdout_tee=output,
stderr_tee=error_output,
timeout=TELEMETRY_TIMEOUT_MINS * 60)
exit_code = result.exit_status
except error.CmdError as e:
logging.debug('Error occurred executing command.')
exit_code = e.result_obj.exit_status
stdout = output.getvalue()
stderr = error_output.getvalue()
logging.debug('Completed with exit code: %d.\nstdout:%s\n'
'stderr:%s', exit_code, stdout, stderr)
return stdout, stderr, exit_code
def _run_telemetry(self, script, test_or_benchmark, output_format, *args,
**kwargs):
"""Runs telemetry on a dut.
@param script: Telemetry script we want to run. For example:
[path_to_telemetry_src]/src/tools/telemetry/run_tests.
@param test_or_benchmark: Name of the test or benchmark we want to run,
with the page_set (if required) as part of the
string.
@param args: additional list of arguments to pass to the script.
@param kwargs: additional list of keyword arguments to pass to the
script.
@returns A TelemetryResult Instance with the results of this telemetry
execution.
"""
# TODO (sbasi crbug.com/239933) add support for incognito mode.
telemetry_cmd = self._get_telemetry_cmd(script, test_or_benchmark,
output_format, *args, **kwargs)
logging.info('Running Telemetry: %s', telemetry_cmd)
stdout, stderr, exit_code = self._run_cmd(telemetry_cmd)
return TelemetryResult(
exit_code=exit_code, stdout=stdout, stderr=stderr)
def _run_scp(self, perf_results_dir, output_format, artifacts=False):
"""Runs telemetry on a dut.
@param perf_results_dir: The local directory that results are being
collected.
@param output_format: Format of the json result file.
@param artifacts: Whether we want to copy the artifacts directory.
"""
scp_cmd = self._scp_telemetry_results_cmd(perf_results_dir,
output_format, artifacts)
logging.debug('Retrieving Results: %s', scp_cmd)
_, _, exit_code = self._run_cmd(scp_cmd)
if exit_code != 0:
raise error.TestFail('Unable to retrieve results.')
if output_format == 'histograms':
# Converts to chart json format.
input_filename = os.path.join(perf_results_dir,
HISTOGRAM_SET_RESULT)
output_filename = os.path.join(perf_results_dir, CHART_JSON_RESULT)
with open(input_filename) as fin:
histograms = json.load(fin)
chartjson = TelemetryRunner.convert_chart_json(histograms)
with open(output_filename, 'w') as fout:
fout.write(json.dumps(chartjson, indent=2))
def _run_test(self, script, test, *args):
"""Runs a telemetry test on a dut.
@param script: Which telemetry test script we want to run. Can be
telemetry's base test script or the Chrome OS specific
test script.
@param test: Telemetry test we want to run.
@param args: additional list of arguments to pass to the script.
@returns A TelemetryResult Instance with the results of this telemetry
execution.
"""
logging.debug('Running telemetry test: %s', test)
telemetry_script = os.path.join(self._telemetry_path, script)
result = self._run_telemetry(telemetry_script, test, 'chartjson',
*args)
if result.status == FAILED_STATUS:
raise error.TestFail('Telemetry test %s failed.' % test)
return result
def run_telemetry_test(self, test, *args):
"""Runs a telemetry test on a dut.
@param test: Telemetry test we want to run.
@param args: additional list of arguments to pass to the telemetry
execution script.
@returns A TelemetryResult Instance with the results of this telemetry
execution.
"""
return self._run_test(TELEMETRY_RUN_TESTS_SCRIPT, test, *args)
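# Hedged example for run_telemetry_test above; the test name is an
# assumption used purely for illustration:
#   result = runner.run_telemetry_test('telemetry_unittests')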
def run_telemetry_benchmark(self,
benchmark,
perf_value_writer=None,
*args,
**kwargs):
"""Runs a telemetry benchmark on a dut.
@param benchmark: Benchmark we want to run.
@param perf_value_writer: Should be an instance with the function
output_perf_value(), if None, no perf value
will be written. Typically this will be the
job object from an autotest test.
@param args: additional list of arguments to pass to the telemetry
execution script.
@param kwargs: additional list of keyword arguments to pass to the
telemetry execution script.
@returns A TelemetryResult Instance with the results of this telemetry
execution.
"""
logging.debug('Running telemetry benchmark: %s', benchmark)
self._perf_value_writer = perf_value_writer
if benchmark in ON_DUT_BLOCKLIST:
self._telemetry_on_dut = False
output_format = kwargs.get('ex_output_format', '')
if not output_format:
output_format = 'histograms'
if self._telemetry_on_dut:
telemetry_script = os.path.join(DUT_CHROME_ROOT,
TELEMETRY_RUN_BENCHMARKS_SCRIPT)
self._ensure_deps(self._host, benchmark)
else:
telemetry_script = os.path.join(self._telemetry_path,
TELEMETRY_RUN_BENCHMARKS_SCRIPT)
result = self._run_telemetry(telemetry_script, benchmark,
output_format, *args, **kwargs)
if result.status == WARNING_STATUS:
raise error.TestWarn('Telemetry Benchmark: %s'
' exited with Warnings.\nOutput:\n%s\n' %
(benchmark, result.output))
elif result.status == FAILED_STATUS:
raise error.TestFail('Telemetry Benchmark: %s'
' failed to run.\nOutput:\n%s\n' %
(benchmark, result.output))
elif '[ PASSED ] 0 tests.' in result.output:
raise error.TestWarn('Telemetry Benchmark: %s exited successfully,'
' but no test actually passed.\nOutput:\n%s\n'
% (benchmark, result.output))
if perf_value_writer:
artifacts = kwargs.get('artifacts', False)
self._run_scp(perf_value_writer.resultsdir, output_format,
artifacts)
return result
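# Hedged usage sketch for run_telemetry_benchmark above. The benchmark name
# and the job object are assumptions; ex_output_format and artifacts are the
# optional kwargs this method reads:
#   runner.run_telemetry_benchmark('speedometer2', perf_value_writer=job,
#                                  ex_output_format='chartjson',
#                                  artifacts=True)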
def run_gpu_integration_test(self, test, *args):
"""Runs a gpu test on a dut.
@param test: Gpu test we want to run.
@param args: additional list of arguments to pass to the telemetry
execution script.
@returns A TelemetryResult instance with the results of this telemetry
execution.
"""
script = os.path.join(DUT_CHROME_ROOT, TELEMETRY_RUN_GPU_TESTS_SCRIPT)
cmd = [
self._host.ssh_command(alive_interval=900,
connection_attempts=4), 'python2',
script
]
cmd.extend(args)
cmd.append(test)
cmd = ' '.join(cmd)
stdout, stderr, exit_code = self._run_cmd(cmd)
if exit_code:
raise error.TestFail('Gpu Integration Test: %s'
' failed to run.' % test)
return TelemetryResult(
exit_code=exit_code, stdout=stdout, stderr=stderr)
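# Hedged example for run_gpu_integration_test above; the test suite name is
# an assumption for illustration only:
#   result = runner.run_gpu_integration_test('pixel')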
def _ensure_deps(self, dut, test_name):
"""
Ensure the dependencies are locally available on DUT.
@param dut: The autotest host object representing DUT.
@param test_name: Name of the telemetry test.
"""
# Get DEPs using host's telemetry.
# Example output, fetch_benchmark_deps.py --output-deps=deps octane:
# {'octane': ['tools/perf/page_sets/data/octane_002.wprgo']}
fetch_path = os.path.join(self._telemetry_path, 'tools', 'perf',
'fetch_benchmark_deps.py')
# Use a temporary file for |deps_path| to avoid race conditions. The
# created temporary file is assigned to |self._benchmark_deps| to make
# it valid until |self| is destroyed.
self._benchmark_deps = tempfile.NamedTemporaryFile(
prefix='fetch_benchmark_deps_result.', suffix='.json')
deps_path = self._benchmark_deps.name
format_fetch = ('python2 %s --output-deps=%s %s')
command_fetch = format_fetch % (fetch_path, deps_path, test_name)
command_get = 'cat %s' % deps_path
logging.info('Getting DEPs: %s', command_fetch)
_, _, exit_code = self._run_cmd(command_fetch)
if exit_code != 0:
raise error.TestFail('Error occurred while fetching DEPs.')
stdout, _, exit_code = self._run_cmd(command_get)
if exit_code != 0:
raise error.TestFail('Error occurred while getting DEPs.')
# Download DEPs to DUT.
# send_file() relies on rsync over ssh. Couldn't be better.
deps = json.loads(stdout)
for dep in deps[test_name]:
src = os.path.join(self._telemetry_path, dep)
dst = os.path.join(DUT_CHROME_ROOT, dep)
if not os.path.isfile(src):
raise error.TestFail('Error occurred while saving DEPs.')
logging.info('Copying: %s -> %s', src, dst)
dut.send_file(src, dst)
@staticmethod
def convert_chart_json(histogram_set):
"""
Convert from histogram set to chart json format.
@param histogram_set: result in histogram set format.
@returns result in chart json format.
"""
value_map = {}
# Gets generic set values.
for obj in histogram_set:
if 'type' in obj and obj['type'] == 'GenericSet':
value_map[obj['guid']] = obj['values']
charts = {}
benchmark_name = ''
benchmark_desc = ''
# See the unit test for how this conversion works.
for obj in histogram_set:
if 'name' not in obj or 'sampleValues' not in obj:
continue
metric_name = obj['name']
diagnostics = obj['diagnostics']
if 'stories' in diagnostics:
story_name = value_map[diagnostics['stories']][0]
else:
story_name = 'default'
local_benchmark_name = value_map[diagnostics['benchmarks']][0]
if benchmark_name == '':
benchmark_name = local_benchmark_name
if 'benchmarkDescriptions' in diagnostics:
benchmark_desc = value_map[
diagnostics['benchmarkDescriptions']][0]
if benchmark_name != local_benchmark_name:
logging.warning(
'There is more than one benchmark name in the '
'result. old: %s, new: %s', benchmark_name,
local_benchmark_name)
continue
unit = obj['unit']
smaller_postfixes = ('_smallerIsBetter', '-')
bigger_postfixes = ('_biggerIsBetter', '+')
all_postfixes = smaller_postfixes + bigger_postfixes
improvement = 'up'
for postfix in smaller_postfixes:
if unit.endswith(postfix):
improvement = 'down'
for postfix in all_postfixes:
if unit.endswith(postfix):
unit = unit[:-len(postfix)]
break
if unit == 'unitless':
unit = 'score'
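# For example (assumed unit string): 'ms_smallerIsBetter' yields unit 'ms'
# with improvement_direction 'down', while 'unitless' is reported as 'score'.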
values = [
x for x in obj['sampleValues']
if isinstance(x, numbers.Number)
]
if metric_name not in charts:
charts[metric_name] = {}
charts[metric_name][story_name] = {
'improvement_direction': improvement,
'name': metric_name,
'std': numpy.std(values),
'type': 'list_of_scalar_values',
'units': unit,
'values': values
}
# Adds summaries.
for metric_name in charts:
values = []
metric_content = charts[metric_name]
for story_name in metric_content:
story_content = metric_content[story_name]
values += story_content['values']
metric_type = story_content['type']
units = story_content['units']
improvement = story_content['improvement_direction']
values.sort()
std = numpy.std(values)
metric_content['summary'] = {
'improvement_direction': improvement,
'name': metric_name,
'std': std,
'type': metric_type,
'units': units,
'values': values
}
benchmark_metadata = {
'description': benchmark_desc,
'name': benchmark_name,
'type': 'telemetry_benchmark'
}
return {
'benchmark_description': benchmark_desc,
'benchmark_metadata': benchmark_metadata,
'benchmark_name': benchmark_name,
'charts': charts,
'format_version': 1.0
}
class LocalTelemetryRunner(TelemetryRunner):
"""Specialized TelemetryRunner to handle local telemetry test runs."""
def __init__(self, *args, **kwargs):
"""Initialize LocalTelemetryRunner.
The telemetry test will run locally. Depending on whether
telemetry_on_dut is True or False, the test will be invoked in one of
the following ways:
telemetry_on_dut=False:
python2 run_benchmark --browser=cros-chrome --remote=[dut] [test]
telemetry_on_dut=True:
ssh [dut] python2 run_benchmark --browser=system [test]
@param args: The list of arguments to be passed. See Base class for a
complete list of accepted arguments.
@param kwargs: Any keyword arguments to be passed. See Base class for a
complete list of accepted keyword arguments.
"""
super(LocalTelemetryRunner, self).__init__(*args, **kwargs)
def _setup_telemetry(self):
"""Setup Telemetry to use local path to its sources.
First look for chrome source root, either externally mounted, or inside
the chroot. Prefer chrome-src-internal source tree to chrome-src.
"""
TELEMETRY_DIR = 'src'
CHROME_LOCAL_SRC = '/var/cache/chromeos-cache/distfiles/target/'
CHROME_EXTERNAL_SRC = os.path.expanduser('~/chrome_root/')
logging.debug('Setting up telemetry for local testing')
sources_list = ('chrome-src-internal', 'chrome-src')
dir_list = [CHROME_EXTERNAL_SRC]
dir_list.extend(
[os.path.join(CHROME_LOCAL_SRC, x) for x in sources_list])
if 'CHROME_ROOT' in os.environ:
dir_list.insert(0, os.environ['CHROME_ROOT'])
telemetry_src = ''
for dir in dir_list:
if os.path.exists(dir):
telemetry_src = os.path.join(dir, TELEMETRY_DIR)
break
else:
raise error.TestError('Telemetry source directory not found.')
self._telemetry_path = telemetry_src
class DroneTelemetryRunner(TelemetryRunner):
"""Handle telemetry test setup on the drone.
Users of this class are strongly advised to use this class as a context
manager. Since the telemetry environment is set up on the drone, it is
imperative that this setup be cleaned up once the test is done. Using
this class as a context manager transfers the burden of cleanup from
the user to Python.
"""
def __init__(self, *args, **kwargs):
"""Initialize DroneTelemetryRunner.
The telemetry test will run on the drone. Depending on whether
telemetry_on_dut is True or False, the test will be invoked in one of
the following ways:
telemetry_on_dut=False:
python2 run_benchmark --browser=cros-chrome --remote=[dut] [test]
telemetry_on_dut=True:
ssh [dut] python2 run_benchmark --browser=system [test]
@param args: The list of arguments to be passed. See Base class for a
complete list of accepted arguments.
@param kwargs: Any keyword arguments to be passed. See Base class for a
complete list of accepted keyword arguments.
"""
self._telemetry_setup = None
super(DroneTelemetryRunner, self).__init__(*args, **kwargs)
def __enter__(self):
"""Called while entering context manager; does nothing."""
return self
def __exit__(self, exc_type, exc_value, traceback):
"""Called while exiting context manager; cleans up temp files."""
logging.info('Cleaning up the telemetry environment on the drone.')
self._telemetry_setup.Cleanup()
def _setup_telemetry(self):
"""Setup Telemetry on the drone."""
logging.debug('Setting up telemetry on the drone')
info = self._host.host_info_store.get()
if not info.build:
logging.error('Unable to locate build label for host: %s.',
self._host.host_port)
raise error.AutotestError('Failed to grab build for host %s.' %
self._host.host_port)
logging.debug('Setting up telemetry for build: %s', info.build)
try:
self._telemetry_setup = telemetry_setup.TelemetrySetup(
hostname=self._host.hostname, build=info.build)
self._telemetry_path = self._telemetry_setup.Setup()
except telemetry_setup.TelemetrySetupError as e:
raise error.AutotestError('Telemetry environment could not be '
'set up: %s.' % e)