[Autotest] Fix telemetry_Crosperf tests.
Autotest/telemetry has recently changed the format of results
returned from telemetry tests, and removed some functionality
from telemetry_runner.py. telemetry_Crosperf was depending on
this removed functionality, so it's been broken for the last
few weeks. This CL puts the removed functionality directly into
telemetry_Crosperf, so it is working again. Longer term we should
revisit updating Crosperf to not depend on this stuff.
BUG=chromium:597099
TEST=Tested in nightly tests and in my own chroot and beta tested with
sque@.
Change-Id: I870e94eb21245eb80e44b9b3c412368f0b0402d6
Reviewed-on: https://chromium-review.googlesource.com/336274
Commit-Ready: Caroline Tice <cmtice@chromium.org>
Tested-by: Caroline Tice <cmtice@chromium.org>
Reviewed-by: Yunlian Jiang <yunlian@chromium.org>
diff --git a/server/site_tests/telemetry_Crosperf/telemetry_Crosperf.py b/server/site_tests/telemetry_Crosperf/telemetry_Crosperf.py
index 4440206..0e136d3 100644
--- a/server/site_tests/telemetry_Crosperf/telemetry_Crosperf.py
+++ b/server/site_tests/telemetry_Crosperf/telemetry_Crosperf.py
@@ -2,14 +2,15 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
+import re
import os
+import pprint
import StringIO
import common
from autotest_lib.client.common_lib import error
from autotest_lib.server import test
from autotest_lib.server import utils
-from autotest_lib.server.cros import telemetry_runner
TELEMETRY_TIMEOUT_MINS = 60
@@ -17,6 +18,22 @@
CLIENT_CHROME_ROOT = '/usr/local/telemetry/src'
RUN_BENCHMARK = 'tools/perf/run_benchmark'
+# Result Statuses
+SUCCESS_STATUS = 'SUCCESS'
+WARNING_STATUS = 'WARNING'
+FAILED_STATUS = 'FAILED'
+
+# Regex for the RESULT output lines understood by chrome buildbot.
+# Keep in sync with
+# chromium/tools/build/scripts/slave/performance_log_processor.py.
+RESULTS_REGEX = re.compile(r'(?P<IMPORTANT>\*)?RESULT '
+ r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
+ r'(?P<VALUE>[\{\[]?[-\d\., ]+[\}\]]?)('
+ r' ?(?P<UNITS>.+))?')
+HISTOGRAM_REGEX = re.compile(r'(?P<IMPORTANT>\*)?HISTOGRAM '
+ r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
+ r'(?P<VALUE_JSON>{.*})(?P<UNITS>.+)?')
+
def _find_chrome_root_dir():
# Look for chrome source root, either externally mounted, or inside
@@ -77,6 +94,208 @@
raise error.TestFail('Error occurred while sending DEPs to dut.\n')
+class TelemetryResult(object):
+ """Class to represent the results of a telemetry run.
+
+ This class represents the results of a telemetry run, whether it ran
+ successful, failed or had warnings. -- Copied from the old
+ autotest/files/server/cros/telemetry_runner.py.
+ """
+
+
+ def __init__(self, exit_code=0, stdout='', stderr=''):
+ """Initializes this TelemetryResultObject instance.
+
+ @param status: Status of the telemtry run.
+ @param stdout: Stdout of the telemetry run.
+ @param stderr: Stderr of the telemetry run.
+ """
+ if exit_code == 0:
+ self.status = SUCCESS_STATUS
+ else:
+ self.status = FAILED_STATUS
+
+ # A list of perf values, e.g.
+ # [{'graph': 'graphA', 'trace': 'page_load_time',
+ # 'units': 'secs', 'value':0.5}, ...]
+ self.perf_data = []
+ self._stdout = stdout
+ self._stderr = stderr
+ self.output = '\n'.join([stdout, stderr])
+
+
+ def _cleanup_perf_string(self, str):
+ """Clean up a perf-related string by removing illegal characters.
+
+ Perf keys stored in the chromeOS database may contain only letters,
+ numbers, underscores, periods, and dashes. Transform an inputted
+ string so that any illegal characters are replaced by underscores.
+
+ @param str: The perf string to clean up.
+
+ @return The cleaned-up perf string.
+ """
+ return re.sub(r'[^\w.-]', '_', str)
+
+
+ def _cleanup_units_string(self, units):
+ """Cleanup a units string.
+
+ Given a string representing units for a perf measurement, clean it up
+ by replacing certain illegal characters with meaningful alternatives.
+ Any other illegal characters should then be replaced with underscores.
+
+ Examples:
+ count/time -> count_per_time
+ % -> percent
+ units! --> units_
+ score (bigger is better) -> score__bigger_is_better_
+ score (runs/s) -> score__runs_per_s_
+
+ @param units: The units string to clean up.
+
+ @return The cleaned-up units string.
+ """
+ if '%' in units:
+ units = units.replace('%', 'percent')
+ if '/' in units:
+ units = units.replace('/','_per_')
+ return self._cleanup_perf_string(units)
+
+
+ def parse_benchmark_results(self):
+ """Parse the results of a telemetry benchmark run.
+
+ Stdout has the output in RESULT block format below.
+
+ The lines of interest start with the substring "RESULT". These are
+ specially-formatted perf data lines that are interpreted by chrome
+ builbot (when the Telemetry tests run for chrome desktop) and are
+ parsed to extract perf data that can then be displayed on a perf
+ dashboard. This format is documented in the docstring of class
+ GraphingLogProcessor in this file in the chrome tree:
+
+ chromium/tools/build/scripts/slave/process_log_utils.py
+
+ Example RESULT output lines:
+ RESULT average_commit_time_by_url: http___www.ebay.com= 8.86528 ms
+ RESULT CodeLoad: CodeLoad= 6343 score (bigger is better)
+ RESULT ai-astar: ai-astar= [614,527,523,471,530,523,577,625,614,538] ms
+
+ Currently for chromeOS, we can only associate a single perf key (string)
+ with a perf value. That string can only contain letters, numbers,
+ dashes, periods, and underscores, as defined by write_keyval() in:
+
+ chromeos/src/third_party/autotest/files/client/common_lib/
+ base_utils.py
+
+ We therefore parse each RESULT line, clean up the strings to remove any
+ illegal characters not accepted by chromeOS, and construct a perf key
+ string based on the parsed components of the RESULT line (with each
+ component separated by a special delimiter). We prefix the perf key
+ with the substring "TELEMETRY" to identify it as a telemetry-formatted
+ perf key.
+
+ Stderr has the format of Warnings/Tracebacks. There is always a default
+ warning of the display enviornment setting, followed by warnings of
+ page timeouts or a traceback.
+
+ If there are any other warnings we flag the test as warning. If there
+ is a traceback we consider this test a failure.
+ """
+ if not self._stdout:
+ # Nothing in stdout implies a test failure.
+ logging.error('No stdout, test failed.')
+ self.status = FAILED_STATUS
+ return
+
+ stdout_lines = self._stdout.splitlines()
+ num_lines = len(stdout_lines)
+ for line in stdout_lines:
+ results_match = RESULTS_REGEX.search(line)
+ histogram_match = HISTOGRAM_REGEX.search(line)
+ if results_match:
+ self._process_results_line(results_match)
+ elif histogram_match:
+ self._process_histogram_line(histogram_match)
+
+ pp = pprint.PrettyPrinter(indent=2)
+ logging.debug('Perf values: %s', pp.pformat(self.perf_data))
+
+ if self.status is SUCCESS_STATUS:
+ return
+
+ # Otherwise check if simply a Warning occurred or a Failure,
+ # i.e. a Traceback is listed.
+ self.status = WARNING_STATUS
+ for line in self._stderr.splitlines():
+ if line.startswith('Traceback'):
+ self.status = FAILED_STATUS
+
+ def _process_results_line(self, line_match):
+ """Processes a line that matches the standard RESULT line format.
+
+ Args:
+ line_match: A MatchObject as returned by re.search.
+ """
+ match_dict = line_match.groupdict()
+ graph_name = self._cleanup_perf_string(match_dict['GRAPH'].strip())
+ trace_name = self._cleanup_perf_string(match_dict['TRACE'].strip())
+ units = self._cleanup_units_string(
+ (match_dict['UNITS'] or 'units').strip())
+ value = match_dict['VALUE'].strip()
+ unused_important = match_dict['IMPORTANT'] or False # Unused now.
+
+ if value.startswith('['):
+ # A list of values, e.g., "[12,15,8,7,16]". Extract just the
+ # numbers, compute the average and use that. In this example,
+ # we'd get 12+15+8+7+16 / 5 --> 11.6.
+ value_list = [float(x) for x in value.strip('[],').split(',')]
+ value = float(sum(value_list)) / len(value_list)
+ elif value.startswith('{'):
+ # A single value along with a standard deviation, e.g.,
+ # "{34.2,2.15}". Extract just the value itself and use that.
+ # In this example, we'd get 34.2.
+ value_list = [float(x) for x in value.strip('{},').split(',')]
+ value = value_list[0] # Position 0 is the value.
+ elif re.search('^\d+$', value):
+ value = int(value)
+ else:
+ value = float(value)
+
+ self.perf_data.append({'graph':graph_name, 'trace': trace_name,
+ 'units': units, 'value': value})
+
+ def _process_histogram_line(self, line_match):
+ """Processes a line that matches the HISTOGRAM line format.
+
+ Args:
+ line_match: A MatchObject as returned by re.search.
+ """
+ match_dict = line_match.groupdict()
+ graph_name = self._cleanup_perf_string(match_dict['GRAPH'].strip())
+ trace_name = self._cleanup_perf_string(match_dict['TRACE'].strip())
+ units = self._cleanup_units_string(
+ (match_dict['UNITS'] or 'units').strip())
+ histogram_json = match_dict['VALUE_JSON'].strip()
+ unused_important = match_dict['IMPORTANT'] or False # Unused now.
+ histogram_data = json.loads(histogram_json)
+
+ # Compute geometric mean
+ count = 0
+ sum_of_logs = 0
+ for bucket in histogram_data['buckets']:
+ mean = (bucket['low'] + bucket['high']) / 2.0
+ if mean > 0:
+ sum_of_logs += math.log(mean) * bucket['count']
+ count += bucket['count']
+
+ value = math.exp(sum_of_logs / count) if count > 0 else 0.0
+
+ self.perf_data.append({'graph':graph_name, 'trace': trace_name,
+ 'units': units, 'value': value})
+
+
class telemetry_Crosperf(test.test):
"""Run one or more telemetry benchmarks under the crosperf script."""
version = 1
@@ -143,9 +362,9 @@
'\nstdout:%s\nstderr:%s', exit_code,
stdout_str, stderr_str)
- result = telemetry_runner.TelemetryResult(exit_code=exit_code,
- stdout=stdout_str,
- stderr=stderr_str)
+ result = TelemetryResult(exit_code=exit_code,
+ stdout=stdout_str,
+ stderr=stderr_str)
result.parse_benchmark_results()
for data in result.perf_data: