| # Lint as: python2, python3 |
| # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import abc |
| import datetime |
| import difflib |
| import functools |
| import hashlib |
| import logging |
| import operator |
| import os |
| import re |
| import six |
| import sys |
| import warnings |
| |
| import common |
| |
| from autotest_lib.frontend.afe.json_rpc import proxy |
| from autotest_lib.client.common_lib import autotest_enum |
| from autotest_lib.client.common_lib import error |
| from autotest_lib.client.common_lib import global_config |
| from autotest_lib.client.common_lib import priorities |
| from autotest_lib.client.common_lib import time_utils |
| from autotest_lib.client.common_lib import utils |
| from autotest_lib.frontend.afe import model_attributes |
| from autotest_lib.frontend.afe.json_rpc import proxy |
| from autotest_lib.server.cros import provision |
| from autotest_lib.server.cros.dynamic_suite import constants |
| from autotest_lib.server.cros.dynamic_suite import control_file_getter |
| from autotest_lib.server.cros.dynamic_suite import frontend_wrappers |
| from autotest_lib.server.cros.dynamic_suite import job_status |
| from autotest_lib.server.cros.dynamic_suite import suite_common |
| from autotest_lib.server.cros.dynamic_suite import tools |
| from autotest_lib.server.cros.dynamic_suite.job_status import Status |
| |
# boolparse_lib is only available inside the chroot; it is needed by
# matches_attribute_expression_predicate() below.  Degrade gracefully with a
# hint instead of failing the whole module import.
try:
    from autotest_lib.server.cros.dynamic_suite import boolparse_lib
except ImportError as e:
    print('Unable to import boolparse_lib: %s' % (e,))
    print('This script must be either:')
    print('  - Be run in the chroot.')
    print('  - (not yet supported) be run after running ')
    print('    ../utils/build_externals.py')

# Suites for which bugs should be filed automatically on test failure.
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Autotest installation root on the drones, read from the global config.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
| |
| |
class RetryHandler(object):
    """Track per-job and suite-wide retry state.

    @var _retry_map: Retry bookkeeping keyed by afe job id:
            {job_id: {'state': RetryHandler.States, 'retry_max': int}}
            - state:
                NOT_ATTEMPTED: nothing has been done about the job yet.
                ATTEMPTED: a retry scheduling attempt has been made.  The
                    attempt itself may have failed (e.g. an rpc error),
                    which is distinct from the retry job failing.  Only one
                    scheduling attempt is ever made per job, so a failed
                    attempt means all further retries for that test are
                    abandoned.
                RETRIED: a retry job was successfully scheduled.
            - retry_max: how many more times the job may still be retried,
                    accounting for retries that already happened.
    @var _retry_level: a retry is triggered only when a result is worse
            than this level.
    @var _max_retries: suite-wide cap; regardless of per-test budgets, the
            total number of retries in the suite cannot exceed this.
    """

    States = autotest_enum.AutotestEnum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                                        start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        @param initial_jobs_to_tests: Dict mapping afe job ids to ControlData
                objects for the jobs originally scheduled by the suite.
        @param retry_level: A retry is triggered only if the result is worse
                than this level.
        @param max_retries: Integer cap on total retries for the whole
                suite.  None (the default) means no cap.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        if max_retries is None:
            self._max_retries = sys.maxsize
        else:
            self._max_retries = max_retries
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries <= 0:
                logging.debug("Test %s has no retries", test.name)
            else:
                self._add_job(new_job_id=job_id, retry_max=test.job_retries)


    def _add_job(self, new_job_id, retry_max):
        """Register a newly-created job in the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: How many times the test may still be retried if
                the job fails.

        @raises ValueError if new_job_id is already in the retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {'state': self.States.NOT_ATTEMPTED,
                                       'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return True once the suite-level retry budget is exhausted."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record that |old_job_id| has been retried by |new_job_id|.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if a retry was already made (or attempted) for
                the old job.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        old_record['state'] = self.States.RETRIED
        # The retry job inherits the old job's budget, minus one.
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Mark |job_id| as ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        current_state = self._retry_map[job_id]['state']
        if current_state == self.States.NOT_ATTEMPTED:
            self._retry_map[job_id]['state'] = self.States.ATTEMPTED
            return
        # Each job may be retried (or have a retry attempted) at most once;
        # any other transition indicates a bookkeeping bug.
        raise ValueError('Unexpected state transition: %s -> %s' %
                         (self.States.get_string(current_state),
                          self.States.get_string(self.States.ATTEMPTED)))


    def has_following_retry(self, result):
        """Tell whether a retry job will follow this result.

        For a given job id (result.id):
        - no retry map entry -> no retry required, no following retry
        - entry present and already RETRIED -> has a following retry
        - entry present, not yet retried -> delegated to _should_retry():
          a following retry exists only if the test failed badly enough,
          no attempt was made yet, and budgets remain.

        @param result: A result, encapsulating the status of the job.

        @returns: True if a retry will follow, False otherwise.

        """
        if not result.test_executed:
            return False
        if result.id not in self._retry_map:
            return False
        already_retried = (
                self._retry_map[result.id]['state'] == self.States.RETRIED)
        return already_retried or self._should_retry(result)


    def _should_retry(self, result):
        """Decide whether a job should be retried based on its result.

        A retry happens only when all of the following hold:
        a) The test actually executed (a job aborted before reaching
           'Running' is never retried).
        b) The result is worse than |self._retry_level| (default 'WARN').
        c) The job has an entry in the retry map (i.e. requires retry).
        d) No retry attempt has been made yet (state == NOT_ATTEMPTED).
           A prior attempt that hit an rpc error permanently forfeits all
           remaining retries for the test.
        e) The per-test retry budget is not exhausted (retry_max > 0).
        Plus the suite-wide retry cap must not be reached.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        if not (result.test_executed and result.id in self._retry_map):
            return False
        if self._suite_max_reached():
            return False
        is_bad_enough = result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
        record = self._retry_map[result.id]
        return (is_bad_enough
                and record['state'] == self.States.NOT_ATTEMPTED
                and record['retry_max'] > 0)

    def _should_retry_local_job(self, job_id):
        """Decide whether a local job should be retried, without a Result.

        The job is retried only when all of the following hold:
        a) The job has an entry in the retry map (i.e. requires retry).
        b) No retry attempt has been made yet (state == NOT_ATTEMPTED).
           An aborted job forfeits all remaining retries regardless of
           max_retries.
        c) The per-test retry budget is not exhausted (retry_max > 0).
        Plus the suite-wide retry cap must not be reached.

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        record = self._retry_map.get(job_id)
        if record is None:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        if record['state'] != self.States.NOT_ATTEMPTED:
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          record['state'])
            return False
        if record['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Tell whether a job id is tracked in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        return bool(self._retry_map.get(job_id))



    def get_retry_max(self, job_id):
        """Look up how many more times the job may still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, the remaining retry budget for the job.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']
| |
| |
class _SuiteChildJobCreator(object):
    """Creates the individual child test jobs belonging to a suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test
                                  code.
        @param job_keyvals: General job keyvals to be inserted into keyval
                            file, which will be used by tko/parse later.
        """
        self._tag = tag
        self._builds = builds
        self._board = board
        # Fall back to a retrying AFE client when the caller supplies none.
        if afe is None:
            afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        self._afe = afe
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        self._extra_deps = tuple(extra_deps)
        self._priority = priority
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict."""
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        fallback = list(self._builds.values())[0]
        return self._builds.get(provision.CROS_VERSION_PREFIX, fallback)


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # On a system running multiple suites that share tests, overriding
        # the priority may lead to unexpected scheduling order that adds
        # extra provision jobs, so only Moblab honors the test's priority.
        if utils.is_moblab():
            job_priority = max(self._priority, test.priority)
        else:
            job_priority = self._priority

        if test.fast:
            reboot_before = model_attributes.RebootBefore.NEVER
        else:
            reboot_before = None

        job_name = tools.create_job_name(
                self._test_source_build or self.cros_build,
                self._tag,
                test.name)
        test_obj = self._afe.create_job(
                control_file=test.text,
                name=job_name,
                control_type=test.test_type.capitalize(),
                meta_hosts=[self._board] * test.sync_count,
                dependencies=self._create_job_deps(test),
                keyvals=self._create_keyvals_for_test_job(test, retry_for),
                max_runtime_mins=self._max_runtime_mins,
                timeout_mins=self._timeout_mins,
                parent_job_id=self._suite_job_id,
                reboot_before=reboot_before,
                run_reset=not test.fast,
                priority=job_priority,
                synch_count=test.sync_count,
                require_ssp=test.require_ssp)

        test_obj.test_name = test.name
        return test_obj


    def _create_job_deps(self, test):
        """Build the dependency label list for a test job.

        @returns: A list of dependency strings.
        """
        if self._ignore_deps:
            deps = []
        else:
            deps = list(test.dependencies)
        deps.extend(self._extra_deps)
        return deps


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Build the keyvals dict used when creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        keyvals = {
                constants.JOB_BUILD_KEY: self.cros_build,
                constants.JOB_SUITE_KEY: self._tag,
                constants.JOB_EXPERIMENTAL_KEY: test.experimental,
                constants.JOB_BUILDS_KEY: self._builds
        }
        # test_source_build is saved to job_keyvals so the scheduler can
        # retrieve the build name from the database when compiling the
        # autoserv command line, avoiding a schema change to afe_jobs.
        #
        # Only record `test_source_build` if it differs from the CrOS build
        # or more than one build is in play (e.g. firmware and CrOS both
        # get updated on the dut).  This keeps backwards compatibility so
        # updated Autotest code can compile an autoserv command line to run
        # in an SSP container using previous builds.
        if (self._test_source_build and
                (len(self._builds) > 1 or
                 self.cros_build != self._test_source_build)):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = (
                    self._test_source_build)
            for prefix, build in self._builds.items():
                if prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build
        # The suite job id lets the tko parser link results back to the
        # parent suite.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
        # Dropping the old job's id in the new job's keyvals lets the tko
        # parser reconstruct the retry relationship and invalidate the old
        # job's results in the tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
        if self._job_keyvals:
            for key in constants.INHERITED_KEYVALS:
                if key in self._job_keyvals:
                    keyvals[key] = self._job_keyvals[key]
        return keyvals
| |
| |
class _ControlFileRetriever(object):
    """Fetches control files and parses them into control data instances.

    Unlike plain control file getters, which return raw control file text,
    this class returns parsed ControlData objects.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: a control_file_getter.ControlFileGetter used to
                          list and fetch the content of control files
        @param forgiving_parser: If False, will raise
                                 ControlVariableExceptions if any are
                                 encountered when parsing control files.
                                 Note that this can raise an exception for
                                 syntax errors in unrelated files, because
                                 we parse them before applying the
                                 predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling SSP
                              for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file
                          under the name |args_dict|.
        """
        self._cf_getter = cf_getter
        self._forgiving_parser = forgiving_parser
        self._run_prod_code = run_prod_code
        self._test_args = test_args


    def retrieve_for_test(self, test_name):
        """Retrieve a single test's control data.

        This ignores forgiving_parser because we cannot return a forgiving
        value.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        return suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)


    def retrieve_for_suite(self, suite_name=''):
        """Scan through all control files and collect their tests.

        @param suite_name: If specified, restrains the search space to just
                           this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and
                                          there is a syntax error in a
                                          control file.

        @returns a dictionary of ControlData objects based on the given
                 parameters.
        """
        tests = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if self._run_prod_code:
            # Prod test code already lives on the lab servers, so SSP must
            # be disabled for these tests.
            for control_data in tests.values():
                control_data.require_ssp = False

        return tests
| |
| |
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collect every suite name defined by any control file's SUITE tag.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter.  Defaults to
                      using DevServerGetter.

    @return list of suites
    """
    if cf_getter is None:
        cf_getter = _create_ds_getter(build, devserver)

    # Accept every test; we only care about their suite tags.
    all_suites = set()
    for control_data in find_and_parse_tests(cf_getter, lambda t: True):
        all_suites.update(control_data.suite_tag_parts)
    return list(all_suites)
| |
| |
| def test_file_similarity_predicate(test_file_pattern): |
| """Returns predicate that gets the similarity based on a test's file |
| name pattern. |
| |
| Builds a predicate that takes in a parsed control file (a ControlData) |
| and returns a tuple of (file path, ratio), where ratio is the |
| similarity between the test file name and the given test_file_pattern. |
| |
| @param test_file_pattern: regular expression (string) to match against |
| control file names. |
| @return a callable that takes a ControlData and and returns a tuple of |
| (file path, ratio), where ratio is the similarity between the |
| test file name and the given test_file_pattern. |
| """ |
| return lambda t: ((None, 0) if not hasattr(t, 'path') else |
| (t.path, difflib.SequenceMatcher(a=t.path, |
| b=test_file_pattern).ratio())) |
| |
| |
| def test_name_similarity_predicate(test_name): |
| """Returns predicate that matched based on a test's name. |
| |
| Builds a predicate that takes in a parsed control file (a ControlData) |
| and returns a tuple of (test name, ratio), where ratio is the similarity |
| between the test name and the given test_name. |
| |
| @param test_name: the test name to base the predicate on. |
| @return a callable that takes a ControlData and returns a tuple of |
| (test name, ratio), where ratio is the similarity between the |
| test name and the given test_name. |
| """ |
| return lambda t: ((None, 0) if not hasattr(t, 'name') else |
| (t.name, |
| difflib.SequenceMatcher(a=t.name, b=test_name).ratio())) |
| |
| |
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns True if the test attributes satisfy the given attribute
    boolean expression.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              test, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns True if the
            test attributes satisfy the given boolean expression.
    """
    def matches(control_data):
        """Evaluate the boolean expression against one test's attributes."""
        return boolparse_lib.BoolstrResult(test_attr_boolstr,
                                           control_data.attributes)
    return matches
| |
| |
| def test_file_matches_pattern_predicate(test_file_pattern): |
| """Returns predicate that matches based on a test's file name pattern. |
| |
| Builds a predicate that takes in a parsed control file (a ControlData) |
| and returns True if the test's control file name matches the given |
| regular expression. |
| |
| @param test_file_pattern: regular expression (string) to match against |
| control file names. |
| @return a callable that takes a ControlData and and returns |
| True if control file name matches the pattern. |
| """ |
| return lambda t: hasattr(t, 'path') and re.match(test_file_pattern, |
| t.path) |
| |
| |
| def test_name_matches_pattern_predicate(test_name_pattern): |
| """Returns predicate that matches based on a test's name pattern. |
| |
| Builds a predicate that takes in a parsed control file (a ControlData) |
| and returns True if the test name matches the given regular expression. |
| |
| @param test_name_pattern: regular expression (string) to match against |
| test names. |
| @return a callable that takes a ControlData and returns |
| True if the name fields matches the pattern. |
| """ |
| return lambda t: hasattr(t, 'name') and re.match(test_name_pattern, |
| t.name) |
| |
| |
| def test_name_equals_predicate(test_name): |
| """Returns predicate that matched based on a test's name. |
| |
| Builds a predicate that takes in a parsed control file (a ControlData) |
| and returns True if the test name is equal to |test_name|. |
| |
| @param test_name: the test name to base the predicate on. |
| @return a callable that takes a ControlData and looks for |test_name| |
| in that ControlData's name. |
| """ |
| return lambda t: hasattr(t, 'name') and test_name == t.name |
| |
| |
def name_in_tag_similarity_predicate(name):
    """Returns predicate that takes a control file and gets the similarity
    of the suites in the control file and the given name.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns a list of tuples of (suite name, ratio), where suite name
    is each suite listed in the control file, and ratio is the similarity
    between each suite and the given name.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of
            tuples of (suite name, ratio), where suite name is each suite
            listed in the control file, and ratio is the similarity between
            each suite and the given name.
    """
    def similarities(control_data):
        """Score every suite tag of one ControlData against |name|."""
        scored = [(suite, difflib.SequenceMatcher(a=suite, b=name).ratio())
                  for suite in control_data.suite_tag_parts]
        # A test with no suite tags still yields one (None, 0) entry.
        return scored or [(None, 0)]
    return similarities
| |
| |
def name_in_tag_predicate(name):
    """Returns predicate that takes a control file and looks for |name|.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns True if the SUITE tag is present and contains |name|.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    # Thin delegate kept here for backwards compatibility; the actual
    # implementation lives in suite_common.
    return suite_common.name_in_tag_predicate(name)
| |
| |
def create_fs_getter(autotest_dir):
    """
    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # Currently hard-coded places to look for tests.
    test_subdirs = ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests')
    search_dirs = [os.path.join(autotest_dir, subdir)
                   for subdir in test_subdirs]
    return control_file_getter.FileSystemGetter(search_dirs)
| |
| |
def _create_ds_getter(build, devserver):
    """
    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter that fetches control files for |build| from
            |devserver|.
    """
    return control_file_getter.DevServerGetter(build, devserver)
| |
| |
| def _non_experimental_tests_predicate(test_data): |
| """Test predicate for non-experimental tests.""" |
| return not test_data.experimental |
| |
| |
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Scan through all tests and return the ones eligible for this suite.

    Searches every control file reachable through cf_getter (optionally
    restricted to suite_name) and returns the tests matching the given
    predicate.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
                      and fetch the content of control files
    @param predicate: a function that should return True when run over a
                      ControlData representation of a control file that
                      should be in this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files.  Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute.  Results are sorted based
            on the TIME setting in control file, slowest test comes first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = _ControlFileRetriever(
            cf_getter,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_suite(suite_name)
    if not add_experimental:
        # Compose the caller's predicate with the non-experimental filter.
        predicate = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(tests, predicate)
| |
| |
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Scan through all tests and suggest the closest matches.

    Searches every control file reachable through cf_getter (optionally
    restricted to suite_name), scores each test with the given similarity
    predicate and returns the best |count| names.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
                      and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
                      when run over a ControlData representation of a
                      control file that should be in this Suite.  `name` is
                      the key to be compared, e.g., a suite name or test
                      name.  `ratio` is a value between [0,1] indicating the
                      similarity of `name` and the value to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of top names that similar to the given test, sorted by
            match ratio.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))
    scores = {}
    for control_data in tests.values():
        ratios = predicate(control_data)
        # Some predicates (e.g. name_in_tag_similarity_predicate) return a
        # list of tuples; normalize single tuples to a list as well.
        if not isinstance(ratios, list):
            ratios = [ratios]
        for candidate, ratio in ratios:
            scores[candidate] = ratio
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return [candidate for candidate, _ in ranked[:count]]
| |
| |
| def _deprecated_suite_method(func): |
| """Decorator for deprecated Suite static methods. |
| |
| TODO(ayatane): This is used to decorate functions that are called as |
| static methods on Suite. |
| """ |
| @functools.wraps(func) |
| def wrapper(*args, **kwargs): |
| """Wraps |func| for warning.""" |
| warnings.warn('Calling method "%s" from Suite is deprecated' % |
| func.__name__) |
| return func(*args, **kwargs) |
| return staticmethod(wrapper) |
| |
| |
class _BaseSuite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files a predicate to match the desired
    tests, can gather tests and fire off jobs to run them, and then wait for
    results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         ControlData objects.
    @var _retry: a bool value indicating whether jobs should be retried on
                 failure.
    @var _retry_handler: a RetryHandler object.

    """


    def __init__(
            self,
            tests,
            tag,
            builds,
            board,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxsize,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """Initialize instance.

        @param tests: Iterable of tests to run.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: True if results of failed jobs should be passed to
                          |result_reporter| (see _should_report).
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled. The list is
                           copied; the caller's list is not modified.
        @param priority: Integer priority level. Higher is more important.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxsize.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """

        self.tests = list(tests)
        self._tag = tag
        self._builds = builds
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._jobs = []
        self._jobs_to_tests = {}

        self._file_bugs = file_bugs
        self._suite_job_id = suite_job_id
        self._job_retry = job_retry
        self._max_retries = max_retries
        # RetryHandler to be initialized in schedule()
        self._retry_handler = None
        self.wait_for_results = wait_for_results
        self._job_keyvals = job_keyvals
        if result_reporter is None:
            self._result_reporter = _EmailReporter(self)
        else:
            self._result_reporter = result_reporter

        if extra_deps is None:
            extra_deps = []
        else:
            # Copy so the appends/extend below do not mutate the caller's
            # list.
            extra_deps = list(extra_deps)
        extra_deps.append(board)
        if pool:
            extra_deps.append(pool)
        extra_deps.extend(child_dependencies)
        self._dependencies = tuple(extra_deps)

        self._job_creator = _SuiteChildJobCreator(
            tag=tag,
            builds=builds,
            board=board,
            afe=afe,
            max_runtime_mins=max_runtime_mins,
            timeout_mins=timeout_mins,
            suite_job_id=suite_job_id,
            ignore_deps=ignore_deps,
            extra_deps=extra_deps,
            priority=priority,
            offload_failures_only=offload_failures_only,
            test_source_build=test_source_build,
            job_keyvals=job_keyvals,
        )


    def _schedule_test(self, record, test, retry_for=None):
        """Schedule a single test and return the job.

        Schedule a single test by creating a job, and then update relevant
        data structures that are used to keep track of all running jobs.

        Emits a TEST_NA status log entry if it failed to schedule the test due
        to NoEligibleHostException or a non-existent board label.

        Returns a frontend.Job object if the test is successfully scheduled.
        If scheduling failed due to NoEligibleHostException or a non-existent
        board label, returns None.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param test: ControlData for a test to run.
        @param retry_for: If we are scheduling a test to retry an
                          old job, the afe_job_id of the old job
                          will be passed in as |retry_for|.

        @returns: A frontend.Job object or None
        """
        msg = 'Scheduling %s' % test.name
        if retry_for:
            msg = msg + ', to retry afe job %d' % retry_for
        logging.debug(msg)
        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
        try:
            job = self._job_creator.create_job(test, retry_for=retry_for)
        except (error.NoEligibleHostException, proxy.ValidationError) as e:
            if (isinstance(e, error.NoEligibleHostException)
                or (isinstance(e, proxy.ValidationError)
                    and _is_nonexistent_board_error(e))):
                # Treat a dependency on a non-existent board label the same as
                # a dependency on a board that exists, but for which there's no
                # hardware.
                logging.debug('%s not applicable for this board/pool. '
                              'Emitting TEST_NA.', test.name)
                Status('TEST_NA', test.name,
                       'Skipping:  test not supported on this board/pool.',
                       begin_time_str=begin_time_str).record_all(record)
                return None
            else:
                raise e
        except (error.RPCException, proxy.JSONRPCException):
            if retry_for:
                # Mark that we've attempted to retry the old job.
                logging.debug("RPC exception occurred")
                self._retry_handler.set_attempted(job_id=retry_for)
            raise
        else:
            self._jobs.append(job)
            self._jobs_to_tests[job.id] = test
            if retry_for:
                # A retry job was just created, record it.
                self._retry_handler.add_retry(
                        old_job_id=retry_for, new_job_id=job.id)
                retry_count = (test.job_retries -
                               self._retry_handler.get_retry_max(job.id))
                logging.debug('Job %d created to retry job %d. '
                              'Have retried for %d time(s)',
                              job.id, retry_for, retry_count)
            self._remember_job_keyval(job)
            return job

    def schedule(self, record):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @returns: The number of tests that were scheduled.
        """
        scheduled_test_names = []
        logging.debug('Discovered %d tests.', len(self.tests))

        Status('INFO', 'Start %s' % self._tag).record_result(record)
        try:
            # Write job_keyvals into keyval file.
            if self._job_keyvals:
                utils.write_keyval(self._results_dir, self._job_keyvals)

            # TODO(crbug.com/730885): This is a hack to protect tests that are
            # not usually retried from getting hit by a provision error when run
            # as part of a suite. Remove this hack once provision is separated
            # out in its own suite.
            self._bump_up_test_retries(self.tests)
            for test in self.tests:
                scheduled_job = self._schedule_test(record, test)
                if scheduled_job is not None:
                    scheduled_test_names.append(test.name)

            # Write the num of scheduled tests and name of them to keyval file.
            logging.debug('Scheduled %d tests, writing the total to keyval.',
                          len(scheduled_test_names))
            utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_test_names))
        except Exception:
            # Deliberate best-effort: a scheduling failure is recorded as a
            # suite FAIL rather than aborting the caller.
            logging.exception('Exception while scheduling suite')
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)

        if self._job_retry:
            logging.debug("Initializing RetryHandler for suite %s.", self._tag)
            self._retry_handler = RetryHandler(
                    initial_jobs_to_tests=self._jobs_to_tests,
                    max_retries=self._max_retries)
            logging.debug("retry map created: %s ",
                          self._retry_handler._retry_map)
        else:
            logging.info("Will not retry jobs from suite %s.", self._tag)
        return len(scheduled_test_names)


    def _bump_up_test_retries(self, tests):
        """Bump up individual test retries to match suite retry options."""
        if not self._job_retry:
            return

        for test in tests:
            # We do honor if a test insists on JOB_RETRIES = 0.
            if test.job_retries is None:
                logging.debug(
                        'Test %s did not request retries, but suite requires '
                        'retries. Bumping retries up to 1. '
                        '(See crbug.com/730885)',
                        test.name)
                test.job_retries = 1


    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
        """Make a keyvals dict to write for scheduled test names.

        @param scheduled_test_names: A list of scheduled test name strings.

        @returns: A keyvals dict.
        """
        return {
                constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
                constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
        }


    def _should_report(self, result):
        """
        Returns True if this failure requires to be reported.

        @param result: A result, encapsulating the status of the failed job.
        @return: True if we should report this failure.
        """
        return (self._file_bugs and result.test_executed and
                not result.is_testna() and
                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))


    def _has_retry(self, result):
        """
        Return True if this result gets to retry.

        @param result: A result, encapsulating the status of the failed job.
        @return: bool
        """
        return (self._job_retry
                and self._retry_handler.has_following_retry(result))


    def wait(self, record):
        """
        Polls for the job statuses, using |record| to print status when each
        completes.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        waiter = job_status.JobResultWaiter(self._afe, self._tko)
        try:
            if self._suite_job_id:
                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
            else:
                logging.warning('Unknown suite_job_id, falling back to less '
                                'efficient results_generator.')
                jobs = self._jobs
            waiter.add_jobs(jobs)
            for result in waiter.wait_for_results():
                self._handle_result(result=result, record=record, waiter=waiter)
                if self._finished_waiting():
                    break
        except Exception:  # pylint: disable=W0703
            logging.exception('Exception waiting for results')
            Status('FAIL', self._tag,
                   'Exception waiting for results').record_result(record)


    def _finished_waiting(self):
        """Return whether the suite is finished waiting for child jobs."""
        return False


    def _handle_result(self, result, record, waiter):
        """
        Handle a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.

        @instance_param _result_reporter: _ResultReporter instance.
        """
        self._record_result(result, record)
        rescheduled = False
        if self._job_retry and self._retry_handler._should_retry(result):
            rescheduled = self._retry_result(result, record, waiter)
        # TODO (crbug.com/751428): If the suite times out before a retry could
        # finish, we would lose the chance to report errors from the original
        # job.
        if self._has_retry(result) and rescheduled:
            return

        if self._should_report(result):
            self._result_reporter.report(result)

    def _record_result(self, result, record):
        """
        Record a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        result.record_all(record)
        self._remember_job_keyval(result)


    def _retry_result(self, result, record, waiter):
        """
        Retry a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.
        @returns: True if a job was scheduled for retry, False otherwise.
        """
        test = self._jobs_to_tests[result.id]
        try:
            # It only takes effect for CQ retriable job:
            # 1) in first try, test.fast=True.
            # 2) in second try, test will be run in normal mode, so reset
            #    test.fast=False.
            test.fast = False
            new_job = self._schedule_test(
                    record=record, test=test, retry_for=result.id)
        except (error.RPCException, proxy.JSONRPCException) as e:
            logging.error('Failed to schedule test: %s, Reason: %s',
                          test.name, e)
            return False
        else:
            waiter.add_job(new_job)
            return bool(new_job)

    @property
    def jobs(self):
        """Give a copy of the associated jobs

        @returns: array of jobs"""
        return list(self._jobs)


    @property
    def _should_file_bugs(self):
        """Return whether bugs should be filed.

        @returns: bool
        """
        # File bug when failure is one of the _FILE_BUG_SUITES,
        # otherwise send an email to the owner anc cc.
        return self._tag in _FILE_BUG_SUITES


    def abort(self):
        """
        Abort all scheduled test jobs.
        """
        if self._jobs:
            job_ids = [job.id for job in self._jobs]
            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)


    def _remember_job_keyval(self, job):
        """
        Record provided job as a suite job keyval, for later referencing.

        @param job: some representation of a job that has the attributes:
                    id, test_name, and owner
        """
        if self._results_dir and job.id and job.owner and job.test_name:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            logging.debug('Adding job keyval for %s=%s',
                          job.test_name, job_id_owner)
            # hashlib requires bytes; encode so this works on Python 3 as
            # well as Python 2.
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(job.test_name.encode('utf-8')).hexdigest():
                 job_id_owner})
| |
| |
class Suite(_BaseSuite):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files a predicate to match the desired
    tests, can gather tests and fire off jobs to run them, and then wait for
    results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         ControlData objects.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _retry: a bool value indicating whether jobs should be retried on
                 failure.
    @var _retry_handler: a RetryHandler object.

    """

    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(
            suite_common.name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_in_list_predicate = _deprecated_suite_method(
            suite_common.test_name_in_list_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)


    @classmethod
    def create_from_predicates(cls, predicates, builds, board, devserver,
                               cf_getter=None, name='ad_hoc_suite',
                               run_prod_code=False, **dargs):
        """
        Create a Suite using a given predicate test filters.

        Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
        |autotest_dir| and will schedule them using |afe|.  Pulls control files
        from the default dev server. Results will be pulled from |tko| upon
        completion.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param name: name of suite. Defaults to 'ad_hoc_suite'
        @param run_prod_code: If true, the suite will run the tests that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            if run_prod_code:
                cf_getter = create_fs_getter(_AUTOTEST_DIR)
            else:
                build = suite_common.get_test_source_build(builds, **dargs)
                cf_getter = _create_ds_getter(build, devserver)

        return cls(predicates,
                   name, builds, board, cf_getter, run_prod_code, **dargs)


    @classmethod
    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                         **dargs):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Pulls control files from the default dev server.
        Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            build = suite_common.get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(build, devserver)

        return cls([suite_common.name_in_tag_predicate(name)],
                   name, builds, board, cf_getter, **dargs)


    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxsize,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """
        Constructor

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level. Higher is more important.
        @param forgiving_parser: If False, control file parse errors raise
                                 instead of being skipped.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxsize.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """
        tests = find_and_parse_tests(
                cf_getter,
                _ComposedPredicate(predicates),
                tag,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )
        super(Suite, self).__init__(
                tests=tests,
                tag=tag,
                builds=builds,
                board=board,
                afe=afe,
                tko=tko,
                pool=pool,
                results_dir=results_dir,
                max_runtime_mins=max_runtime_mins,
                timeout_mins=timeout_mins,
                file_bugs=file_bugs,
                suite_job_id=suite_job_id,
                ignore_deps=ignore_deps,
                extra_deps=extra_deps,
                priority=priority,
                wait_for_results=wait_for_results,
                job_retry=job_retry,
                max_retries=max_retries,
                offload_failures_only=offload_failures_only,
                test_source_build=test_source_build,
                job_keyvals=job_keyvals,
                child_dependencies=child_dependencies,
                result_reporter=result_reporter,
        )
| |
| |
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    This is done by creating dummy_Pass tests.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: If True, load the dummy test from the test
                              code currently on the lab servers.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[],
                tag=tag,
                builds=builds,
                board=board,
                **kwargs)
        # Count of passing dummy_Pass jobs seen so far; bumped in
        # _handle_result.
        self._num_successful = 0
        # Default to 0 before the early return below so that
        # _finished_waiting() is immediately satisfied if no tests get made.
        self._num_required = 0
        self.tests = []

        # Keep only the non-provisionable labels for host matching.
        static_deps = [dep for dep in self._dependencies
                       if not provision.Provision.acts_on(dep)]
        if 'pool:suites' in static_deps:
            logging.info('Provision suite is disabled on suites pool')
            return
        logging.debug('Looking for hosts matching %r', static_deps)
        hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=static_deps)
        logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts))
        available_hosts = [h for h in hosts if h.is_available()]
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(available_hosts))
        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # One dummy_Pass test per available host, capped by num_max.
        self.tests = [dummy_test] * min(len(available_hosts), num_max)
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))
        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Handle a test job result, counting successful provisions.

        See _BaseSuite._handle_result for parameter details.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if result.is_good():
            self._num_successful += 1

    def _finished_waiting(self):
        """Return True once enough provisioning jobs have passed."""
        return self._num_successful >= self._num_required
| |
| |
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.  When None,
                      a getter is constructed based on |run_prod_code|.
    @param run_prod_code: If True, read the control file from the local
                          autotest checkout instead of the devserver.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    """
    getter = cf_getter
    if getter is None:
        if run_prod_code:
            getter = create_fs_getter(_AUTOTEST_DIR)
        else:
            # Stage control files on the devserver first so they can be
            # fetched, then read them from there.
            build = suite_common.get_test_source_build(
                    builds, test_source_build=test_source_build)
            devserver.stage_artifacts(image=build,
                                      artifacts=['control_files'])
            getter = _create_ds_getter(build, devserver)
    return _ControlFileRetriever(
            getter,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_test('dummy_Pass')
| |
| |
| class _ComposedPredicate(object): |
| """Return the composition of the predicates. |
| |
| Predicates are functions that take a test control data object and |
| return True of that test is to be included. The returned |
| predicate's set is the intersection of all of the input predicates' |
| sets (it returns True if all predicates return True). |
| """ |
| |
| def __init__(self, predicates): |
| """Initialize instance. |
| |
| @param predicates: Iterable of predicates. |
| """ |
| self._predicates = list(predicates) |
| |
| def __repr__(self): |
| return '{cls}({this._predicates!r})'.format( |
| cls=type(self).__name__, |
| this=self, |
| ) |
| |
| def __call__(self, control_data_): |
| return all(f(control_data_) for f in self._predicates) |
| |
| |
| def _is_nonexistent_board_error(e): |
| """Return True if error is caused by nonexistent board label. |
| |
| As of this writing, the particular case we want looks like this: |
| |
| 1) e.problem_keys is a dictionary |
| 2) e.problem_keys['meta_hosts'] exists as the only key |
| in the dictionary. |
| 3) e.problem_keys['meta_hosts'] matches this pattern: |
| "Label "board:.*" not found" |
| |
| We check for conditions 1) and 2) on the |
| theory that they're relatively immutable. |
| We don't check condition 3) because it seems |
| likely to be a maintenance burden, and for the |
| times when we're wrong, being right shouldn't |
| matter enough (we _hope_). |
| |
| @param e: proxy.ValidationError instance |
| @returns: boolean |
| """ |
| return (isinstance(e.problem_keys, dict) |
| and len(e.problem_keys) == 1 |
| and 'meta_hosts' in e.problem_keys) |
| |
| |
class _ResultReporter(six.with_metaclass(abc.ABCMeta, object)):
    """Abstract base class for reporting test results.

    Usually, this is used to report test failures.

    six.with_metaclass makes this an ABC under both Python 2 and 3;
    subclasses (e.g. _EmailReporter) must implement report().
    """

    @abc.abstractmethod
    def report(self, result):
        """Report test result.

        @param result: Status instance for job.
        """
| |
| |
class _EmailReporter(_ResultReporter):
    """Class that emails based on test failures."""

    def __init__(self, suite, bug_template=None):
        self._suite = suite
        self._bug_template = bug_template if bug_template else {}

    def _get_test_bug(self, result):
        """Build a TestBug describing the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # The reporting modules depend on external packages (e.g. httplib2),
        # which would make this whole module unimportable without built
        # site-packages.  Since the reporting modules are only needed here,
        # import lazily inside the function.
        from autotest_lib.server.cros.dynamic_suite import reporting

        views = self._suite._tko.run('get_detailed_test_views',
                                     afe_job_id=result.id)
        chrome_version = utils.get_chrome_version(views)
        return reporting.TestBug(self._suite._job_creator.cros_build,
                                 chrome_version,
                                 self._suite._tag,
                                 result)

    def _get_bug_template(self, result):
        """Resolve the bug template for the given test job.

        Merges the suite-level template with any bug template declared in
        the test's control file.

        @param result: Status instance for job.
        @returns: BugTemplate dictionary.
        """
        # Imported lazily for the same reason as in _get_test_bug.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        template = reporting_utils.BugTemplate(self._bug_template)
        try:
            test_data = self._suite._jobs_to_tests[result.id]
            return template.finalize_bug_template(test_data.bug_template)
        except AttributeError:
            # Test control file does not have bug template defined.
            return template.bug_template
        except reporting_utils.InvalidBugTemplateException as e:
            logging.error('Merging bug templates failed with '
                          'error: %s An empty bug template will '
                          'be used.', e)
            return {}

    def report(self, result):
        # Imported lazily for the same reason as in _get_test_bug.
        from autotest_lib.server.cros.dynamic_suite import reporting

        bug = self._get_test_bug(result)
        template = self._get_bug_template(result)
        reporting.send_email(bug, template)