cbuildbot/stages/release_stages.py - third_party/chromite - Git at Google

 # -*- coding: utf-8 -*-
 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Module containing the various stages that a builder runs."""

 from __future__ import print_function

 import json
 import os

 from chromite.cbuildbot import commands
 from chromite.lib import failures_lib
 from chromite.lib import config_lib
 from chromite.cbuildbot.stages import artifact_stages
 from chromite.cbuildbot.stages import generic_stages
 from chromite.lib import constants
 from chromite.lib import cros_logging as logging
 from chromite.lib import gs
 from chromite.lib import osutils
 from chromite.lib import parallel
 from chromite.lib import timeout_util
 from chromite.lib.paygen import gspaths
 from chromite.lib.paygen import paygen_build_lib


 class InvalidTestConditionException(Exception):
   """Signals that the prerequisites of a test were not satisfied."""


 class SignerTestStage(artifact_stages.ArchivingStage):
   """Stage that runs the signer related tests for the current board."""

   option_name = 'tests'
   config_name = 'signer_tests'
   category = constants.CI_INFRA_STAGE

   # Upper bound (in seconds) on a signer test run. A normal run takes about
   # five minutes; anything longer than 30 minutes is aborted.
   SIGNER_TEST_TIMEOUT = 30 * 60

   def PerformStage(self):
     """Run the signer tests once the recovery image is available.

     Raises:
       InvalidTestConditionException: If the archive stage reports that there
         is no recovery image to test.
     """
     recovery_image_ready = self.archive_stage.WaitForRecoveryImage()
     if not recovery_image_ready:
       raise InvalidTestConditionException('Missing recovery image.')

     # The tests run under the class-level timeout.
     with timeout_util.Timeout(self.SIGNER_TEST_TIMEOUT):
       commands.RunSignerTests(self._build_root, self._current_board)


 class SignerResultsTimeout(failures_lib.StepFailure):
   """Raised when the signer yields no results within the allotted time."""


 class SignerFailure(failures_lib.StepFailure):
   """Raised when the signer reports an error result."""


 class MissingInstructionException(failures_lib.StepFailure):
   """The signing instructions uploaded by PushImage never reached us."""


 class MalformedResultsException(failures_lib.StepFailure):
   """The signer results are not in the format we expect."""


 class PaygenSigningRequirementsError(failures_lib.StepFailure):
   """Raised when the Paygen stage cannot run because signing failed."""


 class PaygenCrostoolsNotAvailableError(failures_lib.StepFailure):
   """Paygen stage can't run if crostools is not available."""
   # NOTE(review): meaning inferred from the class name only; nothing in the
   # visible code raises this exception, so confirm against its callers.


 class PaygenNoPaygenConfigForBoard(failures_lib.StepFailure):
   """Paygen can't run without a release.conf config for the board."""


 class SigningStage(generic_stages.BoardSpecificBuilderStage):
   """Stage that waits for image signing.

   This stage waits for values from ArchiveStage (push_image), then waits until
   the signing servers sign the uploaded images.
   """
   option_name = 'paygen'
   config_name = 'paygen'
   category = constants.CI_INFRA_STAGE

   # Poll for new results every 30 seconds.
   SIGNING_PERIOD = 30

   # Timeout for the signing process. 2 hours in seconds.
   SIGNING_TIMEOUT = 2 * 60 * 60

   def __init__(self, builder_run, buildstore, board, **kwargs):
     """Initialize the stage and its signing bookkeeping.

     Args:
       builder_run: See builder_run on ArchivingStage.
       buildstore: BuildStore instance to make DB calls with.
       board: See board on ArchivingStage.
       **kwargs: Forwarded unchanged to the parent class constructor.
     """
     super(SigningStage, self).__init__(builder_run, buildstore, board, **kwargs)

     # Used to remember partial results between retries. Of the form:
     #   {'channel': {'gs://instruction_uri1.json': <parsed signer json>}}
     self.signing_results = {}

     # Filled in via WaitUntilReady. Of the form:
     #   {'channel': ['gs://instruction_uri1', 'gs://signer_instruction_uri2']}
     self.instruction_urls_per_channel = None

   def _HandleStageException(self, exc_info):
     """Unblock dependent stages, then pick how to report the exception.

     A MissingInstructionException is reported as a warning; every other
     exception is passed to the parent class handler.

     Args:
       exc_info: (exception type, exception value, traceback) tuple.

     Returns:
       The result of whichever handler was selected.
     """
     exc_type, _exc_value, _exc_tb = exc_info

     # Notify stages blocked on us if we error out.
     self.board_runattrs.SetParallel('signed_images_ready', None)

     # Warn so people look at ArchiveStage for the real error.
     if issubclass(exc_type, MissingInstructionException):
       return self._HandleExceptionAsWarning(exc_info)

     return super(SigningStage, self)._HandleStageException(exc_info)

   def _JsonFromUrl(self, gs_ctx, url):
     """Fetch a GS Url, and parse it as Json.

     Args:
       gs_ctx: GS Context.
       url: Url to fetch and parse.

     Returns:
       None if the Url doesn't exist.
       Parsed Json structure if it did.

     Raises:
       MalformedResultsException: If the fetched contents fail to parse.
     """
     try:
       signer_txt = gs_ctx.Cat(url)
     except gs.GSNoSuchKey:
       return None

     try:
       return json.loads(signer_txt)
     except ValueError:
       # We should never see malformed Json, even for intermediate statuses.
       raise MalformedResultsException(signer_txt)

   def _SigningStatusFromJson(self, signer_json):
     """Extract a signing status from a signer result Json DOM.

     Args:
       signer_json: The parsed json status from a signer operation, or None.

     Returns:
       string with a simple status: SIGNER_STATUS_PASSED, SIGNER_STATUS_FAILED,
       etc, or '' if the json is None/empty or doesn't contain a status.
     """
     status_section = (signer_json or {}).get('status', {})
     return status_section.get('status', '')

   def _CheckForResults(self, gs_ctx, instruction_urls_per_channel,
                        channel_notifier=None):
     """Polling callback for timeout_util.WaitForReturnTrue.

     Looks for signer results that have not been collected yet and stores
     every completed one (passed or failed) in self.signing_results.

     Args:
       gs_ctx: Google Storage Context.
       instruction_urls_per_channel: Urls of the signer instruction files,
                                     keyed by channel. The result for each
                                     one is read from '<url>.json'.
       channel_notifier: Method to call when a channel is ready or None.

     Returns:
       True if every channel has a completed result for each of its urls,
       False if some results are still outstanding.
     """
     completed_statuses = (constants.SIGNER_STATUS_PASSED,
                           constants.SIGNER_STATUS_FAILED)
     passed = constants.SIGNER_STATUS_PASSED

     # Assume we are done, then try to prove otherwise.
     results_completed = True

     for channel, instruction_urls in instruction_urls_per_channel.items():
       channel_results = self.signing_results.setdefault(channel, {})

       # This channel was already fully collected by an earlier poll.
       if len(channel_results) == len(instruction_urls):
         continue

       for url in instruction_urls:
         # Convert from instructions URL to instructions result URL.
         result_url = url + '.json'

         # We already have a result for this URL.
         if result_url in channel_results:
           continue

         try:
           signer_json = self._JsonFromUrl(gs_ctx, result_url)
         except MalformedResultsException as e:
           logging.warning('Received malformed json: %s', e)
           continue

         if self._SigningStatusFromJson(signer_json) in completed_statuses:
           # If we find a completed result, remember it.
           channel_results[result_url] = signer_json

       # If we don't have full results for this channel, we aren't done
       # waiting.
       if len(channel_results) != len(instruction_urls):
         results_completed = False
         continue

       # If we reach here, the channel has just been completed for the first
       # time. It was signed successfully only if every result passed.
       channel_success = all(
           self._SigningStatusFromJson(result) == passed
           for result in channel_results.values())

       # If we successfully completed the channel, inform someone.
       if channel_success and channel_notifier:
         channel_notifier(channel)

     return results_completed

   def _WaitForSigningResults(self,
                              instruction_urls_per_channel,
                              channel_notifier=None):
     """Do the work of waiting for signer results and logging them.

     Args:
       instruction_urls_per_channel: push_image data, of the form described
                                     in __init__.
       channel_notifier: Method to call with channel name when ready or None.

     Raises:
       SignerResultsTimeout: If the results are not all available within
                             SIGNING_TIMEOUT.
       SignerFailure: If any collected result has a status other than
                      SIGNER_STATUS_PASSED.

     Errors raised while downloading results, other than a missing key, are
     not caught by this class.
     """
     gs_ctx = gs.GSContext(dry_run=self._run.options.debug)

     try:
       logging.info('Waiting for signer results.')
       timeout_util.WaitForReturnTrue(
           self._CheckForResults,
           func_args=(gs_ctx, instruction_urls_per_channel, channel_notifier),
           timeout=self.SIGNING_TIMEOUT, period=self.SIGNING_PERIOD)
     except timeout_util.TimeoutError:
       msg = 'Image signing timed out.'
       logging.error(msg)
       logging.PrintBuildbotStepText(msg)
       raise SignerResultsTimeout(msg)

     # Log all signer results, then handle any signing failures.
     failures = []
     for url_results in self.signing_results.values():
       for url, signer_result in url_results.items():
         result_description = os.path.basename(url)
         logging.PrintBuildbotStepText(result_description)
         logging.info('Received results for: %s', result_description)
         logging.info(json.dumps(signer_result, indent=4))

         status = self._SigningStatusFromJson(signer_result)
         if status != constants.SIGNER_STATUS_PASSED:
           failures.append(result_description)
           logging.error('Signing failed for: %s', result_description)

     if failures:
       logging.error('Failure summary:')
       for failure in failures:
         logging.error('  %s', failure)
       raise SignerFailure(', '.join(str(f) for f in failures))

   def WaitUntilReady(self):
     """Block until push_image data is ready.

     Sets self.instruction_urls_per_channel as described in __init__.

     Returns:
       Boolean that tells if we can run this stage.
     """
     # This call will NEVER time out.
     self.instruction_urls_per_channel = self.board_runattrs.GetParallel(
         'instruction_urls_per_channel', timeout=None)

     # A value of None signals an error in PushImage.
     if self.instruction_urls_per_channel is None:
       # ArchiveStage PushImage failed. Signing won't run at all.
       self.board_runattrs.SetParallel('signed_images_ready', None)
       return False

     return True

   def PerformStage(self):
     """Wait for the images to be signed, then publish the signed channels."""
     # Convert to release tools naming for boards.
     board = self._current_board.replace('_', '-')
     version = self._run.attrs.release_tag

     logging.info('Waiting for image signing for: %s, %s', board, version)
     logging.info('GS errors are a normal part of the polling for results.')
     self._WaitForSigningResults(self.instruction_urls_per_channel)

     # Notify stages blocked on us that images are ready for the given channel
     # list.
     channels = list(self.instruction_urls_per_channel)
     self.board_runattrs.SetParallel('signed_images_ready', channels)


 class PaygenStage(generic_stages.BoardSpecificBuilderStage):
   """Stage that generates release payloads.

   If this stage is created with a 'channels' argument, it can run
   independently. Otherwise, it's dependent on values queued up by
   the SigningStage.
   """
   option_name = 'paygen'
   config_name = 'paygen'
   category = constants.CI_INFRA_STAGE

   def __init__(self, builder_run, buildstore, board, channels=None, **kwargs):
     """Init that accepts the channels argument, if present.

     Args:
       builder_run: See builder_run on ArchivingStage.
       buildstore: BuildStore instance to make DB calls with.
       board: See board on ArchivingStage.
       channels: Explicit list of channels to generate payloads for.
                 If None, WaitUntilReady will instead wait on the
                 'signed_images_ready' value.
                 Channels is normally None in release builds, and normally set
                 for trybot 'payloads' builds.
       **kwargs: Forwarded unchanged to the parent class constructor.
     """
     super(PaygenStage, self).__init__(builder_run, buildstore, board, **kwargs)
     self.channels = channels

   def _HandleStageException(self, exc_info):
     """Report expected Paygen failures as warnings.

     Args:
       exc_info: (exception type, exception value, traceback) tuple.

     Returns:
       The result of whichever handler was selected.
     """
     exc_type, _exc_value, _exc_tb = exc_info

     # Both of these are reported as warnings instead of failing outright:
     #  * PaygenNoPaygenConfigForBoard: Paygen failed to find anything needed
     #    in release.conf. This is common during new board bring up.
     #  * SignerFailure: the SigningStage failed, so we didn't run. Let
     #    SigningStage decide if this should kill the build.
     warning_types = (PaygenNoPaygenConfigForBoard, SignerFailure)
     if issubclass(exc_type, warning_types):
       return self._HandleExceptionAsWarning(exc_info)

     return super(PaygenStage, self)._HandleStageException(exc_info)

   def WaitUntilReady(self):
     """Block until signed images are ready.

     Returns:
       Boolean that tells if we can run this stage.
     """
     # If we got an explicit channel list, there is no need to wait.
     if self.channels is not None:
       return True

     # Wait for channels from signing stage.
     self.channels = self.board_runattrs.GetParallel(
         'signed_images_ready', timeout=None)

     # None means the signing stage errored out for some reason, in which
     # case payloads can't be generated.
     return self.channels is not None

   def PerformStage(self):
     """Do the work of generating our release payloads."""
     # Convert to release tools naming for boards.
     board = self._current_board.replace('_', '-')
     version = self._run.attrs.release_tag

     assert version, "We can't generate payloads without a release_tag."
     logging.info('Generating payloads for: %s, %s', board, version)

     # Test to see if the current board has a Paygen configuration. We do
     # this here, not in the sub-process so we don't have to pass back a
     # failure reason.
     try:
       paygen_build_lib.ValidateBoardConfig(board)
     except paygen_build_lib.BoardNotConfigured:
       raise PaygenNoPaygenConfigForBoard(
           'Golden Eye (%s) has no entry for board %s. Get a TPM to fix.' %
           (paygen_build_lib.PAYGEN_URI, board))

     # The DUTs check is only skipped for canary type builds.
     skip_duts_check = bool(
         config_lib.IsCanaryType(self._run.config.build_type))

     with parallel.BackgroundTaskRunner(self._RunPaygenInProcess) as per_channel:
       logging.info('Using channels: %s', self.channels)

       # Record the channels we've had configured in the metadata.
       self._run.attrs.metadata.UpdateWithDict({'channels':
                                                ','.join(self.channels)})

       # Queue one background task per channel.
       for channel in self.channels:
         per_channel.put((channel, board, version, self._run.options.debug,
                          self._run.config.paygen_skip_testing,
                          self._run.config.paygen_skip_delta_payloads,
                          skip_duts_check))

   def _RunPaygenInProcess(self, channel, board, version, debug,
                           disable_tests, skip_delta_payloads,
                           skip_duts_check):
     """Runs the PaygenBuild and PaygenTest stage (if applicable).

     Args:
       channel: Channel of payloads to generate.
       board: Board of payloads to generate, in release tools naming.
       version: Version of payloads to generate.
       debug: Flag telling if this is a real run, or a test run.
       disable_tests: Passed to PaygenBuildStage as its skip_testing argument.
       skip_delta_payloads: Skip generating delta payloads.
       skip_duts_check: Do not check minimum available DUTs before tests.
     """
     PaygenBuildStage(self._run, self.buildstore, board, channel, version, debug,
                      disable_tests, skip_delta_payloads, skip_duts_check).Run()


 class PaygenBuildStage(generic_stages.BoardSpecificBuilderStage):
   """Stage that generates payloads and uploads to Google Storage."""

   category = constants.CI_INFRA_STAGE

   def __init__(self, builder_run, buildstore, board, channel, version, debug,
                skip_testing, skip_delta_payloads, skip_duts_check, **kwargs):
     """Record the parameters for generating one channel's payloads.

     Args:
       builder_run: See builder_run on ArchiveStage
       buildstore: BuildStore instance to make DB calls with.
       board: Board of payloads to generate ('x86-mario', 'x86-alex-he', etc)
       channel: Channel of payloads to generate ('stable', 'beta', etc)
       version: Version of payloads to generate.
       debug: Flag telling if this is a real run, or a test run.
       skip_testing: Do not generate test artifacts or run payload tests.
       skip_delta_payloads: Skip generating delta payloads.
       skip_duts_check: Do not check minimum available DUTs before tests.
       **kwargs: Forwarded unchanged to the parent class constructor.
     """
     super(PaygenBuildStage, self).__init__(
         builder_run, buildstore, board, suffix=channel.capitalize(), **kwargs)
     self._run = builder_run
     self.board = board
     self.channel = channel
     self.version = version
     self.debug = debug
     self.skip_testing = skip_testing
     self.skip_delta_payloads = skip_delta_payloads
     self.skip_duts_check = skip_duts_check

   def PerformStage(self):
     """Invoke payload generation. If testing is enabled, schedule tests.

     This method is intended to be safe to invoke inside a process.
     """
     # Convert to release tools naming for channels.
     if not self.channel.endswith('-channel'):
       self.channel += '-channel'

     with osutils.TempDir(sudo_rm=True) as tempdir:
       # Describe the build we generate payloads for. The payload build is a
       # copy of it that points at the test bucket on debug runs.
       build = gspaths.Build(channel=self.channel,
                             board=self.board,
                             version=self.version,
                             bucket=gspaths.ChromeosReleases.BUCKET)
       payload_build = gspaths.Build(build)
       if self.debug:
         payload_build.bucket = gspaths.ChromeosReleases.TEST_BUCKET

       try:
         # Generate the payloads.
         self._PrintLoudly('Starting %s, %s, %s' % (self.channel, self.version,
                                                    self.board))
         paygen = paygen_build_lib.PaygenBuild(
             build,
             payload_build,
             work_dir=tempdir,
             site_config=self._run.site_config,
             dry_run=self.debug,
             skip_delta_payloads=self.skip_delta_payloads,
             skip_duts_check=self.skip_duts_check)

         testdata = paygen.CreatePayloads()

         # Now, schedule the payload tests if desired.
         if not self.skip_testing:
           self._SchedulePayloadTests(testdata)

       except paygen_build_lib.BuildLocked as e:
         # These errors are normal if it's possible that another builder is
         # processing the same build. (perhaps by a trybot generating payloads on
         # request).
         logging.info('PaygenBuild for %s skipped because: %s', self.channel, e)

   def _SchedulePayloadTests(self, testdata):
     """Run the payload test stage(s) for the payloads just generated.

     Args:
       testdata: The value returned by PaygenBuild.CreatePayloads(), unpacked
                 here as (suite_name, archive_board, archive_build,
                 payload_test_configs).
     """
     suite_name, archive_board, archive_build, payload_test_configs = testdata

     # Builds without models get a single test stage with no model.
     if not self._run.config.models:
       lab_board_name = config_lib.GetNonUniBuildLabBoardName(
           archive_board)
       PaygenTestStage(self._run, self.buildstore, suite_name,
                       archive_board, None, lab_board_name,
                       self.channel, archive_build, self.skip_duts_check,
                       self.debug,
                       payload_test_configs,
                       config_lib.GetHWTestEnv(self._run.config)).Run()
       return

     # For unified builds, only test against the specified models.
     # 'au' is a test suite generated in ge_build_config.json
     au_models = [candidate for candidate in self._run.config.models
                  if candidate.test_suites and 'au' in candidate.test_suites]

     if len(au_models) == 1:
       model = au_models[0]
       PaygenTestStage(
           self._run, self.buildstore, suite_name, archive_board,
           model.name, model.lab_board_name, self.channel,
           archive_build, self.skip_duts_check, self.debug,
           payload_test_configs,
           config_lib.GetHWTestEnv(self._run.config,
                                   model_config=model)).Run()
     elif len(au_models) > 1:
       # FSI configs go to their applicable models; all the other configs go
       # to the 'au' models. The resulting stages are run in parallel.
       fsi_configs = {
           config for config in payload_test_configs
           if config.payload_type == paygen_build_lib.PAYLOAD_TYPE_FSI}
       non_fsi_configs = {
           config for config in payload_test_configs
           if config not in fsi_configs}
       stages = self._ScheduleForApplicableModels(
           archive_board, archive_build, fsi_configs, suite_name)
       stages += self._ScheduleForModels(
           archive_board, archive_build, au_models, non_fsi_configs,
           suite_name)
       parallel.RunParallelSteps([stage.Run for stage in stages])

   def _ScheduleForModels(self, archive_board, archive_build, models,
                          non_fsi_configs, suite_name):
     """Build one AU test stage for each of the given models.

     Args:
       archive_board: The board we schedule against.
       archive_build: The build of the payload config.
       models: The models to create a test stage for.
       non_fsi_configs: The payload configs handed to every created stage.
       suite_name: The name of the suite we are scheduling.

     Returns:
       A list of PaygenTestStage instances, one per model. They are not run.
     """
     test_stages = []
     for model in models:
       test_env = config_lib.GetHWTestEnv(self._run.config, model_config=model)
       test_stages.append(
           PaygenTestStage(
               self._run, self.buildstore, suite_name, archive_board,
               model.name, model.lab_board_name, self.channel,
               archive_build, self.skip_duts_check, self.debug,
               non_fsi_configs,
               test_env))
     return test_stages

   def _ScheduleForApplicableModels(self, archive_board, archive_build,
                                    fsi_configs, suite_name):
     """Build FSI AU test stages for every applicable_model.

     We schedule on every model even if it is not in the 'au' suite.
     This ensures no FSI tests are missed from models being disabled in the lab.

     Args:
       archive_board: The board we schedule against.
       archive_build: The build of the payload config.
       fsi_configs: The payload configs of type FSI.
       suite_name: The name of the suite we are scheduling.

     Returns:
       A list of PaygenTestStage instances. They are not run.
     """
     test_stages = []
     for fsi_config in fsi_configs:
       # Each FSI config is scheduled alone, on the models it applies to.
       applicable_models = [
           candidate for candidate in self._run.config.models
           if candidate.name in fsi_config.applicable_models]
       test_stages.extend(
           self._ScheduleForModels(archive_board, archive_build,
                                   applicable_models, [fsi_config],
                                   suite_name))
     return test_stages


 class PaygenTestStage(generic_stages.BoardSpecificBuilderStage):
   """Stage that schedules the payload tests."""

   category = constants.CI_INFRA_STAGE

   def __init__(self, builder_run, buildstore, suite_name, board, model,
                lab_board_name, channel, build, skip_duts_check, debug,
                payload_test_configs, test_env, **kwargs):
     """Record the test parameters and derive the stage name suffix.

     Args:
       builder_run: See builder_run on ArchiveStage
       buildstore: BuildStore instance to make DB calls with.
       suite_name: The name of the suite we are scheduling.
       board: Board overlay name.
       model: Model that will be tested. ('reef', 'pyro', etc)
       lab_board_name: The actual board label tested against in Autotest
       channel: Channel of the payloads ('stable', 'beta', etc). A trailing
                '-channel' is dropped. Only used for the stage name suffix.
       build: The build to test; handed to ScheduleAutotestTests.
       skip_duts_check: Do not check minimum available DUTs before tests.
       debug: Boolean indicating if this is a test run or a real run.
       payload_test_configs: A list of test_params.TestConfig objects. Only used
                             for scheduling HWTest with skylab tool.
       test_env: A string to indicate the env that the test should run in. The
                 value could be constants.ENV_SKYLAB or constants.ENV_AUTOTEST.
       **kwargs: Forwarded unchanged to the parent class constructor.
     """
     self.suite_name = suite_name
     self.board = board
     self.model = model
     self.lab_board_name = lab_board_name

     self.build = build
     self.skip_duts_check = skip_duts_check
     self.debug = debug
     self.payload_test_configs = payload_test_configs
     assert test_env in (constants.ENV_SKYLAB, constants.ENV_AUTOTEST)
     self.test_env = test_env

     # We don't need the '-channel' suffix in the stage name.
     channel_suffix = '-channel'
     if channel.endswith(channel_suffix):
       channel = channel[:-len(channel_suffix)]

     # The stage name suffix is the capitalized channel, plus the model when
     # there is one.
     suffix = channel.capitalize()
     if model:
       suffix += ' [%s]' % model

     super(PaygenTestStage, self).__init__(
         builder_run, buildstore, board, suffix=suffix, **kwargs)

   def PerformStage(self):
     """Schedule the tests to run."""
     # Schedule the tests to run and wait for the results.
     paygen_build_lib.ScheduleAutotestTests(
         self.suite_name,
         self.lab_board_name,
         self.model,
         self.build,
         self.skip_duts_check,
         self.debug,
         self.payload_test_configs,
         self.test_env,
         job_keyvals=self.GetJobKeyvals())

   def _HandleStageException(self, exc_info):
     """Report test lab failures as warnings instead of stage failures.

     Args:
       exc_info: (exception type, exception value, traceback) tuple.

     Returns:
       The result of whichever handler was selected.
     """
     exc_type, exc_value, _exc_tb = exc_info

     # If the exception is a TestLabFailure that means we couldn't schedule the
     # test. We don't fail the build for that.
     if issubclass(exc_type, failures_lib.TestLabFailure):
       return self._HandleExceptionAsWarning(exc_info)

     # We do the CompoundFailure dance, because that's how we'll get failures
     # from background processes returned to us.
     if (issubclass(exc_type, failures_lib.CompoundFailure) and
         exc_value.MatchesFailureType(failures_lib.TestLabFailure)):
       return self._HandleExceptionAsWarning(exc_info)

     return super(PaygenTestStage, self)._HandleStageException(exc_info)