blob: d03417a85373647770e4cb3c32063792ea1a8303 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Module containing the various stages that a builder runs."""
import json
import logging
import os
from chromite.cbuildbot import cbuildbot_commands as commands
from chromite.cbuildbot import cbuildbot_failures
from chromite.cbuildbot import cbuildbot_run
from chromite.cbuildbot.stages import artifact_stages
from chromite.lib import cros_build_lib
from chromite.lib import gs
from chromite.lib import osutils
from chromite.lib import parallel
from chromite.lib import timeout_util
class InvalidTestConditionException(Exception):
"""Raised when pre-conditions for a test aren't met."""
class SignerTestStage(artifact_stages.ArchivingStage):
"""Run signer related tests."""
option_name = 'tests'
config_name = 'signer_tests'
# If the signer tests take longer than 30 minutes, abort. They usually take
# five minutes to run.
SIGNER_TEST_TIMEOUT = 1800
def PerformStage(self):
if not self.archive_stage.WaitForRecoveryImage():
raise InvalidTestConditionException('Missing recovery image.')
with timeout_util.Timeout(self.SIGNER_TEST_TIMEOUT):
commands.RunSignerTests(self._build_root, self._current_board)
class SignerResultsTimeout(cbuildbot_failures.StepFailure):
"""The signer did not produce any results inside the expected time."""
class SignerFailure(cbuildbot_failures.StepFailure):
"""The signer returned an error result."""
class MissingInstructionException(cbuildbot_failures.StepFailure):
"""We didn't receive the list of signing instructions PushImage uploaded."""
class MalformedResultsException(cbuildbot_failures.StepFailure):
"""The Signer results aren't formatted as we expect."""
class PaygenSigningRequirementsError(cbuildbot_failures.StepFailure):
"""Paygen stage can't run if signing failed."""
class PaygenCrostoolsNotAvailableError(cbuildbot_failures.StepFailure):
"""Paygen stage can't run if signing failed."""
class PaygenNoPaygenConfigForBoard(cbuildbot_failures.StepFailure):
"""Paygen can't run with a release.conf config for the board."""
class PaygenStage(artifact_stages.ArchivingStage):
"""Stage that generates release payloads.
If this stage is created with a 'channels' argument, it can run
independantly. Otherwise, it's dependent on values queued up by
the ArchiveStage (push_image).
"""
option_name = 'paygen'
config_name = 'paygen'
# Poll for new results every 30 seconds.
SIGNING_PERIOD = 30
# Timeout for PushImage to finish uploading images. 2 hours in seconds.
PUSHIMAGE_TIMEOUT = 2 * 60 * 60
# Timeout for the signing process. 2 hours in seconds.
SIGNING_TIMEOUT = 2 * 60 * 60
FINISHED = 'finished'
def __init__(self, builder_run, board, archive_stage, channels=None,
**kwargs):
"""Init that accepts the channels argument, if present.
Args:
builder_run: See builder_run on ArchivingStage.
board: See board on ArchivingStage.
archive_stage: See archive_stage on ArchivingStage.
channels: Explicit list of channels to generate payloads for.
If empty, will instead wait on values from push_image.
Channels is normally None in release builds, and normally set
for trybot 'payloads' builds.
"""
super(PaygenStage, self).__init__(builder_run, board, archive_stage,
**kwargs)
self.signing_results = {}
self.channels = channels
def _HandleStageException(self, exc_info):
"""Override and don't set status to FAIL but FORGIVEN instead."""
exc_type = exc_info[0]
# If Paygen fails to find anything needed in release.conf, treat it
# as a warning, not a failure. This is common during new board bring up.
if issubclass(exc_type, PaygenNoPaygenConfigForBoard):
return self._HandleExceptionAsWarning(exc_info)
return super(PaygenStage, self)._HandleStageException(exc_info)
def _JsonFromUrl(self, gs_ctx, url):
"""Fetch a GS Url, and parse it as Json.
Args:
gs_ctx: GS Context.
url: Url to fetch and parse.
Returns:
None if the Url doesn't exist.
Parsed Json structure if it did.
Raises:
MalformedResultsException if it failed to parse.
"""
try:
signer_txt = gs_ctx.Cat(url).output
except gs.GSNoSuchKey:
return None
try:
return json.loads(signer_txt)
except ValueError:
# We should never see malformed Json, even for intermediate statuses.
raise MalformedResultsException(signer_txt)
def _SigningStatusFromJson(self, signer_json):
"""Extract a signing status from a signer result Json DOM.
Args:
signer_json: The parsed json status from a signer operation.
Returns:
string with a simple status: 'passed', 'failed', 'downloading', etc,
or '' if the json doesn't contain a status.
"""
return (signer_json or {}).get('status', {}).get('status', '')
def _CheckForResults(self, gs_ctx, instruction_urls_per_channel,
channel_notifier):
"""timeout_util.WaitForSuccess func to check a list of signer results.
Args:
gs_ctx: Google Storage Context.
instruction_urls_per_channel: Urls of the signer result files
we're expecting.
channel_notifier: BackgroundTaskRunner into which we push channels for
processing.
Returns:
Number of results not yet collected.
"""
COMPLETED_STATUS = ('passed', 'failed')
# Assume we are done, then try to prove otherwise.
results_completed = True
for channel in instruction_urls_per_channel.keys():
self.signing_results.setdefault(channel, {})
if (len(self.signing_results[channel]) ==
len(instruction_urls_per_channel[channel])):
continue
for url in instruction_urls_per_channel[channel]:
# Convert from instructions URL to instructions result URL.
url += '.json'
# We already have a result for this URL.
if url in self.signing_results[channel]:
continue
signer_json = self._JsonFromUrl(gs_ctx, url)
if self._SigningStatusFromJson(signer_json) in COMPLETED_STATUS:
# If we find a completed result, remember it.
self.signing_results[channel][url] = signer_json
# If we don't have full results for this channel, we aren't done
# waiting.
if (len(self.signing_results[channel]) !=
len(instruction_urls_per_channel[channel])):
results_completed = False
continue
# If we reach here, the channel has just been completed for the first
# time.
# If all results 'passed' the channel was successfully signed.
channel_success = True
for signer_result in self.signing_results[channel].values():
if self._SigningStatusFromJson(signer_result) != 'passed':
channel_success = False
# If we successfully completed the channel, inform paygen.
if channel_success:
channel_notifier(channel)
return results_completed
def _WaitForPushImage(self):
"""Block until push_image data is ready.
Returns:
Push_image results, expected to be of the form:
{ 'channel': ['gs://instruction_uri1', 'gs://signer_instruction_uri2'] }
Raises:
MissingInstructionException: If push_image sent us an error, or timed out.
"""
try:
instruction_urls_per_channel = self.board_runattrs.GetParallel(
'instruction_urls_per_channel', timeout=self.PUSHIMAGE_TIMEOUT)
except cbuildbot_run.AttrTimeoutError:
instruction_urls_per_channel = None
# A value of None signals an error, either in PushImage, or a timeout.
if instruction_urls_per_channel is None:
raise MissingInstructionException('PushImage results not available.')
return instruction_urls_per_channel
def _WaitForSigningResults(self,
instruction_urls_per_channel,
channel_notifier):
"""Do the work of waiting for signer results and logging them.
Args:
instruction_urls_per_channel: push_image data (see _WaitForPushImage).
channel_notifier: BackgroundTaskRunner into which we push channels for
processing.
Raises:
ValueError: If the signer result isn't valid json.
RunCommandError: If we are unable to download signer results.
"""
gs_ctx = gs.GSContext(dry_run=self._run.debug)
try:
cros_build_lib.Info('Waiting for signer results.')
timeout_util.WaitForReturnTrue(
self._CheckForResults,
func_args=(gs_ctx, instruction_urls_per_channel, channel_notifier),
timeout=self.SIGNING_TIMEOUT, period=self.SIGNING_PERIOD)
except timeout_util.TimeoutError:
msg = 'Image signing timed out.'
cros_build_lib.Error(msg)
cros_build_lib.PrintBuildbotStepText(msg)
raise SignerResultsTimeout(msg)
# Log all signer results, then handle any signing failures.
failures = []
for url_results in self.signing_results.values():
for url, signer_result in url_results.iteritems():
result_description = os.path.basename(url)
cros_build_lib.PrintBuildbotStepText(result_description)
cros_build_lib.Info('Received results for: %s', result_description)
cros_build_lib.Info(json.dumps(signer_result, indent=4))
status = self._SigningStatusFromJson(signer_result)
if status != 'passed':
failures.append(result_description)
cros_build_lib.Error('Signing failed for: %s', result_description)
if failures:
cros_build_lib.Error('Failure summary:')
for failure in failures:
cros_build_lib.Error(' %s', failure)
raise SignerFailure(failures)
def PerformStage(self):
"""Do the work of generating our release payloads."""
# Convert to release tools naming for boards.
board = self._current_board.replace('_', '-')
version = self._run.attrs.release_tag
assert version, "We can't generate payloads without a release_tag."
logging.info("Generating payloads for: %s, %s", board, version)
# Test to see if the current board has a Paygen configuration. We do
# this here, no in the sub-process so we don't have to pass back a
# failure reason.
try:
from crostools.lib import paygen_build_lib
paygen_build_lib.ValidateBoardConfig(board)
except paygen_build_lib.BoardNotConfigured:
raise PaygenNoPaygenConfigForBoard(
'No release.conf entry was found for board %s. Get a TPM to fix.' %
board)
except ImportError:
raise PaygenCrostoolsNotAvailableError()
with parallel.BackgroundTaskRunner(self._RunPaygenInProcess) as per_channel:
def channel_notifier(channel):
per_channel.put((channel, board, version, self._run.debug,
self._run.config.perform_paygen_testing))
if self.channels:
logging.info("Using explicit channels: %s", self.channels)
# If we have an explicit list of channels, use it.
for channel in self.channels:
channel_notifier(channel)
else:
instruction_urls_per_channel = self._WaitForPushImage()
self._WaitForSigningResults(instruction_urls_per_channel,
channel_notifier)
def _RunPaygenInProcess(self, channel, board, version, debug, test_payloads):
"""Helper for PaygenStage that invokes payload generation.
This method is intended to be safe to invoke inside a process.
Args:
channel: Channel of payloads to generate ('stable', 'beta', etc)
board: Board of payloads to generate ('x86-mario', 'x86-alex-he', etc)
version: Version of payloads to generate.
debug: Flag telling if this is a real run, or a test run.
test_payloads: Generate test payloads, and schedule auto testing.
"""
# TODO(dgarrett): Remove when crbug.com/341152 is fixed.
# These modules are imported here because they aren't always available at
# cbuildbot startup.
# pylint: disable=F0401
try:
from crostools.lib import gspaths
from crostools.lib import paygen_build_lib
except ImportError:
# We can't generate payloads without crostools.
raise PaygenCrostoolsNotAvailableError()
# Convert to release tools naming for channels.
if not channel.endswith('-channel'):
channel += '-channel'
with osutils.TempDir(sudo_rm=True) as tempdir:
# Create the definition of the build to generate payloads for.
build = gspaths.Build(channel=channel,
board=board,
version=version)
try:
# Generate the payloads.
self._PrintLoudly('Starting %s, %s, %s' % (channel, version, board))
paygen_build_lib.CreatePayloads(build,
work_dir=tempdir,
dry_run=debug,
run_parallel=True,
run_on_builder=True,
skip_test_payloads=not test_payloads,
skip_autotest=not test_payloads)
except (paygen_build_lib.BuildFinished,
paygen_build_lib.BuildLocked,
paygen_build_lib.BuildSkip) as e:
# These errors are normal if it's possible for another process to
# work on the same build. This process could be a Paygen server, or
# another builder (perhaps by a trybot generating payloads on request).
#
# This means the build was finished by the other process, is already
# being processed (so the build is locked), or that it's been marked
# to skip (probably done manually).
cros_build_lib.Info('Paygen skipped because: %s', e)
except cbuildbot_failures.InfrastructureFailure as e:
# TODO(dgarrett): chromium:377208 We should generate a warning for the
# stage. That's current difficult, but should be easy after CL:200682
# lands.
# Tests were not correctly scheduled.
logging.warning('Failed to schedule Payload tests: %s', e)