# blob: 47771374862196d9d1dac8928465d9a15423f7b3 [file] [log] [blame]
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Module that helps to triage Commit Queue failures."""
from __future__ import print_function
import logging
import os
import pprint
from chromite.cbuildbot import cbuildbot_config
from chromite.cbuildbot import failures_lib
from chromite.cbuildbot import constants
from chromite.lib import git
from chromite.lib import patch as cros_patch
from chromite.lib import portage_util
def GetRelevantOverlaysForConfig(config, build_root):
  """Collects the overlays used by every board of |config|.

  Args:
    config: A cbuildbot config object; its |boards| attribute is iterated.
    build_root: Path to the build root.

  Returns:
    A set holding the union of what portage_util.FindOverlays reports
    (for constants.BOTH_OVERLAYS) for each board in |config|. The set is
    empty when |config| has no boards.
  """
  # One FindOverlays() lookup per board, in board order.
  per_board_overlays = [
      portage_util.FindOverlays(constants.BOTH_OVERLAYS, board, build_root)
      for board in config.boards]
  # Merge the per-board results into a single de-duplicated set.
  return set().union(*per_board_overlays)
def GetAffectedOverlays(change, manifest, all_overlays):
  """Get the set of overlays affected by a given change.

  Args:
    change: The GerritPatch instance to look at.
    manifest: A ManifestCheckout instance representing our build directory.
    all_overlays: The set of all valid overlays.

  Returns:
    The non-empty set of overlays affected by the specified |change|.
    Returns None when the change has no checkout in |manifest|, when its
    diff lists no paths at all, or when it touches anything that is not an
    overlay.
  """
  checkout = change.GetCheckout(manifest, strict=False)
  if not checkout:
    # Without a checkout we cannot map the change onto the source tree.
    return None

  git_repo = checkout.GetPath(absolute=True)

  # The whole git repo is an overlay. Return it.
  # Example: src/private-overlays/overlay-x86-zgb-private
  if git_repo in all_overlays:
    return set([git_repo])

  # Get the set of immediate subdirs affected by the change.
  # Example: src/overlays/overlay-x86-zgb
  subdirs = set(os.path.join(git_repo, path.split(os.path.sep)[0])
                for path in change.GetDiffStatus(git_repo))

  # If all of the subdirs are overlays, return them. An empty diff is
  # excluded explicitly: set().issubset(...) is vacuously True, which would
  # report a change that touched nothing as a pure overlay change (and let
  # callers treat it as irrelevant to every config).
  if subdirs and subdirs.issubset(all_overlays):
    return subdirs

  return None
class CategorizeChanges(object):
  """A collection of methods to help categorize GerritPatch changes."""

  @classmethod
  def ClassifyOverlayChanges(cls, changes, config, build_root, manifest):
    """Classifies overlay changes in |changes|.

    Args:
      changes: The list or set of GerritPatch instances.
      config: The cbuildbot config.
      build_root: Path to the build root.
      manifest: A ManifestCheckout instance representing our build directory.

    Returns:
      A (overlay_changes, irrelevant_overlay_changes) tuple; overlay_changes
      is a subset of |changes| that have modified one or more overlays, and
      irrelevant_overlay_changes is a subset of overlay_changes which are
      irrelevant to |config|.
    """
    # Overlays visible to this build, independent of any particular board.
    visible_overlays = set(portage_util.FindOverlays(config.overlays, None,
                                                     build_root))
    # The overlays relevant to this build.
    relevant_overlays = GetRelevantOverlaysForConfig(config, build_root)

    overlay_changes = set()
    irrelevant_overlay_changes = set()
    for change in changes:
      affected_overlays = GetAffectedOverlays(change, manifest,
                                              visible_overlays)
      if affected_overlays is None:
        # Not (purely) an overlay change; leave it for other classifiers.
        continue

      # The change modifies an overlay.
      overlay_changes.add(change)
      if relevant_overlays.isdisjoint(affected_overlays):
        # The change touched only overlays this config does not use.
        irrelevant_overlay_changes.add(change)

    return overlay_changes, irrelevant_overlay_changes

  @classmethod
  def ClassifyWorkOnChanges(cls, changes, config, build_root,
                            manifest, packages_under_test):
    """Classifies WorkOn package changes in |changes|.

    Args:
      changes: The list or set of GerritPatch instances.
      config: The cbuildbot config.
      build_root: Path to the build root.
      manifest: A ManifestCheckout instance representing our build directory.
      packages_under_test: A list of packages names included in the build.
        (e.g. ['chromeos-base/chromite-0.0.1-r1258']).

    Returns:
      A (workon_changes, irrelevant_workon_changes) tuple; workon_changes
      is a subset of |changes| that have modified workon packages, and
      irrelevant_workon_changes is a subset of workon_changes which are
      irrelevant to |config|.
    """
    workon_changes = set()
    irrelevant_workon_changes = set()

    # Strip the version of the package in packages_under_test.
    cpv_list = [portage_util.SplitCPV(x) for x in packages_under_test]
    cp_under_test = ['%s/%s' % (x.category, x.package) for x in cpv_list]
    # Set copy for O(1) membership tests in the loop below; the list is kept
    # so the log output keeps the order of |packages_under_test|.
    cp_under_test_set = set(cp_under_test)

    workon_dict = portage_util.BuildFullWorkonPackageDictionary(
        build_root, config.overlays, manifest)

    pp = pprint.PrettyPrinter(indent=2)
    logging.info('(project, branch) to workon package mapping:\n %s',
                 pp.pformat(workon_dict))
    logging.info('packages under test\n: %s', pp.pformat(cp_under_test))

    for change in changes:
      packages = workon_dict.get((change.project, change.tracking_branch))
      if packages:
        # The CL modifies a workon package.
        workon_changes.add(change)
        if cp_under_test_set.isdisjoint(packages):
          # None of the packages built from this project were tested.
          irrelevant_workon_changes.add(change)

    return workon_changes, irrelevant_workon_changes

  @classmethod
  def _FilterProjectsInManifestByGroup(cls, manifest, groups):
    """Filters projects in |manifest| by |groups|.

    Args:
      manifest: A git.Manifest instance.
      groups: A list of groups to filter.

    Returns:
      A set of (project, branch) tuples where each tuple is associated
      with at least one group in |groups|. The branch is the checkout's
      tracking branch after git.StripRefs().
    """
    results = set()
    # items() rather than iteritems(): the latter exists only on Python 2.
    for project, checkout_list in manifest.checkouts_by_name.items():
      for checkout in checkout_list:
        if any(x in checkout['groups'] for x in groups):
          branch = git.StripRefs(checkout['tracking_branch'])
          results.add((project, branch))

    return results

  @classmethod
  def GetChangesToBuildTools(cls, changes, manifest):
    """Returns the changes associated with buildtools projects.

    Args:
      changes: The list or set of GerritPatch instances.
      manifest: A git.Manifest instance.

    Returns:
      A subset of |changes| to projects of "buildtools" group.
    """
    buildtool_set = cls._FilterProjectsInManifestByGroup(
        manifest, ['buildtools'])
    return set(x for x in changes
               if (x.project, x.tracking_branch) in buildtool_set)

  @classmethod
  def GetIrrelevantChanges(cls, changes, config, build_root, manifest,
                           packages_under_test):
    """Determine changes irrelevant to build |config|.

    This method determines a set of changes that are irrelevant to the
    build |config|. The general rule of thumb is that if we are unsure
    whether a change is relevant, consider it relevant.

    Args:
      changes: The list or set of GerritPatch instances.
      config: The cbuildbot config.
      build_root: Path to the build root.
      manifest: A ManifestCheckout instance representing our build directory.
      packages_under_test: A list of packages that were tested in this build,
        or None to skip the workon package classification.

    Returns:
      A subset of |changes| which are irrelevant to |config|.
    """
    untriaged_changes = set(changes)
    irrelevant_changes = set()

    # Changes that modify projects used in building are always relevant.
    untriaged_changes -= cls.GetChangesToBuildTools(changes, manifest)

    # Handles overlay changes.
    # ClassifyOverlayChanges only handles overlays visible to this
    # build. For example, an external build may not be able to view
    # the internal overlays. However, in that case, the internal changes
    # have already been filtered out in CommitQueueSyncStage, and are
    # not included in |changes|.
    overlay_changes, irrelevant_overlay_changes = cls.ClassifyOverlayChanges(
        untriaged_changes, config, build_root, manifest)
    untriaged_changes -= overlay_changes
    irrelevant_changes |= irrelevant_overlay_changes

    # Handles workon package changes.
    if packages_under_test is not None:
      try:
        workon_changes, irrelevant_workon_changes = cls.ClassifyWorkOnChanges(
            untriaged_changes, config, build_root, manifest,
            packages_under_test)
      except Exception as e:
        # Ignore the exception if we cannot categorize workon
        # changes. We will conservatively assume the changes are
        # relevant.
        logging.warning('Unable to categorize cros workon changes: %s', e)
      else:
        untriaged_changes -= workon_changes
        irrelevant_changes |= irrelevant_workon_changes

    return irrelevant_changes
class CalculateSuspects(object):
"""Diagnose the cause for a given set of failures."""
@classmethod
def GetBlamedChanges(cls, changes):
"""Returns the changes that have been manually blamed.
Args:
changes: List of GerritPatch changes.
Returns:
A list of |changes| that were marked verified: -1 or
code-review: -2.
"""
return [x for x in changes if
any(x.HasApproval(f, v) for f, v in
constants.DEFAULT_CQ_SHOULD_REJECT_FIELDS.iteritems())]
@classmethod
def _FindPackageBuildFailureSuspects(cls, changes, messages):
"""Figure out what CLs are at fault for a set of build failures.
Args:
changes: A list of cros_patch.GerritPatch instances to consider.
messages: A list of build failure messages, of type
BuildFailureMessage.
"""
suspects = set()
for message in messages:
suspects.update(message.FindPackageBuildFailureSuspects(changes))
return suspects
@classmethod
def FilterChromiteChanges(cls, changes):
"""Returns a list of chromite changes in |changes|."""
return [x for x in changes if x.project == constants.CHROMITE_PROJECT]
@classmethod
def _MatchesFailureType(cls, messages, fail_type, strict=True):
"""Returns True if all failures are instances of |fail_type|.
Args:
messages: A list of BuildFailureMessage or NoneType objects
from the failed slaves.
fail_type: The exception class to look for.
strict: If False, treat NoneType message as a match.
Returns:
True if all objects in |messages| are non-None and all failures are
instances of |fail_type|.
"""
return ((not strict or all(messages)) and
all(x.MatchesFailureType(fail_type) for x in messages if x))
@classmethod
def OnlyLabFailures(cls, messages, no_stat):
"""Determine if the cause of build failure was lab failure.
Args:
messages: A list of BuildFailureMessage or NoneType objects
from the failed slaves.
no_stat: A list of builders which failed prematurely without reporting
status.
Returns:
True if the build failed purely due to lab failures.
"""
# If any builder failed prematuely, lab failure was not the only cause.
return (not no_stat and
cls._MatchesFailureType(messages, failures_lib.TestLabFailure))
@classmethod
def OnlyInfraFailures(cls, messages, no_stat):
"""Determine if the cause of build failure was infrastructure failure.
Args:
messages: A list of BuildFailureMessage or NoneType objects
from the failed slaves.
no_stat: A list of builders which failed prematurely without reporting
status.
Returns:
True if the build failed purely due to infrastructure failures.
"""
# "Failed to report status" and "NoneType" messages are considered
# infra failures.
return ((not messages and no_stat) or
cls._MatchesFailureType(
messages, failures_lib.InfrastructureFailure, strict=False))
@classmethod
def FindSuspects(cls, changes, messages, infra_fail=False, lab_fail=False):
"""Find out what changes probably caused our failure.
In cases where there were no internal failures, we can assume that the
external failures are at fault. Otherwise, this function just defers to
_FindPackageBuildFailureSuspects and FindPreviouslyFailedChanges as needed.
If the failures don't match either case, just fail everything.
Args:
changes: A list of cros_patch.GerritPatch instances to consider.
messages: A list of build failure messages, of type
BuildFailureMessage or of type NoneType.
infra_fail: The build failed purely due to infrastructure failures.
lab_fail: The build failed purely due to test lab infrastructure
failures.
Returns:
A set of changes as suspects.
"""
bad_changes = cls.GetBlamedChanges(changes)
if bad_changes:
# If there are changes that have been set verified=-1 or
# code-review=-2, these changes are the ONLY suspects of the
# failed build.
logging.warning('Detected that some changes have been blamed for '
'the build failure. Only these CLs will be rejected: %s',
cros_patch.GetChangesAsString(bad_changes))
return set(bad_changes)
elif lab_fail:
logging.warning('Detected that the build failed purely due to HW '
'Test Lab failure(s). Will not reject any changes')
return set()
elif not lab_fail and infra_fail:
# The non-lab infrastructure errors might have been caused
# by chromite changes.
logging.warning(
'Detected that the build failed due to non-lab infrastructure '
'issue(s). Will only reject chromite changes')
return set(cls.FilterChromiteChanges(changes))
if all(message and message.IsPackageBuildFailure()
for message in messages):
# If we are here, there are no None messages.
suspects = cls._FindPackageBuildFailureSuspects(changes, messages)
else:
suspects = set(changes)
return suspects
@classmethod
def GetResponsibleOverlays(cls, build_root, messages):
"""Get the set of overlays that could have caused failures.
This loops through the set of builders that failed in a given run and
finds what overlays could have been responsible for the failure.
Args:
build_root: Build root directory.
messages: A list of build failure messages from supporting builders.
These must be BuildFailureMessage objects or NoneType objects.
Returns:
The set of overlays that could have caused the failures. If we can't
determine what overlays are responsible, returns None.
"""
responsible_overlays = set()
for message in messages:
if message is None:
return None
bot_id = message.builder
config = cbuildbot_config.config.get(bot_id)
if not config:
return None
responsible_overlays.update(
GetRelevantOverlaysForConfig(config, build_root))
return responsible_overlays
@classmethod
def FilterOutInnocentChanges(cls, build_root, changes, messages):
"""Filter out innocent changes based on failure messages.
Args:
build_root: Build root directory.
changes: GitRepoPatches that might be guilty.
messages: A list of build failure messages from supporting builders.
These must be BuildFailureMessage objects or NoneType objects.
Returns:
A list of the changes that we could not prove innocent.
"""
# If there were no internal failures, only kick out external changes.
# (Still, fail all changes if we received any None messages.)
candidates = changes
if all(messages) and not any(message.internal for message in messages):
candidates = [change for change in changes if not change.internal]
return cls.FilterOutInnocentOverlayChanges(build_root, candidates, messages)
@classmethod
def FilterOutInnocentOverlayChanges(cls, build_root, changes, messages):
"""Filter out innocent overlay changes based on failure messages.
It is not possible to break a x86-generic builder via a change to an
unrelated overlay (e.g. amd64-generic). Filter out changes that are
known to be innocent.
Args:
build_root: Build root directory.
changes: GitRepoPatches that might be guilty.
messages: A list of build failure messages from supporting builders.
These must be BuildFailureMessage objects or NoneType objects.
Returns:
A list of the changes that we could not prove innocent.
"""
all_overlays = set(portage_util.FindOverlays(
constants.BOTH_OVERLAYS, None, build_root))
responsible_overlays = cls.GetResponsibleOverlays(build_root, messages)
if responsible_overlays is None:
return changes
manifest = git.ManifestCheckout.Cached(build_root)
candidates = []
for change in changes:
overlays = GetAffectedOverlays(change, manifest, all_overlays)
if overlays is None or overlays.issubset(responsible_overlays):
candidates.append(change)
return candidates