blob: fb47215aacc1bf65049da04caf6fbb4ad2eadeaf [file] [log] [blame]
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Build database associating Gerrit change # with commit metadata."""
from __future__ import print_function
import collections
import pickle
import re
import subprocess
import git
from chromite.lib import clactions
from chromite.lib import commandline
_GERRIT_MESSAGE_REXP = (
"Reviewed-on: https://(.*?)(?:/gerrit)?(?:/r)?/([0-9]*)\n")
# These are gerrit hosts that we know have been used for some CLs in our
# history. We do not need to log them as anomalies.
_SILENTLY_IGNORED_GERRIT_HOSTS = (
'10.10.10.29',
'android.intel.com',
'gerrit.rds.intel.com',
'review.coreboot.org',
'weave-review.googlesource.com',
)
def _GetParser():
  """Build the command-line parser for this script.

  Returns:
    A commandline.ArgumentParser that accepts the positional |repos| list
    plus the --since, --verbose/-v and --output/-o options.
  """
  # Each entry is (flag names, add_argument keyword arguments), registered in
  # the order listed.
  argument_specs = (
      (('repos',),
       dict(action='store', nargs='*',
            help='Paths to git repository to examine. If none '
                 'provided, iterate over entire repo checkout.')),
      (('--since',),
       dict(action='store', default='2014-01-01',
            help='Date of earliest commit to examine. '
                 'Default: 2014-01-01')),
      (('--verbose', '-v'),
       dict(action='store_true', default=False,
            help='Print more logging.')),
      (('--output', '-o'),
       dict(action='store', default=None,
            help='Path to file to which to write pickled database.')),
  )

  arg_parser = commandline.ArgumentParser(description=__doc__)
  for flag_names, settings in argument_specs:
    arg_parser.add_argument(*flag_names, **settings)
  return arg_parser
# Gerrit metadata parsed from one commit message, plus that commit's hex SHA.
CommitInfo = collections.namedtuple(
    'CommitInfo',
    (
        'gerrit_host',    # Host captured from the "Reviewed-on:" line.
        'change_number',  # Change number captured from the same line.
        'hexsha',         # The commit's hexsha attribute.
    ),
)
# The object that main() pickles to the --output file.
ChangeDatabase = collections.namedtuple(
    'ChangeDatabase',
    (
        # Dict of change -> (repo, CommitInfo) for changes seen exactly once.
        'unique_changes',
        # Dict of change -> list of (repo, CommitInfo) for changes seen more
        # than once.
        'duplicate_changes',
    ),
)
def _ParseCommitMessage(commit_message):
  """Pull the Gerrit host and change number out of a commit message.

  If the message contains several lines matching _GERRIT_MESSAGE_REXP, the
  last match is the one returned.

  Args:
    commit_message: String commit message.

  Returns:
    A (gerrit_host, change_number) tuple holding the two captured groups.

  Raises:
    ValueError: If nothing in the message matches _GERRIT_MESSAGE_REXP.
  """
  matches = re.findall(_GERRIT_MESSAGE_REXP, commit_message)
  if matches:
    # findall yields one (host, number) tuple per match; use the final one.
    last_match = matches[-1]
    return (last_match[0], last_match[1])
  raise ValueError(
      'Commit message does not conform to Gerrit-reviewed pattern.')
def _ProcessCommit(commit):
  """Build a CommitInfo for a commit, if it was reviewed on Gerrit.

  Args:
    commit: A git.Commit instance; its message and hexsha are read.

  Returns:
    A CommitInfo for the commit when its message parses as Gerrit-reviewed,
    otherwise None.
  """
  info = None
  try:
    host, number = _ParseCommitMessage(commit.message)
    info = CommitInfo(host, number, commit.hexsha)
  except ValueError:
    # Not a Gerrit-reviewed commit message; report that as None.
    pass
  return info
def _ProcessRepo(repo, since):
  """Collect Gerrit change info for the commits of one git repository.

  Commits whose Gerrit host is unknown to clactions are left out of the
  result. A message is printed for them unless the host is listed in
  _SILENTLY_IGNORED_GERRIT_HOSTS.

  Args:
    repo: Path to git repository.
    since: Date in YYYY-MM-DD format to process from.

  Returns:
    A list of (GerritChangeTuple, CommitInfo) pairs for the Gerrit-reviewed
    commits found in |repo| after time |since|. Note: this list may contain
    duplicates, as historically some commits were cherry-picked forcefully or
    landed outside of the CQ, and may have inaccurate gerrit info extracted
    from their commit message.
  """
  git_repo = git.Repo(repo)
  print('Examining git repository at %s' % repo)

  results = []
  for commit in git_repo.iter_commits(since=since):
    info = _ProcessCommit(commit)
    if not info:
      # Commit message carries no Gerrit review information.
      continue

    try:
      change = clactions.GerritChangeTuple.FromHostAndNumber(
          info.gerrit_host, info.change_number)
    except clactions.UnknownGerritHostError as e:
      if e.gerrit_host not in _SILENTLY_IGNORED_GERRIT_HOSTS:
        print('Unknown gerrit host %s for commit %s'
              % (e.gerrit_host, info.hexsha))
      continue

    results.append((change, info))
  return results
def main(argv):
  """Scan git repositories and build the Gerrit change database.

  Each repository is processed with _ProcessRepo and its changes are merged
  into two dicts: changes seen exactly once, and changes seen more than once.
  Progress and totals are printed. When --output is given, the result is
  pickled to that path as a ChangeDatabase.

  Args:
    argv: List of command-line arguments, as accepted by _GetParser().
  """
  parser = _GetParser()
  options = parser.parse_args(argv)

  repo_list = options.repos
  if not repo_list:
    # No repos were named, so ask the `repo` tool for the checkout's projects.
    # universal_newlines=True makes check_output return text instead of bytes,
    # so the paths are str on both Python 2 and Python 3.
    repo_list = subprocess.check_output(
        ['repo', 'list', '-fp'], universal_newlines=True).splitlines()

  # change -> list of (repo, CommitInfo), for changes seen more than once.
  duplicates = {}
  # change -> (repo, CommitInfo), for changes seen exactly once so far.
  changes_dict = {}

  for repo in repo_list:
    changes_list = _ProcessRepo(repo, options.since)
    print('Found %s Gerrit-reviewed commits since %s.' %
          (len(changes_list), options.since))

    # Merge this repo info into database
    for change, commit_info in changes_list:
      record = (repo, commit_info)
      if change in duplicates:
        duplicates[change].append(record)
      elif change in changes_dict:
        # Second sighting: move the first record out of the unique set so the
        # change appears only under duplicates.
        duplicates[change] = [changes_dict.pop(change), record]
      else:
        changes_dict[change] = record

    print('Up to %s uniques, %s duplicates' %
          (len(changes_dict), len(duplicates)))

  print('There are %s unique changes and %s duplicates.'
        % (len(changes_dict), len(duplicates)))

  if options.output:
    db = ChangeDatabase(changes_dict, duplicates)
    print('Writing database as pickle to %s' % options.output)
    # Pickle data is a byte stream, so the file must be opened in binary mode.
    # Text mode fails on Python 3 and can corrupt the data on platforms that
    # translate newlines.
    with open(options.output, 'wb') as f:
      pickle.dump(db, f)