gather_builder_stats: Refactor CL actions summary class in its own module. This is a preparatory CL that splits out summarize_build_stats from gather_builder_stats. The new module currently only contains the logic necessary to summarize CL actions. This CL prepares the way to remove GS and spreadsheet dependence from the script that summarizes CL actions. BUG=chromium:454592 TEST=(1) bin/summarize_build_stats --cl-actions --email=myemail@chromium.org \ --cred-dir path/to/cidb/dir --past-week (2) bin/gather_builder_stats --cq-master --past-week (3) (updated) chromite unittests. Change-Id: I690222a3e5d2c04fc9806f6eb0d48ec0b4ecce60 Reviewed-on: https://chromium-review.googlesource.com/245920 Reviewed-by: Aviv Keshet <akeshet@chromium.org> Commit-Queue: Prathmesh Prabhu <pprabhu@chromium.org> Tested-by: Prathmesh Prabhu <pprabhu@chromium.org>

commit: 67aa844382722a987e434615aac937d330f78191 [log] [tgz]
author: Prathmesh Prabhu <pprabhu@chromium.org> Tue Feb 03 14:26:48 2015 -0800
committer: ChromeOS Commit Bot <chromeos-commit-bot@chromium.org> Thu Feb 12 00:41:32 2015 +0000
tree: 25c758212907e1220dfa65408c9cdd2298819012
parent: 5cc788c5c031341aa9925924ceb65bad680e1b35 [diff]
diff --git a/bin/summarize_build_stats b/bin/summarize_build_stats
new file mode 120000
index 0000000..72196ce
--- /dev/null
+++ b/bin/summarize_build_stats

@@ -0,0 +1 @@
+../scripts/wrapper.py
\ No newline at end of file

diff --git a/scripts/gather_builder_stats.py b/scripts/gather_builder_stats.py
index 41e6a21..acc2739 100644
--- a/scripts/gather_builder_stats.py
+++ b/scripts/gather_builder_stats.py

@@ -7,10 +7,7 @@
 from __future__ import division
 from __future__ import print_function
 
-import collections
 import datetime
-import logging
-import numpy
 import os
 import re
 import sys
@@ -18,8 +15,6 @@
 from chromite.cbuildbot import cbuildbot_config
 from chromite.cbuildbot import metadata_lib
 from chromite.cbuildbot import constants
-from chromite.lib import cidb
-from chromite.lib import clactions
 from chromite.lib import commandline
 from chromite.lib import cros_build_lib
 from chromite.lib import gdata_lib
@@ -56,13 +51,6 @@
 PFQ_SS_KEY = '0AhFPeDq6pmwxdDdrYXk3cnJJV05jN3Zja0s5VjFfNlE'
 CANARY_SS_KEY = '0AhFPeDq6pmwxdDBWM0t3YnYyeVFoM3VaRXNianc1VVE'
 
-# These are the preferred base URLs we use to canonicalize bugs/CLs.
-BUGANIZER_BASE_URL = 'b/'
-GUTS_BASE_URL = 't/'
-CROS_BUG_BASE_URL = 'crbug.com/'
-INTERNAL_CL_BASE_URL = 'crosreview.com/i/'
-EXTERNAL_CL_BASE_URL = 'crosreview.com/'
-
 
 class GatherStatsError(Exception):
   """Base exception class for exceptions in this module."""
@@ -106,27 +94,6 @@
   return sorted(slave_config.name for slave_config in slave_configs)
 
 
-def _RemoveBuildsWithNoBuildId(builds, description):
-  """Remove builds without a build_id.
-
-  They happen during some failure modes, but we can't process them, and
-  they don't affect the stats. We do log a warning message if any were
-  filtered.
-
-  Args:
-    builds: List of metadata_lib.BuildData objects.
-    description: A string describing the source of missing build_ids.
-  """
-  result = [b for b in builds if 'build_id' in b.metadata_dict]
-  filtered = len(builds) - len(result)
-
-  if filtered:
-    logging.warn('Found %d %s builds without a build_id.',
-                 filtered, description)
-
-  return result
-
-
 class StatsTable(table.Table):
   """Stats table for any specific target on a waterfall."""
 
@@ -609,12 +576,11 @@
   # This is needed if you are writing data to the Google Sheets spreadsheet.
   GET_SHEETS_VERSION = True
 
-  def __init__(self, config_target, db=None, ss_key=None,
+  def __init__(self, config_target, ss_key=None,
                no_sheets_version_filter=False):
     self.builds = []
     self.gs_ctx = gs.GSContext()
     self.config_target = config_target
-    self.db = db
     self.ss_key = ss_key
     self.no_sheets_version_filter = no_sheets_version_filter
     self.summary = {}
@@ -635,7 +601,7 @@
                             sorted by build number.
       starting_build_number: The lowest build number to include in
                              self.builds.
-      creds: Login credentials as returned by _PrepareCreds. (optional)
+      creds: Login credentials as returned by PrepareCreds. (optional)
     """
     self.builds = self._FetchBuildData(start_date, end_date, self.config_target,
                                        self.gs_ctx)
@@ -888,515 +854,8 @@
     super(CanaryMasterStats, self).__init__(CANARY_MASTER, **kwargs)
 
 
-class CLStats(StatsManager):
-  """Manager for stats about CL actions taken by the Commit Queue."""
-  PATCH_HANDLING_TIME_SUMMARY_KEY = 'patch_handling_time'
-  SUMMARY_SPREADSHEET_COLUMNS = {
-      PATCH_HANDLING_TIME_SUMMARY_KEY: ('PatchHistogram', 1)}
-  COL_FAILURE_CATEGORY = 'failure category'
-  COL_FAILURE_BLAME = 'bug or bad CL'
-  REASON_BAD_CL = 'Bad CL'
-  BOT_TYPE = CQ
-  GET_SHEETS_VERSION = False
-
-  def __init__(self, email, **kwargs):
-    super(CLStats, self).__init__(CQ_MASTER, **kwargs)
-    self.actions = []
-    self.per_patch_actions = {}
-    self.per_cl_actions = {}
-    self.email = email
-    self.reasons = {}
-    self.blames = {}
-    self.summary = {}
-    self.build_numbers_by_build_id = {}
-
-  def GatherFailureReasons(self, creds):
-    """Gather the reasons why our builds failed and the blamed bugs or CLs.
-
-    Args:
-      creds: A gdata_lib.Creds object.
-    """
-    data_table = CQMasterStats.TABLE_CLASS()
-    uploader = SSUploader(creds, self.ss_key)
-    ss_failure_category = gdata_lib.PrepColNameForSS(self.COL_FAILURE_CATEGORY)
-    ss_failure_blame = gdata_lib.PrepColNameForSS(self.COL_FAILURE_BLAME)
-    rows = uploader.GetRowCacheByCol(data_table.WORKSHEET_NAME,
-                                     data_table.COL_BUILD_NUMBER)
-    for b in self.builds:
-      try:
-        row = rows[str(b.build_number)]
-      except KeyError:
-        self.reasons[b.build_number] = 'None'
-        self.blames[b.build_number] = []
-      else:
-        self.reasons[b.build_number] = str(row[ss_failure_category])
-        self.blames[b.build_number] = self.ProcessBlameString(
-            str(row[ss_failure_blame]))
-
-  @staticmethod
-  def ProcessBlameString(blame_string):
-    """Parse a human-created |blame_string| from the spreadsheet.
-
-    Returns:
-      A list of canonicalized URLs for bugs or CLs that appear in the blame
-      string. Canonicalized form will be 'crbug.com/1234',
-      'crosreview.com/1234', 'b/1234', 't/1234', or 'crosreview.com/i/1234' as
-      applicable.
-    """
-    urls = []
-    tokens = blame_string.split()
-
-    # Format to generate the regex patterns. Matches one of provided domain
-    # names, followed by lazy wildcard, followed by greedy digit wildcard,
-    # followed by optional slash and optional comma.
-    general_regex = r'^.*(%s).*?([0-9]+)/?,?$'
-
-    crbug = general_regex % r'crbug.com|code.google.com'
-    internal_review = general_regex % (
-        r'chrome-internal-review.googlesource.com|crosreview.com/i')
-    external_review = general_regex % (
-        r'crosreview.com|chromium-review.googlesource.com')
-    guts = general_regex % r't/|gutsv\d.corp.google.com/#ticket/'
-
-    # Buganizer regex is different, as buganizer urls do not end with the bug
-    # number.
-    buganizer = r'^.*(b/|b.corp.google.com/issue\?id=)([0-9]+).*$'
-
-    # Patterns need to be tried in a specific order -- internal review needs
-    # to be tried before external review, otherwise urls like crosreview.com/i
-    # will be incorrectly parsed as external.
-    patterns = [crbug,
-                internal_review,
-                external_review,
-                buganizer,
-                guts]
-    url_patterns = [CROS_BUG_BASE_URL,
-                    INTERNAL_CL_BASE_URL,
-                    EXTERNAL_CL_BASE_URL,
-                    BUGANIZER_BASE_URL,
-                    GUTS_BASE_URL]
-
-    for t in tokens:
-      for p, u in zip(patterns, url_patterns):
-        m = re.match(p, t)
-        if m:
-          urls.append(u + m.group(2))
-          break
-
-    return urls
-
-  def Gather(self, start_date, end_date, sort_by_build_number=True,
-             starting_build_number=0, creds=None):
-    """Fetches build data and failure reasons.
-
-    Args:
-      start_date: A datetime.date instance for the earliest build to
-                  examine.
-      end_date: A datetime.date instance for the latest build to
-                examine.
-      sort_by_build_number: Optional boolean. If True, builds will be
-                            sorted by build number.
-      starting_build_number: The lowest build number from the CQ to include in
-                             the results.
-      creds: Login credentials as returned by _PrepareCreds. (optional)
-    """
-    if not creds:
-      creds = _PrepareCreds(self.email)
-    super(CLStats, self).Gather(start_date,
-                                end_date,
-                                sort_by_build_number=sort_by_build_number,
-                                starting_build_number=starting_build_number)
-    self.actions = self.db.GetActionHistory(start_date, end_date)
-    self.GatherFailureReasons(creds)
-
-    # Remove builds without a build_id.
-    self.builds = _RemoveBuildsWithNoBuildId(self.builds, 'CQ')
-
-    self.build_numbers_by_build_id.update(
-        {b['build_id'] : b.build_number for b in self.builds})
-
-  def GetSubmittedPatchNumber(self, actions):
-    """Get the patch number of the final patchset submitted.
-
-    This function only makes sense for patches that were submitted.
-
-    Args:
-      actions: A list of actions for a single change.
-    """
-    submit = [a for a in actions if a.action == constants.CL_ACTION_SUBMITTED]
-    assert len(submit) > 0, 'Expected change to be submitted, got %r' % actions
-    if len(submit) > 1:
-      # Patches may be submitted more than once if we mark the patch as
-      # submitted when it is still in "SUBMITTING" state and Gerrit later bumps
-      # it back to "NEW". This should only happen due to Gerrit bugs.
-      cros_build_lib.Info('Change %s was submitted more than once: %r',
-                          submit[-1].patch, submit)
-
-    return submit[-1].patch_number
-
-  def ClassifyRejections(self, submitted_changes):
-    """Categorize rejected CLs, deciding whether the rejection was incorrect.
-
-    We figure out what patches were falsely rejected by looking for patches
-    which were later submitted unmodified after being rejected. These patches
-    are considered to be likely good CLs.
-
-    Args:
-      submitted_changes: A dict mapping submitted GerritChangeTuple objects to
-        a list of associated actions.
-
-    Yields:
-      change: The GerritChangeTuple that was rejected.
-      actions: A list of actions applicable to the CL.
-      a: The reject action that kicked out the CL.
-      falsely_rejected: Whether the CL was incorrectly rejected. A CL rejection
-        is considered incorrect if the same patch is later submitted, with no
-        changes. It's a heuristic.
-    """
-    for change, actions in submitted_changes.iteritems():
-      submitted_patch_number = self.GetSubmittedPatchNumber(actions)
-      for a in actions:
-        if a.action == constants.CL_ACTION_KICKED_OUT:
-          # If the patch wasn't picked up in the run, this means that it "failed
-          # to apply" rather than "failed to validate". Ignore it.
-          picked_up = [x for x in actions if x.build_id == a.build_id and
-                       x.patch == a.patch and
-                       x.action == constants.CL_ACTION_PICKED_UP]
-          falsely_rejected = a.patch_number == submitted_patch_number
-          if picked_up:
-            # Check whether the patch was updated after submission.
-            yield change, actions, a, falsely_rejected
-
-  def _PrintCounts(self, reasons, fmt):
-    """Print a sorted list of reasons in descending order of frequency.
-
-    Args:
-      reasons: A key/value mapping mapping the reason to the count.
-      fmt: A format string for our log message, containing %(cnt)d
-        and %(reason)s.
-    """
-    d = reasons
-    for cnt, reason in sorted(((v, k) for (k, v) in d.items()), reverse=True):
-      logging.info(fmt, dict(cnt=cnt, reason=reason))
-    if not d:
-      logging.info('  None')
-
-  def BotType(self, action):
-    """Return whether |action| applies to the CQ or PRE_CQ."""
-    build_config = action.build_config
-    if build_config.endswith('-%s' % cbuildbot_config.CONFIG_TYPE_PALADIN):
-      return CQ
-    else:
-      return PRE_CQ
-
-  def GoodPatchRejections(self, submitted_changes):
-    """Find good patches that were incorrectly rejected.
-
-    Args:
-      submitted_changes: A dict mapping submitted GerritChangeTuple objects to
-        a list of associated actions.
-
-    Returns:
-      A dict, where d[patch] = reject_actions for each good patch that was
-      incorrectly rejected.
-    """
-    # falsely_rejected_changes maps GerritChangeTuple objects to their actions.
-    # bad_cl_builds is a set of builds that contain a bad patch.
-    falsely_rejected_changes = {}
-    bad_cl_builds = set()
-    for x in self.ClassifyRejections(submitted_changes):
-      _, actions, a, falsely_rejected = x
-      if falsely_rejected:
-        falsely_rejected_changes[a.patch] = actions
-      elif self.BotType(a) == PRE_CQ:
-        # If a developer writes a bad patch and it fails the Pre-CQ, it
-        # may cause many other patches from the same developer to be
-        # rejected. This is expected and correct behavior. Treat all of
-        # the patches in the Pre-CQ run as bad so that they don't skew our
-        # our statistics.
-        #
-        # Since we don't have a spreadsheet for the Pre-CQ, we guess what
-        # CLs were bad by looking at what patches needed to be changed
-        # before submission.
-        #
-        # NOTE: We intentionally only apply this logic to the Pre-CQ here.
-        # The CQ is different because it may have many innocent patches in
-        # a single run which should not be treated as bad.
-        bad_cl_builds.add(a.build_id)
-
-    # Make a list of candidate patches that got incorrectly rejected. We track
-    # them in a dict, setting good_patch_rejections[patch] = rejections for
-    # each patch.
-    good_patch_rejections = collections.defaultdict(list)
-    for v in falsely_rejected_changes.itervalues():
-      for a in v:
-        if (a.action == constants.CL_ACTION_KICKED_OUT and
-            a.build_id not in bad_cl_builds):
-          good_patch_rejections[a.patch].append(a)
-
-    return good_patch_rejections
-
-  def FalseRejectionRate(self, good_patch_count, good_patch_rejection_count):
-    """Calculate the false rejection ratio.
-
-    This is the chance that a good patch will be rejected by the Pre-CQ or CQ
-    in a given run.
-
-    Args:
-      good_patch_count: The number of good patches in the run.
-      good_patch_rejection_count: A dict containing the number of false
-        rejections for the CQ and PRE_CQ.
-
-    Returns:
-      A dict containing the false rejection ratios for CQ, PRE_CQ, and combined.
-    """
-    false_rejection_rate = dict()
-    for bot, rejection_count in good_patch_rejection_count.iteritems():
-      false_rejection_rate[bot] = (
-          rejection_count * 100 / (rejection_count + good_patch_count)
-      )
-    false_rejection_rate['combined'] = 0
-    if good_patch_count:
-      rejection_count = sum(good_patch_rejection_count.values())
-      false_rejection_rate['combined'] = (
-          rejection_count * 100 / (good_patch_count + rejection_count)
-      )
-    return false_rejection_rate
-
-  def Summarize(self):
-    """Process, print, and return a summary of cl action statistics.
-
-    As a side effect, save summary to self.summary.
-
-    Returns:
-      A dictionary summarizing the statistics.
-    """
-    super_summary = super(CLStats, self).Summarize()
-
-    (self.per_patch_actions,
-     self.per_cl_actions) = self.CollateActions(self.actions)
-
-    submit_actions = [a for a in self.actions
-                      if a.action == constants.CL_ACTION_SUBMITTED]
-    reject_actions = [a for a in self.actions
-                      if a.action == constants.CL_ACTION_KICKED_OUT]
-    sbfail_actions = [a for a in self.actions
-                      if a.action == constants.CL_ACTION_SUBMIT_FAILED]
-
-    build_reason_counts = {}
-    for reason in self.reasons.values():
-      if reason != 'None':
-        build_reason_counts[reason] = build_reason_counts.get(reason, 0) + 1
-
-    unique_blames = set()
-    for blames in self.blames.itervalues():
-      unique_blames.update(blames)
-
-    unique_cl_blames = {blame for blame in unique_blames if
-                        EXTERNAL_CL_BASE_URL in blame}
-
-    submitted_changes = {k: v for k, v, in self.per_cl_actions.iteritems()
-                         if any(a.action == constants.CL_ACTION_SUBMITTED
-                                for a in v)}
-    submitted_patches = {
-        k: v for k, v, in self.per_patch_actions.iteritems()
-        if any(a.action == constants.CL_ACTION_SUBMITTED and
-               a.build_config == constants.CQ_MASTER for a in v)}
-
-    patch_handle_times = [
-        clactions.GetCLHandlingTime(patch, actions) for
-        (patch, actions) in submitted_patches.iteritems()]
-
-    pre_cq_handle_times = [
-        clactions.GetPreCQTime(patch, actions) for
-        (patch, actions) in submitted_patches.iteritems()]
-
-    cq_wait_times = [
-        clactions.GetCQWaitTime(patch, actions) for
-        (patch, actions) in submitted_patches.iteritems()]
-
-    cq_handle_times = [
-        clactions.GetCQRunTime(patch, actions) for
-        (patch, actions) in submitted_patches.iteritems()]
-
-    # Count CLs that were rejected, then a subsequent patch was submitted.
-    # These are good candidates for bad CLs. We track them in a dict, setting
-    # submitted_after_new_patch[bot_type][patch] = actions for each bad patch.
-    submitted_after_new_patch = {}
-    for x in self.ClassifyRejections(submitted_changes):
-      change, actions, a, falsely_rejected = x
-      if not falsely_rejected:
-        d = submitted_after_new_patch.setdefault(self.BotType(a), {})
-        d[change] = actions
-
-    # Sort the candidate bad CLs in order of submit time.
-    bad_cl_candidates = {}
-    for bot_type, patch_actions in submitted_after_new_patch.items():
-      bad_cl_candidates[bot_type] = [
-          k for k, _ in sorted(patch_actions.items(),
-                               key=lambda x: x[1][-1].timestamp)]
-
-    # Calculate how many good patches were falsely rejected and why.
-    # good_patch_rejections maps patches to the rejection actions.
-    # patch_reason_counts maps failure reasons to counts.
-    # patch_blame_counts maps blame targets to counts.
-    good_patch_rejections = self.GoodPatchRejections(submitted_changes)
-    patch_reason_counts = {}
-    patch_blame_counts = {}
-    for k, v in good_patch_rejections.iteritems():
-      for a in v:
-        if a.action == constants.CL_ACTION_KICKED_OUT:
-          build_number = self.build_numbers_by_build_id.get(a.build_id)
-          if self.BotType(a) == CQ and build_number:
-            reason = self.reasons.get(build_number, 'None')
-            blames = self.blames.get(build_number, ['None'])
-            patch_reason_counts[reason] = patch_reason_counts.get(reason, 0) + 1
-            for blame in blames:
-              patch_blame_counts[blame] = patch_blame_counts.get(blame, 0) + 1
-
-    # good_patch_count: The number of good patches.
-    # good_patch_rejection_count maps the bot type (CQ or PRE_CQ) to the number
-    #   of times that bot has falsely rejected good patches.
-    good_patch_count = len(submit_actions)
-    good_patch_rejection_count = collections.defaultdict(int)
-    for k, v in good_patch_rejections.iteritems():
-      for a in v:
-        good_patch_rejection_count[self.BotType(a)] += 1
-    false_rejection_rate = self.FalseRejectionRate(good_patch_count,
-                                                   good_patch_rejection_count)
-
-    # This list counts how many times each good patch was rejected.
-    rejection_counts = [0] * (good_patch_count - len(good_patch_rejections))
-    rejection_counts += [len(x) for x in good_patch_rejections.values()]
-
-    # Break down the frequency of how many times each patch is rejected.
-    good_patch_rejection_breakdown = []
-    if rejection_counts:
-      for x in range(max(rejection_counts) + 1):
-        good_patch_rejection_breakdown.append((x, rejection_counts.count(x)))
-
-    summary = {
-        'total_cl_actions': len(self.actions),
-        'unique_cls': len(self.per_cl_actions),
-        'unique_patches': len(self.per_patch_actions),
-        'submitted_patches': len(submit_actions),
-        'rejections': len(reject_actions),
-        'submit_fails': len(sbfail_actions),
-        'good_patch_rejections': sum(rejection_counts),
-        'mean_good_patch_rejections': numpy.mean(rejection_counts),
-        'good_patch_rejection_breakdown': good_patch_rejection_breakdown,
-        'good_patch_rejection_count': dict(good_patch_rejection_count),
-        'false_rejection_rate': false_rejection_rate,
-        'median_handling_time': numpy.median(patch_handle_times),
-        self.PATCH_HANDLING_TIME_SUMMARY_KEY: patch_handle_times,
-        'bad_cl_candidates': bad_cl_candidates,
-        'unique_blames_change_count': len(unique_cl_blames),
-    }
-
-    logging.info('CQ committed %s changes', summary['submitted_patches'])
-    logging.info('CQ correctly rejected %s unique changes',
-                 summary['unique_blames_change_count'])
-    logging.info('pre-CQ and CQ incorrectly rejected %s changes a total of '
-                 '%s times (pre-CQ: %s; CQ: %s)',
-                 len(good_patch_rejections),
-                 sum(good_patch_rejection_count.values()),
-                 good_patch_rejection_count[PRE_CQ],
-                 good_patch_rejection_count[CQ])
-
-    logging.info('      Total CL actions: %d.', summary['total_cl_actions'])
-    logging.info('    Unique CLs touched: %d.', summary['unique_cls'])
-    logging.info('Unique patches touched: %d.', summary['unique_patches'])
-    logging.info('   Total CLs submitted: %d.', summary['submitted_patches'])
-    logging.info('      Total rejections: %d.', summary['rejections'])
-    logging.info(' Total submit failures: %d.', summary['submit_fails'])
-    logging.info(' Good patches rejected: %d.',
-                 len(good_patch_rejections))
-    logging.info('   Mean rejections per')
-    logging.info('            good patch: %.2f',
-                 summary['mean_good_patch_rejections'])
-    logging.info(' False rejection rate for CQ: %.1f%%',
-                 summary['false_rejection_rate'].get(CQ, 0))
-    logging.info(' False rejection rate for Pre-CQ: %.1f%%',
-                 summary['false_rejection_rate'].get(PRE_CQ, 0))
-    logging.info(' Combined false rejection rate: %.1f%%',
-                 summary['false_rejection_rate']['combined'])
-
-    for x, p in summary['good_patch_rejection_breakdown']:
-      logging.info('%d good patches were rejected %d times.', p, x)
-    logging.info('')
-    logging.info('Good patch handling time:')
-    logging.info('  10th percentile: %.2f hours',
-                 numpy.percentile(patch_handle_times, 10) / 3600.0)
-    logging.info('  25th percentile: %.2f hours',
-                 numpy.percentile(patch_handle_times, 25) / 3600.0)
-    logging.info('  50th percentile: %.2f hours',
-                 summary['median_handling_time'] / 3600.0)
-    logging.info('  75th percentile: %.2f hours',
-                 numpy.percentile(patch_handle_times, 75) / 3600.0)
-    logging.info('  90th percentile: %.2f hours',
-                 numpy.percentile(patch_handle_times, 90) / 3600.0)
-    logging.info('')
-    logging.info('Time spent in Pre-CQ:')
-    logging.info('  10th percentile: %.2f hours',
-                 numpy.percentile(pre_cq_handle_times, 10) / 3600.0)
-    logging.info('  25th percentile: %.2f hours',
-                 numpy.percentile(pre_cq_handle_times, 25) / 3600.0)
-    logging.info('  50th percentile: %.2f hours',
-                 numpy.percentile(pre_cq_handle_times, 50) / 3600.0)
-    logging.info('  75th percentile: %.2f hours',
-                 numpy.percentile(pre_cq_handle_times, 75) / 3600.0)
-    logging.info('  90th percentile: %.2f hours',
-                 numpy.percentile(pre_cq_handle_times, 90) / 3600.0)
-    logging.info('')
-    logging.info('Time spent waiting for CQ:')
-    logging.info('  10th percentile: %.2f hours',
-                 numpy.percentile(cq_wait_times, 10) / 3600.0)
-    logging.info('  25th percentile: %.2f hours',
-                 numpy.percentile(cq_wait_times, 25) / 3600.0)
-    logging.info('  50th percentile: %.2f hours',
-                 numpy.percentile(cq_wait_times, 50) / 3600.0)
-    logging.info('  75th percentile: %.2f hours',
-                 numpy.percentile(cq_wait_times, 75) / 3600.0)
-    logging.info('  90th percentile: %.2f hours',
-                 numpy.percentile(cq_wait_times, 90) / 3600.0)
-    logging.info('')
-    logging.info('Time spent in CQ:')
-    logging.info('  10th percentile: %.2f hours',
-                 numpy.percentile(cq_handle_times, 10) / 3600.0)
-    logging.info('  25th percentile: %.2f hours',
-                 numpy.percentile(cq_handle_times, 25) / 3600.0)
-    logging.info('  50th percentile: %.2f hours',
-                 numpy.percentile(cq_handle_times, 50) / 3600.0)
-    logging.info('  75th percentile: %.2f hours',
-                 numpy.percentile(cq_handle_times, 75) / 3600.0)
-    logging.info('  90th percentile: %.2f hours',
-                 numpy.percentile(cq_handle_times, 90) / 3600.0)
-    logging.info('')
-
-    for bot_type, patches in summary['bad_cl_candidates'].items():
-      logging.info('%d bad patch candidates were rejected by the %s',
-                   len(patches), bot_type)
-      for k in patches:
-        logging.info('Bad patch candidate in: %s', k)
-
-    fmt_fai = '  %(cnt)d failures in %(reason)s'
-    fmt_rej = '  %(cnt)d rejections due to %(reason)s'
-
-    logging.info('Reasons why good patches were rejected:')
-    self._PrintCounts(patch_reason_counts, fmt_rej)
-
-    logging.info('Bugs or CLs responsible for good patches rejections:')
-    self._PrintCounts(patch_blame_counts, fmt_rej)
-
-    logging.info('Reasons why builds failed:')
-    self._PrintCounts(build_reason_counts, fmt_fai)
-
-    super_summary.update(summary)
-    self.summary = super_summary
-    return super_summary
-
 # TODO(mtennant): Add token file support.  See upload_package_status.py.
-def _PrepareCreds(email, password=None):
+def PrepareCreds(email, password=None):
   """Return a gdata_lib.Creds object from given credentials.
 
   Args:
@@ -1425,11 +884,6 @@
     cros_build_lib.Error('You must specify --email with --save.')
     return False
 
-  # The --cl-actions option requires --email.
-  if options.cl_actions and not options.email:
-    cros_build_lib.Error('You must specify --email with --cl-actions.')
-    return False
-
   return True
 
 
@@ -1447,9 +901,6 @@
                     help='Gather stats for the Canary master.')
   mode.add_argument('--cq-slaves', action='store_true', default=False,
                     help='Gather stats for all CQ slaves.')
-  mode.add_argument('--cl-actions', action='store_true', default=False,
-                    help='Gather stats about CL actions taken by the CQ '
-                         'master')
   # TODO(mtennant): Other modes as they make sense, like maybe --release.
 
   mode = parser.add_mutually_exclusive_group(required=True)
@@ -1488,11 +939,6 @@
   mode.add_argument('--override-ss-key', action='store', default=None,
                     dest='ss_key',
                     help='Override spreadsheet key.')
-  parser.add_argument('--cred-dir', action='store', required=True,
-                      metavar='CIDB_CREDENTIALS_DIR',
-                      help='Database credentials directory with certificates '
-                           'and other connection information. Obtain your '
-                           'credentials at go/cros-cidb-admin .')
 
   return parser
 
@@ -1504,8 +950,6 @@
   if not _CheckOptions(options):
     sys.exit(1)
 
-  db = cidb.CIDBConnection(options.cred_dir)
-
   if options.end_date:
     end_date = options.end_date
   else:
@@ -1529,30 +973,18 @@
   if options.cq_master:
     stats_managers.append(
         CQMasterStats(
-            db=db,
-            ss_key=options.ss_key or CQ_SS_KEY,
-            no_sheets_version_filter=options.no_sheets_version_filter))
-
-  if options.cl_actions:
-    # CL stats manager uses the CQ spreadsheet to fetch failure reasons
-    stats_managers.append(
-        CLStats(
-            options.email,
-            db=db,
             ss_key=options.ss_key or CQ_SS_KEY,
             no_sheets_version_filter=options.no_sheets_version_filter))
 
   if options.pfq_master:
     stats_managers.append(
         PFQMasterStats(
-            db=db,
             ss_key=options.ss_key or PFQ_SS_KEY,
             no_sheets_version_filter=options.no_sheets_version_filter))
 
   if options.canary_master:
     stats_managers.append(
         CanaryMasterStats(
-            db=db,
             ss_key=options.ss_key or CANARY_SS_KEY,
             no_sheets_version_filter=options.no_sheets_version_filter))
 
@@ -1560,7 +992,7 @@
     targets = _GetSlavesOfMaster(CQ_MASTER)
     for target in targets:
       # TODO(mtennant): Add spreadsheet and/or graphite support for cq-slaves.
-      stats_managers.append(CQSlaveStats(target, db=db))
+      stats_managers.append(CQSlaveStats(target))
 
   # If options.save is set and any of the instructions include a table class,
   # or specify summary columns for upload, prepare spreadsheet creds object
@@ -1572,7 +1004,7 @@
     # TODO(mtennant): See if this can work with two-factor authentication.
     # TODO(mtennant): Eventually, we probably want to use 90-day certs to
     # run this as a cronjob on a ganeti instance.
-    creds = _PrepareCreds(options.email)
+    creds = PrepareCreds(options.email)
 
   # Now run through all the stats gathering that is requested.
   for stats_mgr in stats_managers:

diff --git a/scripts/summarize_build_stats.py b/scripts/summarize_build_stats.py
new file mode 100644
index 0000000..2c34d4d
--- /dev/null
+++ b/scripts/summarize_build_stats.py

@@ -0,0 +1,666 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script to summarize stats for different builds in prod."""
+
+from __future__ import print_function
+
+import collections
+import datetime
+import logging
+import numpy
+import re
+import sys
+
+from chromite.cbuildbot import cbuildbot_config
+from chromite.cbuildbot import constants
+from chromite.lib import cidb
+from chromite.lib import clactions
+from chromite.lib import commandline
+from chromite.lib import cros_build_lib
+from chromite.lib import gdata_lib
+from chromite.scripts import gather_builder_stats
+
+
+# These are the preferred base URLs we use to canonicalize bugs/CLs.
+BUGANIZER_BASE_URL = 'b/'
+GUTS_BASE_URL = 't/'
+CROS_BUG_BASE_URL = 'crbug.com/'
+INTERNAL_CL_BASE_URL = 'crosreview.com/i/'
+EXTERNAL_CL_BASE_URL = 'crosreview.com/'
+
+def _RemoveBuildsWithNoBuildId(builds, description):
+  """Remove builds without a build_id.
+
+  They happen during some failure modes, but we can't process them, and
+  they don't affect the stats. We do log a warning message if any were
+  filtered.
+
+  Args:
+    builds: List of metadata_lib.BuildData objects.
+    description: A string describing the source of missing build_ids.
+  """
+  result = [b for b in builds if 'build_id' in b.metadata_dict]
+  filtered = len(builds) - len(result)
+
+  if filtered:
+    logging.warn('Found %d %s builds without a build_id.',
+                 filtered, description)
+
+  return result
+
+
+class CLStats(gather_builder_stats.StatsManager):
+  """Manager for stats about CL actions taken by the Commit Queue."""
+  PATCH_HANDLING_TIME_SUMMARY_KEY = 'patch_handling_time'
+  SUMMARY_SPREADSHEET_COLUMNS = {
+      PATCH_HANDLING_TIME_SUMMARY_KEY: ('PatchHistogram', 1)}
+  COL_FAILURE_CATEGORY = 'failure category'
+  COL_FAILURE_BLAME = 'bug or bad CL'
+  REASON_BAD_CL = 'Bad CL'
+  BOT_TYPE = constants.CQ
+  GET_SHEETS_VERSION = False
+
+  def __init__(self, email, db, **kwargs):
+    super(CLStats, self).__init__(constants.CQ_MASTER, **kwargs)
+    self.actions = []
+    self.per_patch_actions = {}
+    self.per_cl_actions = {}
+    self.email = email
+    self.db = db
+    self.reasons = {}
+    self.blames = {}
+    self.summary = {}
+    self.build_numbers_by_build_id = {}
+
+  def GatherFailureReasons(self, creds):
+    """Gather the reasons why our builds failed and the blamed bugs or CLs.
+
+    Args:
+      creds: A gdata_lib.Creds object.
+    """
+    data_table = gather_builder_stats.CQMasterStats.TABLE_CLASS()
+    uploader = gather_builder_stats.SSUploader(creds, self.ss_key)
+    ss_failure_category = gdata_lib.PrepColNameForSS(self.COL_FAILURE_CATEGORY)
+    ss_failure_blame = gdata_lib.PrepColNameForSS(self.COL_FAILURE_BLAME)
+    rows = uploader.GetRowCacheByCol(data_table.WORKSHEET_NAME,
+                                     data_table.COL_BUILD_NUMBER)
+    for b in self.builds:
+      try:
+        row = rows[str(b.build_number)]
+      except KeyError:
+        self.reasons[b.build_number] = 'None'
+        self.blames[b.build_number] = []
+      else:
+        self.reasons[b.build_number] = str(row[ss_failure_category])
+        self.blames[b.build_number] = self.ProcessBlameString(
+            str(row[ss_failure_blame]))
+
+  @staticmethod
+  def ProcessBlameString(blame_string):
+    """Parse a human-created |blame_string| from the spreadsheet.
+
+    Returns:
+      A list of canonicalized URLs for bugs or CLs that appear in the blame
+      string. Canonicalized form will be 'crbug.com/1234',
+      'crosreview.com/1234', 'b/1234', 't/1234', or 'crosreview.com/i/1234' as
+      applicable.
+    """
+    urls = []
+    tokens = blame_string.split()
+
+    # Format to generate the regex patterns. Matches one of provided domain
+    # names, followed by lazy wildcard, followed by greedy digit wildcard,
+    # followed by optional slash and optional comma.
+    general_regex = r'^.*(%s).*?([0-9]+)/?,?$'
+
+    crbug = general_regex % r'crbug.com|code.google.com'
+    internal_review = general_regex % (
+        r'chrome-internal-review.googlesource.com|crosreview.com/i')
+    external_review = general_regex % (
+        r'crosreview.com|chromium-review.googlesource.com')
+    guts = general_regex % r't/|gutsv\d.corp.google.com/#ticket/'
+
+    # Buganizer regex is different, as buganizer urls do not end with the bug
+    # number.
+    buganizer = r'^.*(b/|b.corp.google.com/issue\?id=)([0-9]+).*$'
+
+    # Patterns need to be tried in a specific order -- internal review needs
+    # to be tried before external review, otherwise urls like crosreview.com/i
+    # will be incorrectly parsed as external.
+    patterns = [crbug,
+                internal_review,
+                external_review,
+                buganizer,
+                guts]
+    url_patterns = [CROS_BUG_BASE_URL,
+                    INTERNAL_CL_BASE_URL,
+                    EXTERNAL_CL_BASE_URL,
+                    BUGANIZER_BASE_URL,
+                    GUTS_BASE_URL]
+
+    for t in tokens:
+      for p, u in zip(patterns, url_patterns):
+        m = re.match(p, t)
+        if m:
+          urls.append(u + m.group(2))
+          break
+
+    return urls
+
+  def Gather(self, start_date, end_date, sort_by_build_number=True,
+             starting_build_number=0, creds=None):
+    """Fetches build data and failure reasons.
+
+    Args:
+      start_date: A datetime.date instance for the earliest build to
+                  examine.
+      end_date: A datetime.date instance for the latest build to
+                examine.
+      sort_by_build_number: Optional boolean. If True, builds will be
+                            sorted by build number.
+      starting_build_number: The lowest build number from the CQ to include in
+                             the results.
+      creds: Login credentials as returned by gather_builder_stats.PrepareCreds.
+          (optional)
+    """
+    if not creds:
+      creds = gather_builder_stats.PrepareCreds(self.email)
+    super(CLStats, self).Gather(start_date,
+                                end_date,
+                                sort_by_build_number=sort_by_build_number,
+                                starting_build_number=starting_build_number)
+    self.actions = self.db.GetActionHistory(start_date, end_date)
+    self.GatherFailureReasons(creds)
+
+    # Remove builds without a build_id.
+    self.builds = _RemoveBuildsWithNoBuildId(self.builds, 'CQ')
+
+    self.build_numbers_by_build_id.update(
+        {b['build_id'] : b.build_number for b in self.builds})
+
+  def GetSubmittedPatchNumber(self, actions):
+    """Get the patch number of the final patchset submitted.
+
+    This function only makes sense for patches that were submitted.
+
+    Args:
+      actions: A list of actions for a single change.
+    """
+    submit = [a for a in actions if a.action == constants.CL_ACTION_SUBMITTED]
+    assert len(submit) == 1, \
+        'Expected change to be submitted exactly once, got %r' % submit
+    return submit[-1].patch_number
+
+  def ClassifyRejections(self, submitted_changes):
+    """Categorize rejected CLs, deciding whether the rejection was incorrect.
+
+    We figure out what patches were falsely rejected by looking for patches
+    which were later submitted unmodified after being rejected. These patches
+    are considered to be likely good CLs.
+
+    Args:
+      submitted_changes: A dict mapping submitted GerritChangeTuple objects to
+        a list of associated actions.
+
+    Yields:
+      change: The GerritChangeTuple that was rejected.
+      actions: A list of actions applicable to the CL.
+      a: The reject action that kicked out the CL.
+      falsely_rejected: Whether the CL was incorrectly rejected. A CL rejection
+        is considered incorrect if the same patch is later submitted, with no
+        changes. It's a heuristic.
+    """
+    for change, actions in submitted_changes.iteritems():
+      submitted_patch_number = self.GetSubmittedPatchNumber(actions)
+      for a in actions:
+        if a.action == constants.CL_ACTION_KICKED_OUT:
+          # If the patch wasn't picked up in the run, this means that it "failed
+          # to apply" rather than "failed to validate". Ignore it.
+          picked_up = [x for x in actions if x.build_id == a.build_id and
+                       x.patch == a.patch and
+                       x.action == constants.CL_ACTION_PICKED_UP]
+          falsely_rejected = a.patch_number == submitted_patch_number
+          if picked_up:
+            # Check whether the patch was updated after submission.
+            yield change, actions, a, falsely_rejected
+
+  def _PrintCounts(self, reasons, fmt):
+    """Print a sorted list of reasons in descending order of frequency.
+
+    Args:
+      reasons: A key/value mapping mapping the reason to the count.
+      fmt: A format string for our log message, containing %(cnt)d
+        and %(reason)s.
+    """
+    d = reasons
+    for cnt, reason in sorted(((v, k) for (k, v) in d.items()), reverse=True):
+      logging.info(fmt, dict(cnt=cnt, reason=reason))
+    if not d:
+      logging.info('  None')
+
+  def BotType(self, action):
+    """Return whether |action| applies to the CQ or PRE_CQ."""
+    build_config = action.build_config
+    if build_config.endswith('-%s' % cbuildbot_config.CONFIG_TYPE_PALADIN):
+      return constants.CQ
+    else:
+      return constants.PRE_CQ
+
+  def GoodPatchRejections(self, submitted_changes):
+    """Find good patches that were incorrectly rejected.
+
+    Args:
+      submitted_changes: A dict mapping submitted GerritChangeTuple objects to
+        a list of associated actions.
+
+    Returns:
+      A dict, where d[patch] = reject_actions for each good patch that was
+      incorrectly rejected.
+    """
+    # falsely_rejected_changes maps GerritChangeTuple objects to their actions.
+    # bad_cl_builds is a set of builds that contain a bad patch.
+    falsely_rejected_changes = {}
+    bad_cl_builds = set()
+    for x in self.ClassifyRejections(submitted_changes):
+      _, actions, a, falsely_rejected = x
+      if falsely_rejected:
+        falsely_rejected_changes[a.patch] = actions
+      elif self.BotType(a) == constants.PRE_CQ:
+        # If a developer writes a bad patch and it fails the Pre-CQ, it
+        # may cause many other patches from the same developer to be
+        # rejected. This is expected and correct behavior. Treat all of
+        # the patches in the Pre-CQ run as bad so that they don't skew our
+        # our statistics.
+        #
+        # Since we don't have a spreadsheet for the Pre-CQ, we guess what
+        # CLs were bad by looking at what patches needed to be changed
+        # before submission.
+        #
+        # NOTE: We intentionally only apply this logic to the Pre-CQ here.
+        # The CQ is different because it may have many innocent patches in
+        # a single run which should not be treated as bad.
+        bad_cl_builds.add(a.build_id)
+
+    # Make a list of candidate patches that got incorrectly rejected. We track
+    # them in a dict, setting good_patch_rejections[patch] = rejections for
+    # each patch.
+    good_patch_rejections = collections.defaultdict(list)
+    for v in falsely_rejected_changes.itervalues():
+      for a in v:
+        if (a.action == constants.CL_ACTION_KICKED_OUT and
+            a.build_id not in bad_cl_builds):
+          good_patch_rejections[a.patch].append(a)
+
+    return good_patch_rejections
+
+  def FalseRejectionRate(self, good_patch_count, good_patch_rejection_count):
+    """Calculate the false rejection ratio.
+
+    This is the chance that a good patch will be rejected by the Pre-CQ or CQ
+    in a given run.
+
+    Args:
+      good_patch_count: The number of good patches in the run.
+      good_patch_rejection_count: A dict containing the number of false
+        rejections for the CQ and PRE_CQ.
+
+    Returns:
+      A dict containing the false rejection ratios for CQ, PRE_CQ, and combined.
+    """
+    false_rejection_rate = dict()
+    for bot, rejection_count in good_patch_rejection_count.iteritems():
+      false_rejection_rate[bot] = (
+          rejection_count * 100 / (rejection_count + good_patch_count)
+      )
+    false_rejection_rate['combined'] = 0
+    if good_patch_count:
+      rejection_count = sum(good_patch_rejection_count.values())
+      false_rejection_rate['combined'] = (
+          rejection_count * 100. / (good_patch_count + rejection_count)
+      )
+    return false_rejection_rate
+
+  def Summarize(self):
+    """Process, print, and return a summary of cl action statistics.
+
+    As a side effect, save summary to self.summary.
+
+    Returns:
+      A dictionary summarizing the statistics.
+    """
+    super_summary = super(CLStats, self).Summarize()
+
+    (self.per_patch_actions,
+     self.per_cl_actions) = self.CollateActions(self.actions)
+
+    submit_actions = [a for a in self.actions
+                      if a.action == constants.CL_ACTION_SUBMITTED]
+    reject_actions = [a for a in self.actions
+                      if a.action == constants.CL_ACTION_KICKED_OUT]
+    sbfail_actions = [a for a in self.actions
+                      if a.action == constants.CL_ACTION_SUBMIT_FAILED]
+
+    build_reason_counts = {}
+    for reason in self.reasons.values():
+      if reason != 'None':
+        build_reason_counts[reason] = build_reason_counts.get(reason, 0) + 1
+
+    unique_blames = set()
+    for blames in self.blames.itervalues():
+      unique_blames.update(blames)
+
+    unique_cl_blames = {blame for blame in unique_blames if
+                        EXTERNAL_CL_BASE_URL in blame}
+
+    submitted_changes = {k: v for k, v, in self.per_cl_actions.iteritems()
+                         if any(a.action == constants.CL_ACTION_SUBMITTED
+                                for a in v)}
+    submitted_patches = {
+        k: v for k, v, in self.per_patch_actions.iteritems()
+        if any(a.action == constants.CL_ACTION_SUBMITTED and
+               a.build_config == constants.CQ_MASTER for a in v)}
+
+    patch_handle_times = [
+        clactions.GetCLHandlingTime(patch, actions) for
+        (patch, actions) in submitted_patches.iteritems()]
+
+    pre_cq_handle_times = [
+        clactions.GetPreCQTime(patch, actions) for
+        (patch, actions) in submitted_patches.iteritems()]
+
+    cq_wait_times = [
+        clactions.GetCQWaitTime(patch, actions) for
+        (patch, actions) in submitted_patches.iteritems()]
+
+    cq_handle_times = [
+        clactions.GetCQRunTime(patch, actions) for
+        (patch, actions) in submitted_patches.iteritems()]
+
+    # Count CLs that were rejected, then a subsequent patch was submitted.
+    # These are good candidates for bad CLs. We track them in a dict, setting
+    # submitted_after_new_patch[bot_type][patch] = actions for each bad patch.
+    submitted_after_new_patch = {}
+    for x in self.ClassifyRejections(submitted_changes):
+      change, actions, a, falsely_rejected = x
+      if not falsely_rejected:
+        d = submitted_after_new_patch.setdefault(self.BotType(a), {})
+        d[change] = actions
+
+    # Sort the candidate bad CLs in order of submit time.
+    bad_cl_candidates = {}
+    for bot_type, patch_actions in submitted_after_new_patch.items():
+      bad_cl_candidates[bot_type] = [
+          k for k, _ in sorted(patch_actions.items(),
+                               key=lambda x: x[1][-1].timestamp)]
+
+    # Calculate how many good patches were falsely rejected and why.
+    # good_patch_rejections maps patches to the rejection actions.
+    # patch_reason_counts maps failure reasons to counts.
+    # patch_blame_counts maps blame targets to counts.
+    good_patch_rejections = self.GoodPatchRejections(submitted_changes)
+    patch_reason_counts = {}
+    patch_blame_counts = {}
+    for k, v in good_patch_rejections.iteritems():
+      for a in v:
+        if a.action == constants.CL_ACTION_KICKED_OUT:
+          build_number = self.build_numbers_by_build_id.get(a.build_id)
+          if self.BotType(a) == constants.CQ and build_number:
+            reason = self.reasons.get(build_number, 'None')
+            blames = self.blames.get(build_number, ['None'])
+            patch_reason_counts[reason] = patch_reason_counts.get(reason, 0) + 1
+            for blame in blames:
+              patch_blame_counts[blame] = patch_blame_counts.get(blame, 0) + 1
+
+    # good_patch_count: The number of good patches.
+    # good_patch_rejection_count maps the bot type (CQ or PRE_CQ) to the number
+    #   of times that bot has falsely rejected good patches.
+    good_patch_count = len(submit_actions)
+    good_patch_rejection_count = collections.defaultdict(int)
+    for k, v in good_patch_rejections.iteritems():
+      for a in v:
+        good_patch_rejection_count[self.BotType(a)] += 1
+    false_rejection_rate = self.FalseRejectionRate(good_patch_count,
+                                                   good_patch_rejection_count)
+
+    # This list counts how many times each good patch was rejected.
+    rejection_counts = [0] * (good_patch_count - len(good_patch_rejections))
+    rejection_counts += [len(x) for x in good_patch_rejections.values()]
+
+    # Break down the frequency of how many times each patch is rejected.
+    good_patch_rejection_breakdown = []
+    if rejection_counts:
+      for x in range(max(rejection_counts) + 1):
+        good_patch_rejection_breakdown.append((x, rejection_counts.count(x)))
+
+    summary = {
+        'total_cl_actions': len(self.actions),
+        'unique_cls': len(self.per_cl_actions),
+        'unique_patches': len(self.per_patch_actions),
+        'submitted_patches': len(submit_actions),
+        'rejections': len(reject_actions),
+        'submit_fails': len(sbfail_actions),
+        'good_patch_rejections': sum(rejection_counts),
+        'mean_good_patch_rejections': numpy.mean(rejection_counts),
+        'good_patch_rejection_breakdown': good_patch_rejection_breakdown,
+        'good_patch_rejection_count': dict(good_patch_rejection_count),
+        'false_rejection_rate': false_rejection_rate,
+        'median_handling_time': numpy.median(patch_handle_times),
+        self.PATCH_HANDLING_TIME_SUMMARY_KEY: patch_handle_times,
+        'bad_cl_candidates': bad_cl_candidates,
+        'unique_blames_change_count': len(unique_cl_blames),
+    }
+
+    logging.info('CQ committed %s changes', summary['submitted_patches'])
+    logging.info('CQ correctly rejected %s unique changes',
+                 summary['unique_blames_change_count'])
+    logging.info('pre-CQ and CQ incorrectly rejected %s changes a total of '
+                 '%s times (pre-CQ: %s; CQ: %s)',
+                 len(good_patch_rejections),
+                 sum(good_patch_rejection_count.values()),
+                 good_patch_rejection_count[constants.PRE_CQ],
+                 good_patch_rejection_count[constants.CQ])
+
+    logging.info('      Total CL actions: %d.', summary['total_cl_actions'])
+    logging.info('    Unique CLs touched: %d.', summary['unique_cls'])
+    logging.info('Unique patches touched: %d.', summary['unique_patches'])
+    logging.info('   Total CLs submitted: %d.', summary['submitted_patches'])
+    logging.info('      Total rejections: %d.', summary['rejections'])
+    logging.info(' Total submit failures: %d.', summary['submit_fails'])
+    logging.info(' Good patches rejected: %d.',
+                 len(good_patch_rejections))
+    logging.info('   Mean rejections per')
+    logging.info('            good patch: %.2f',
+                 summary['mean_good_patch_rejections'])
+    logging.info(' False rejection rate for CQ: %.1f%%',
+                 summary['false_rejection_rate'].get(constants.CQ, 0))
+    logging.info(' False rejection rate for Pre-CQ: %.1f%%',
+                 summary['false_rejection_rate'].get(constants.PRE_CQ, 0))
+    logging.info(' Combined false rejection rate: %.1f%%',
+                 summary['false_rejection_rate']['combined'])
+
+    for x, p in summary['good_patch_rejection_breakdown']:
+      logging.info('%d good patches were rejected %d times.', p, x)
+    logging.info('')
+    logging.info('Good patch handling time:')
+    logging.info('  10th percentile: %.2f hours',
+                 numpy.percentile(patch_handle_times, 10) / 3600.0)
+    logging.info('  25th percentile: %.2f hours',
+                 numpy.percentile(patch_handle_times, 25) / 3600.0)
+    logging.info('  50th percentile: %.2f hours',
+                 summary['median_handling_time'] / 3600.0)
+    logging.info('  75th percentile: %.2f hours',
+                 numpy.percentile(patch_handle_times, 75) / 3600.0)
+    logging.info('  90th percentile: %.2f hours',
+                 numpy.percentile(patch_handle_times, 90) / 3600.0)
+    logging.info('')
+    logging.info('Time spent in Pre-CQ:')
+    logging.info('  10th percentile: %.2f hours',
+                 numpy.percentile(pre_cq_handle_times, 10) / 3600.0)
+    logging.info('  25th percentile: %.2f hours',
+                 numpy.percentile(pre_cq_handle_times, 25) / 3600.0)
+    logging.info('  50th percentile: %.2f hours',
+                 numpy.percentile(pre_cq_handle_times, 50) / 3600.0)
+    logging.info('  75th percentile: %.2f hours',
+                 numpy.percentile(pre_cq_handle_times, 75) / 3600.0)
+    logging.info('  90th percentile: %.2f hours',
+                 numpy.percentile(pre_cq_handle_times, 90) / 3600.0)
+    logging.info('')
+    logging.info('Time spent waiting for CQ:')
+    logging.info('  10th percentile: %.2f hours',
+                 numpy.percentile(cq_wait_times, 10) / 3600.0)
+    logging.info('  25th percentile: %.2f hours',
+                 numpy.percentile(cq_wait_times, 25) / 3600.0)
+    logging.info('  50th percentile: %.2f hours',
+                 numpy.percentile(cq_wait_times, 50) / 3600.0)
+    logging.info('  75th percentile: %.2f hours',
+                 numpy.percentile(cq_wait_times, 75) / 3600.0)
+    logging.info('  90th percentile: %.2f hours',
+                 numpy.percentile(cq_wait_times, 90) / 3600.0)
+    logging.info('')
+    logging.info('Time spent in CQ:')
+    logging.info('  10th percentile: %.2f hours',
+                 numpy.percentile(cq_handle_times, 10) / 3600.0)
+    logging.info('  25th percentile: %.2f hours',
+                 numpy.percentile(cq_handle_times, 25) / 3600.0)
+    logging.info('  50th percentile: %.2f hours',
+                 numpy.percentile(cq_handle_times, 50) / 3600.0)
+    logging.info('  75th percentile: %.2f hours',
+                 numpy.percentile(cq_handle_times, 75) / 3600.0)
+    logging.info('  90th percentile: %.2f hours',
+                 numpy.percentile(cq_handle_times, 90) / 3600.0)
+    logging.info('')
+
+    for bot_type, patches in summary['bad_cl_candidates'].items():
+      logging.info('%d bad patch candidates were rejected by the %s',
+                   len(patches), bot_type)
+      for k in patches:
+        logging.info('Bad patch candidate in: CL:%s%s',
+                     constants.INTERNAL_CHANGE_PREFIX
+                     if k.internal else constants.EXTERNAL_CHANGE_PREFIX,
+                     k.gerrit_number)
+
+    fmt_fai = '  %(cnt)d failures in %(reason)s'
+    fmt_rej = '  %(cnt)d rejections due to %(reason)s'
+
+    logging.info('Reasons why good patches were rejected:')
+    self._PrintCounts(patch_reason_counts, fmt_rej)
+
+    logging.info('Bugs or CLs responsible for good patches rejections:')
+    self._PrintCounts(patch_blame_counts, fmt_rej)
+
+    logging.info('Reasons why builds failed:')
+    self._PrintCounts(build_reason_counts, fmt_fai)
+
+    super_summary.update(summary)
+    self.summary = super_summary
+    return super_summary
+
+
+def _CheckOptions(options):
+  # Ensure that specified start date is in the past.
+  now = datetime.datetime.now()
+  if options.start_date and now.date() < options.start_date:
+    cros_build_lib.Error('Specified start date is in the future: %s',
+                         options.start_date)
+    return False
+
+  return True
+
+
+def GetParser():
+  """Creates the argparse parser."""
+  parser = commandline.ArgumentParser(description=__doc__)
+
+  # Put options that control the mode of script into mutually exclusive group.
+  group = parser.add_argument_group('Script mode. (Choose one)')
+  mode = group.add_mutually_exclusive_group(required=True)
+  mode.add_argument('--cl-actions', action='store_true', default=False,
+                    help='Summarize stats about CL actions taken by the CQ '
+                         'master')
+
+  ex_group = parser.add_mutually_exclusive_group(required=True)
+  ex_group.add_argument('--start-date', action='store', type='date',
+                        default=None,
+                        help='Limit scope to a start date in the past.')
+  ex_group.add_argument('--past-month', action='store_true', default=False,
+                        help='Limit scope to the past 30 days up to now.')
+  ex_group.add_argument('--past-week', action='store_true', default=False,
+                        help='Limit scope to the past week up to now.')
+  ex_group.add_argument('--past-day', action='store_true', default=False,
+                        help='Limit scope to the past day up to now.')
+
+  parser.add_argument('--cred-dir', action='store', required=True,
+                      metavar='CIDB_CREDENTIALS_DIR',
+                      help='Database credentials directory with certificates '
+                           'and other connection information. Obtain your '
+                           'credentials at go/cros-cidb-admin .')
+  parser.add_argument('--starting-build', action='store', type=int, default=0,
+                      help='Filter to builds after given number (inclusive).')
+  parser.add_argument('--end-date', action='store', type='date', default=None,
+                      help='Limit scope to an end date in the past.')
+
+  # TODO(pprabhu) Remove the following options once we move off of
+  # gather_builder_stats.
+  group = parser.add_argument_group('Deprecated options. Will disappear soon')
+  group.add_argument('--email', action='store', type=str, default=None,
+                     help='Specify email for Google Sheets account to use.')
+  group.add_argument('--override-ss-key', action='store', default=None,
+                     dest='ss_key', help='Override spreadsheet key.')
+  group.add_argument('--no-sheets-version-filter', action='store_true',
+                     default=False,
+                     help='Upload all parsed metadata to spreasheet regardless '
+                          'of sheets version.')
+
+  return parser
+
+
+def main(argv):
+  parser = GetParser()
+  options = parser.parse_args(argv)
+
+  if not _CheckOptions(options):
+    sys.exit(1)
+
+  db = cidb.CIDBConnection(options.cred_dir)
+
+  if options.end_date:
+    end_date = options.end_date
+  else:
+    end_date = datetime.datetime.now().date()
+
+  # Determine the start date to use, which is required.
+  if options.start_date:
+    start_date = options.start_date
+  else:
+    assert options.past_month or options.past_week or options.past_day
+    if options.past_month:
+      start_date = end_date - datetime.timedelta(days=30)
+    elif options.past_week:
+      start_date = end_date - datetime.timedelta(days=7)
+    else:
+      start_date = end_date - datetime.timedelta(days=1)
+
+  # TODO(pprabhu) Remove this once we remove the dependence on spreadsheet.
+  creds = gather_builder_stats.PrepareCreds(options.email)
+
+  # Prepare the rounds of stats gathering to do.
+  stats_managers = []
+
+  if options.cl_actions:
+    # CL stats manager uses the CQ spreadsheet to fetch failure reasons
+    stats_managers.append(
+        CLStats(
+            options.email,
+            db=db,
+            ss_key=options.ss_key or gather_builder_stats.CQ_SS_KEY,
+            no_sheets_version_filter=options.no_sheets_version_filter))
+
+  # Now run through all the stats gathering that is requested.
+  for stats_mgr in stats_managers:
+    stats_mgr.Gather(start_date, end_date,
+                     starting_build_number=options.starting_build,
+                     creds=creds)
+    stats_mgr.Summarize()
+    cros_build_lib.Info('Finished with %s.\n\n', stats_mgr.config_target)

diff --git a/scripts/gather_builder_stats_unittest b/scripts/summarize_build_stats_unittest
similarity index 100%
rename from scripts/gather_builder_stats_unittest
rename to scripts/summarize_build_stats_unittest


diff --git a/scripts/gather_builder_stats_unittest.py b/scripts/summarize_build_stats_unittest.py
similarity index 94%
rename from scripts/gather_builder_stats_unittest.py
rename to scripts/summarize_build_stats_unittest.py
index a53fb53..aae4763 100644
--- a/scripts/gather_builder_stats_unittest.py
+++ b/scripts/summarize_build_stats_unittest.py

@@ -2,7 +2,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-"""Unit tests for gather_builder_stats."""
+"""Unit tests for summarize_build_stats."""
 
 from __future__ import print_function
 
@@ -16,11 +16,12 @@
 from chromite.lib import cros_test_lib
 from chromite.lib import fake_cidb
 from chromite.scripts import gather_builder_stats
+from chromite.scripts import summarize_build_stats
 from chromite.cbuildbot import metadata_lib
 from chromite.cbuildbot import constants
 
 
-REASON_BAD_CL = gather_builder_stats.CLStats.REASON_BAD_CL
+REASON_BAD_CL = summarize_build_stats.CLStats.REASON_BAD_CL
 CQ = constants.CQ
 PRE_CQ = constants.PRE_CQ
 
@@ -35,17 +36,17 @@
     bad_build = metadata_lib.BuildData('url', {})
 
     # Test No Builds.
-    result = gather_builder_stats._RemoveBuildsWithNoBuildId(
+    result = summarize_build_stats._RemoveBuildsWithNoBuildId(
         [], 'Test No Builds')
     self.assertEqual(result, [])
 
     # Test Good Builds.
-    result = gather_builder_stats._RemoveBuildsWithNoBuildId(
+    result = summarize_build_stats._RemoveBuildsWithNoBuildId(
         [good_build1, good_build2], 'Test No Builds')
     self.assertEqual(result, [good_build1, good_build2])
 
     # Test No Builds.
-    result = gather_builder_stats._RemoveBuildsWithNoBuildId(
+    result = summarize_build_stats._RemoveBuildsWithNoBuildId(
         [good_build1, good_build2, bad_build], 'Test No Builds')
     self.assertEqual(result, [good_build1, good_build2])
 
@@ -196,11 +197,10 @@
       cq_builddata = self._getTestBuildData(cq=True)
       stack.Add(mock.patch.object, gather_builder_stats.StatsManager,
                 '_FetchBuildData', side_effect=[cq_builddata, pre_cq_builddata])
-      stack.Add(mock.patch.object, gather_builder_stats, '_PrepareCreds')
-      stack.Add(mock.patch.object, gather_builder_stats.CLStats,
+      stack.Add(mock.patch.object, gather_builder_stats, 'PrepareCreds')
+      stack.Add(mock.patch.object, summarize_build_stats.CLStats,
                 'GatherFailureReasons')
-      cl_stats = gather_builder_stats.CLStats('foo@bar.com')
-      cl_stats.db = self.fake_db
+      cl_stats = summarize_build_stats.CLStats('foo@bar.com', self.fake_db)
       cl_stats.Gather(datetime.date.today(), datetime.date.today())
       cl_stats.reasons = {1: '', 2: '', 3: REASON_BAD_CL, 4: REASON_BAD_CL}
       cl_stats.blames = {1: '', 2: '', 3: 'crosreview.com/1',
@@ -258,5 +258,5 @@
                 'crosreview.com/i/6789',
                 't/1234',
                 't/4321']
-    self.assertEqual(gather_builder_stats.CLStats.ProcessBlameString(blame),
+    self.assertEqual(summarize_build_stats.CLStats.ProcessBlameString(blame),
                      expected)
commit	67aa844382722a987e434615aac937d330f78191	[log] [tgz]
author	Prathmesh Prabhu <pprabhu@chromium.org>	Tue Feb 03 14:26:48 2015 -0800
committer	ChromeOS Commit Bot <chromeos-commit-bot@chromium.org>	Thu Feb 12 00:41:32 2015 +0000
tree	25c758212907e1220dfa65408c9cdd2298819012
parent	5cc788c5c031341aa9925924ceb65bad680e1b35 [diff]