# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This script scans through all canary builds in gs://chromeos-releases/.

It classifies what type of build each one was.
"""

from __future__ import print_function

import re

from chromite.cbuildbot import constants
from chromite.lib import cros_logging as logging
from chromite.lib import git

# Build type labels.
# NOTE(review): none of these are referenced in the visible part of this
# module; presumably callers use them to classify builds -- confirm.
RELEASE = 'RELEASE'
FIRMWARE = 'FIRMWARE'
FACTORY = 'FACTORY'


#
# This section of the library is for locating purge candidates in
#   gs://chromeos-releases.
#

class ParseException(Exception):
  """Signals that a branch name or build URI could not be parsed."""


def ParseBranchName(branch):
  """Parse a firmware/factory branch name.

  Given a branch name (possibly a remote branch) of a firmware or factory
  branch, extract the version number it was branched from.

  'origin/factory-veyron-7505.B' -> '7505'
  'factory-2723.14.B' -> '2723.14'

  By convention, this extracted version number is a partial version that
  identifies the build branched from. For example, '7505' was branched from
  '7505.0.0'. '2723.14' from '2723.14.0'.

  Any build which starts with matching version numbers (except the build
  branched from) will have been built from this branch. If multiple branches
  were taken from the same root build, then we will have collisions. That
  doesn't matter since the build artifacts would be stored in locations that
  would also collide.

  Args:
    branch: Branch name as string.

  Returns:
    Version as a string of one or more dot-separated integers.

  Raises:
    ParseException if the branch isn't a conventionally named firmware/factory
    branch, or if its version is not made of dot-separated integers.
  """
  # The version must be dot-separated integers. A looser character class
  # would let values such as '.', '.5' or '7505.' through, and those cannot be
  # split into integer components later on.
  pattern = r'(factory|firmware)-([a-zA-Z_]+-)?([0-9]+(?:\.[0-9]+)*)\.B$'
  m = re.search(pattern, branch)
  if not m:
    raise ParseException('Unable to parse branch name: "%s"' % branch)
  return m.group(3)


def ProtectedBranchVersions(remote_branches):
  """Collect the versions of every firmware/factory branch in a branch list.

  Each name is run through ParseBranchName. Names which parse are treated as
  'protected' branches and contribute their version; names which fail to parse
  are silently skipped.

  Args:
    remote_branches: Iterable of branch names as strings.

  Returns:
    List of branch versions as strings, in input order.
  """
  versions = []
  for name in remote_branches:
    try:
      parsed = ParseBranchName(name)
    except ParseException:
      # Not a firmware/factory branch, so it contributes nothing.
      continue
    versions.append(parsed)
  return versions


def ParseChromeosReleasesBuildUri(uri):
  """Parse a build URI.

  'gs://chromeos-releases/canary-channel/duck/6652.0.0/' -> '6652.0.0'

  Args:
    uri: string describing a build URI.

  Returns:
    Version as a string of one or more dot-separated integers.

  Raises:
    ParseException if the URI does not describe a build output directory,
    i.e. its final path component is not a dot-separated integer version.
  """
  # The final component must be dot-separated integers. A looser character
  # class would accept directory names such as '..' or '6652..0', which cannot
  # be split into integer components later on.
  pattern = r'^gs://.*/.*/.*/([0-9]+(?:\.[0-9]+)*)/$'
  m = re.match(pattern, uri)
  if not m:
    raise ParseException('Unable to parse build uri: "%s"' % uri)
  return m.group(1)


def VersionBranchMatch(version, branch):
  """Decide whether a build version was produced on a given branch.

  Given a branch version of '1', then '1.2.0' or '1.2.3' were built from this
  branch and will match.

  However, '1.0.0' is the build the branch was taken from, so it was NOT
  built on the branch and does not match.

  See ParseBranchName for a fuller explanation of branch versions.

  Args:
    version: Full build version as a string ('1.2.3').
    branch: Partial version, as a string, that the branch was taken from.

  Returns:
    True if the version is part of the branch, False otherwise.
  """
  build_nums = list(map(int, version.split('.')))
  root_nums = list(map(int, branch.split('.')))
  root_len = len(root_nums)

  # A build on the branch needs three things: at least one component beyond
  # the branch version, a leading part equal to the branch version, and a
  # non-zero first component past that prefix. A zero there identifies the
  # build the branch was cut from rather than a build on the branch.
  return (len(build_nums) > root_len and
          build_nums[:root_len] == root_nums and
          build_nums[root_len] != 0)


def InBranches(version, branch_versions):
  """Check whether a build version belongs to any of the given branches.

  Args:
    version: Version of the build, as a string.
    branch_versions: Iterable of parsed branch versions (see ParseBranchName).

  Returns:
    True as soon as one branch version matches, False if none do.
  """
  for candidate in branch_versions:
    if VersionBranchMatch(version, candidate):
      return True
  return False


def ListRemoteBranches():
  """List the remote branches of the chromite repository.

  Runs 'git branch -lr' in constants.CHROMITE_DIR and returns one entry per
  output line, with surrounding whitespace removed.

  Returns:
    List of branch names as strings.
  """
  cmd_result = git.RunGit(constants.CHROMITE_DIR, ['branch', '-lr'])
  branches = []
  for line in cmd_result.output.splitlines():
    branches.append(line.strip())
  return branches


def SafeList(ctx, url):
  """List a GS URL with details enabled, treating any failure as 'empty'.

  Almost every error is swallowed. GS flake can trigger all sorts of random
  failures, and we don't want flake to interrupt a multi-day script run.
  Returning [] is generally safe: files that would have been discovered are
  presumed not to exist, and so are ignored during the current cleanup pass.

  Funnelling listings through this one function also makes it a convenient
  place to mock out results in unittests.

  Args:
    ctx: GS context; must provide List(url, details=True).
    url: URL to list.

  Returns:
    Whatever ctx.List returns, or [] if it raised an Exception.
  """
  try:
    listing = ctx.List(url, details=True)
  except Exception as err:
    # We can fail for lots of repeated random reasons; log it and move on.
    logging.warn('List of "%s" failed, ignoring: "%s"', url, err)
    listing = []
  return listing


def LocateChromeosReleasesProtectedPrefixes(ctx, protected_branches):
  """Find all prefixes in gs://chromeos-releases to exclude from cleanup.

  This determines locations to be preserved when considering files to remove.

  We never clean up dev, beta, or stable channel builds, nor the signer
  logs/operational directories. We also preserve the Attic (for now) since we
  don't have file permissions to clean it up.

  Every build in the canary channel is examined and marked for preservation if
  it was built on a firmware or factory branch. The branch is determined by
  comparing the version numbers of the branch and the build.

  Firmware/factory builds are preserved since they produced binaries which may
  be used for long periods of time.

  Canary entries whose URL cannot be parsed as a build directory are logged
  and also added to the result, so they are excluded from cleanup too.

  Args:
    ctx: GS context.
    protected_branches: List of branch versions as strings.

  Returns:
    List of URL prefixes to exclude.
  """
  result = [
      'gs://chromeos-releases/Attic',
      'gs://chromeos-releases/stable-channel',
      'gs://chromeos-releases/beta-channel',
      'gs://chromeos-releases/dev-channel',
      'gs://chromeos-releases/logs',
      'gs://chromeos-releases/tobesigned',
  ]

  # Canary channel builds have to be examined one at a time.
  for board in SafeList(ctx, 'gs://chromeos-releases/canary-channel/'):
    for build in SafeList(ctx, board.url):
      build_url = build.url
      try:
        version = ParseChromeosReleasesBuildUri(build_url)
        keep = InBranches(version, protected_branches)
      except ParseException:
        # Entries we don't understand are kept in the exclusion list.
        logging.info('Found unexpected: "%s"', build_url)
        keep = True

      if keep:
        result.append(build_url)

  return result

#
# This section is for locating purge candidates in
#   gs://chromeos-image-archive/.
#

def LocateChromeosImageArchiveProtectedPrefixes(ctx):
  """Find all prefixes in gs://chromeos-image-archive to exclude from cleanup.

  Every top level entry is treated as a builder name. Firmware builders (but
  not trybots) are protected, since these firmware builds are needed for
  manual FAFT tests run prior to releasing a new firmware version.

  Args:
    ctx: GS context.

  Returns:
    List of URL prefixes to exclude.
  """
  protected = []

  for entry in SafeList(ctx, 'gs://chromeos-image-archive/'):
    # Last path component, e.g. 'gs://chromeos-releases/Attic/' -> 'Attic'.
    builder = entry.url.rstrip('/').rsplit('/', 1)[-1]

    # Trybot output is never protected.
    if builder.startswith('trybot-'):
      continue

    # Protect the remaining firmware builders.
    if builder.endswith('-firmware'):
      protected.append(entry.url)

  return protected

#
# This section is for handling purge candidates.
#

def ProduceFilteredCandidates(ctx, root_url, prefixes, search_depth):
  """Given a root URL and a list of prefixes get a list of purge candidates.

  The tree under root_url is listed level by level. Any entry whose URL starts
  with one of the prefixes is dropped (and never descended into). Files are
  yielded as found. Directories shallower than the search depth are recursed
  into; directories at or beyond it are listed in one call using a trailing
  '**' wildcard.

  Args:
    ctx: GS context.
    root_url: Url of base directory to consider. IE: gs://chromeos-releases/
    prefixes: Iterable of URLs to exclude from results. May be any iterable,
      including a single-pass generator. Must not be empty.
    search_depth: Number of directory levels below root_url to walk one level
      at a time before switching to '**' listings. No prefix may be deeper
      than this.

  Returns:
    Returns an iterable (generator) of gs.GSListResult objects for files.

  Raises:
    ValueError: If prefixes is empty.
    AssertionError: If a prefix is deeper than the search depth.
  """
  def depth(url):
    return len(url.split('/'))

  # prefixes is walked twice below (pattern build and depth check). Snapshot
  # it so single-pass iterables such as generators aren't exhausted by the
  # first walk.
  prefixes = list(prefixes)

  prefix_re = re.compile('|'.join(re.escape(p) for p in prefixes))

  logging.info('Examining: "%s"', root_url)

  # How many directory levels down do we have to go.
  max_depth = depth(root_url) + search_depth
  # NOTE: max() raises ValueError for an empty prefix list. That is left in
  # place on purpose: an empty alternation would compile to a pattern that
  # matches every URL.
  prefix_depth = max(depth(p) for p in prefixes)
  assert prefix_depth <= max_depth, (
      'prefix_depth(%s) must be <= search_depth(%s)' %
      (prefix_depth, max_depth))

  logging.debug('Using search_depth %d with prefix_depth %d',
                max_depth, prefix_depth)

  def recurse(base_url):
    for result in SafeList(ctx, base_url):
      url = result.url

      # Excluded prefix: skip it, and everything beneath it.
      if prefix_re.match(url):
        continue

      if not url.endswith('/'):
        # If it's just a file, it's a result.
        yield result
      elif depth(url) < max_depth:
        # Not deep enough to have tested every possible prefix, so recurse.
        for u in recurse(url):
          yield u
      else:
        # Past every prefix: take the whole subtree in a single listing.
        for u in SafeList(ctx, url + '**'):
          yield u

  return recurse(root_url)


def ExpandCandidate(ctx, candidate):
  """Given a gs.GSListResult object, expand to a list of testable objects.

  A candidate which already carries a creation_time is returned as is, wrapped
  in a list. Anything else is listed again through SafeList: a URL ending in
  '/' is treated as a directory and listed with a trailing '**' wildcard, and
  any other URL is listed directly.

  Args:
    ctx: GS context.
    candidate: gs.GSListResult object representing a file or directory.

  Returns:
    Returns an iterable of gs.GSListResult objects.
  """
  if candidate.creation_time is None:
    target = candidate.url
    if target.endswith('/'):
      # A directory: ask for the full contents.
      target = target + '**'
    return SafeList(ctx, target)

  # Details are already populated, so this is a file (a directory can't have
  # a creation time). Hand it back untouched.
  return [candidate]
