| # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Common Google Storage interface library.""" |
| |
| from __future__ import print_function |
| |
| import base64 |
| import errno |
| import os |
| import re |
| |
| from chromite.lib import cros_build_lib |
| from chromite.lib import cros_logging as logging |
| from chromite.lib import gs |
| from chromite.lib import osutils |
| from chromite.lib.paygen import filelib |
| from chromite.lib.paygen import utils |
| |
| |
| PROTOCOL = 'gs' |
| RETRY_ATTEMPTS = 2 |
| GS_LS_STATUS_RE = re.compile(r'status=(\d+)') |
| |
| # Gsutil is filled in by "FindGsUtil" on first invocation. |
| GSUTIL = None |
| |
| |
| def FindGsUtil(): |
| """Find which gsutil executuable to use. |
| |
| This may download and cache the command if needed, and will return the |
| version pinned by chromite for general use. Will cache the result after |
| the first call. |
| |
| This function is multi-process safe, but NOT THREAD SAFE. If you need |
| to use gsutil functionality in threads, call this function at least |
| once before creating the threads. That way the value will be safely |
| pre-cached. |
| |
| Returns: |
| Full path to the gsutil command to use. |
| """ |
| # TODO(dgarrett): This is a hack. Merge chromite and crostools to fix. |
| |
| global GSUTIL # pylint: disable=global-statement |
| if GSUTIL is None: |
| GSUTIL = gs.GSContext.GetDefaultGSUtilBin() |
| |
| return GSUTIL |
| |
| |
| class GsutilError(Exception): |
| """Base exception for errors where gsutil cannot be used for any reason.""" |
| |
| |
| class GsutilMissingError(GsutilError): |
| """Returned when the gsutil utility is missing from PATH.""" |
| |
| def __init__(self, msg='The gsutil utility must be installed.'): |
| GsutilError.__init__(self, msg) |
| |
| |
| class GSLibError(Exception): |
| """Raised when gsutil command runs but gives an error.""" |
| |
| |
| class CopyFail(GSLibError): |
| """Raised if Copy fails in any way.""" |
| |
| |
| class MoveFail(GSLibError): |
| """Raised if Move fails in any way.""" |
| |
| |
| class RemoveFail(GSLibError): |
| """Raised if Remove fails in any way.""" |
| |
| |
| class CatFail(GSLibError): |
| """Raised if Cat fails in any way.""" |
| |
| |
| class StatFail(GSLibError): |
| """Raised if Stat fails in any way.""" |
| |
| |
| class URIError(GSLibError): |
| """Raised when URI does not behave as expected.""" |
| |
| |
| class ValidateGsutilFailure(GSLibError): |
| """We are unable to validate that gsutil is working correctly.""" |
| |
| |
| def RetryGSLib(func): |
| """Decorator to retry function calls that throw an exception. |
| |
| If the decorated method throws a GSLibError exception, the exception |
| will be thrown away and the function will be run again until all retries |
| are exhausted. On the final attempt, the exception will be thrown normally. |
| |
| Three attempts in total will be made to run the function (one more |
| than RETRY_ATTEMPTS). |
| |
| @RetryGSLib |
| def MyFunctionHere(): pass |
| """ |
| def RetryHandler(*args, **kwargs): |
| """Retry func with given args/kwargs RETRY_ATTEMPTS times.""" |
| warning_msgs = [] |
| for i in xrange(0, RETRY_ATTEMPTS + 1): |
| try: |
| return func(*args, **kwargs) |
| except GSLibError as ex: |
| # On the last try just pass the exception on up. |
| if i >= RETRY_ATTEMPTS: |
| raise |
| |
| error_msg = str(ex) |
| RESUMABLE_ERROR_MESSAGE = ( |
| gs.GSContext.RESUMABLE_DOWNLOAD_ERROR, |
| gs.GSContext.RESUMABLE_UPLOAD_ERROR, |
| 'ResumableUploadException', |
| 'ResumableDownloadException', |
| 'ssl.SSLError: The read operation timed out', |
| ) |
| if (func.__name__ == 'Copy' and |
| any(x in error_msg for x in RESUMABLE_ERROR_MESSAGE)): |
| logging.info( |
| 'Resumable download/upload exception occured for %s', args[1]) |
| # Pass the dest_path to get the tracker filename. |
| tracker_filenames = gs.GSContext.GetTrackerFilenames(args[1]) |
| # This part of the code is copied from chromite.lib.gs with |
| # slight modifications. This is a temporary solution until |
| # we can deprecate crostools.lib.gslib (crbug.com/322740). |
| logging.info('Potential list of tracker files: %s', |
| tracker_filenames) |
| for tracker_filename in tracker_filenames: |
| tracker_file_path = os.path.join( |
| gs.GSContext.DEFAULT_GSUTIL_TRACKER_DIR, |
| tracker_filename) |
| if os.path.exists(tracker_file_path): |
| logging.info('Deleting gsutil tracker file %s before retrying.', |
| tracker_file_path) |
| logging.info('The content of the tracker file: %s', |
| osutils.ReadFile(tracker_file_path)) |
| osutils.SafeUnlink(tracker_file_path) |
| else: |
| if 'AccessDeniedException' in str(ex) or 'NoSuchKey' in str(ex): |
| raise |
| |
| # Record a warning message to be issued if a retry actually helps. |
| warning_msgs.append('Try %d failed with error message:\n%s' % |
| (i + 1, ex)) |
| else: |
| # If the func succeeded, then log any accumulated warning messages. |
| if warning_msgs: |
| logging.warning('Failed %s %d times before success:\n%s', |
| func.__name__, len(warning_msgs), |
| '\n'.join(warning_msgs)) |
| |
| RetryHandler.__module__ = func.__module__ |
| RetryHandler.__name__ = func.__name__ |
| RetryHandler.__doc__ = func.__doc__ |
| return RetryHandler |
| |
| |
| def RunGsutilCommand(args, |
| redirect_stdout=True, |
| redirect_stderr=True, |
| failed_exception=GSLibError, |
| generation=None, |
| headers=None, |
| get_headers_from_stdout=False, |
| **kwargs): |
| """Run gsutil with given args through RunCommand with given options. |
| |
| Generally this method is intended for use within this module, see the various |
| command-specific wrappers provided for convenience. However, it can be called |
| directly if 'gsutil' needs to be called in specific way. |
| |
| A few of the options for RunCommand have their default values switched for |
| this function. Those options are called out explicitly as options here, while |
| addition RunCommand options can be used through extra_run_command_opts. |
| |
| Args: |
| args: List of arguments to use with 'gsutil'. |
| redirect_stdout: Boolean option passed directly to RunCommand. |
| redirect_stderr: Boolean option passed directly to RunCommand. |
| failed_exception: Exception class to raise if CommandFailedException is |
| caught. It should be GSLibError or a subclass. |
| generation: Only run the specified command if the generation matches. |
| (See "Conditional Updates Using Object Versioning" in the gsutil docs.) |
| headers: Fill in this dictionary with header values captured from stderr. |
| get_headers_from_stdout: Whether header information is to be parsed from |
| stdout (default: stderr). |
| kwargs: Additional options to pass directly to RunCommand, beyond the |
| explicit ones above. See RunCommand itself. |
| |
| Returns: |
| Anything that RunCommand returns, which should be a CommandResult object. |
| |
| Raises: |
| GsutilMissingError is the gsutil utility cannot be found. |
| GSLibError (or whatever is in failed_exception) if RunCommand failed (and |
| error_code_ok was not True). |
| """ |
| # The -d flag causes gsutil to dump various metadata, including user |
| # credentials. We therefore don't allow users to pass it in directly. |
| assert '-d' not in args, 'Cannot pass in the -d flag directly' |
| |
| gsutil = FindGsUtil() |
| |
| if generation is not None: |
| args = ['-h', 'x-goog-if-generation-match:%s' % generation] + args |
| if headers is not None: |
| args.insert(0, '-d') |
| assert redirect_stderr |
| cmd = [gsutil] + args |
| run_opts = { |
| 'redirect_stdout': redirect_stdout, |
| 'redirect_stderr': redirect_stderr, |
| } |
| run_opts.update(kwargs) |
| |
| try: |
| result = cros_build_lib.RunCommand(cmd, **run_opts) |
| except OSError as e: |
| if e.errno == errno.ENOENT: |
| raise GsutilMissingError() |
| raise |
| except cros_build_lib.RunCommandError as e: |
| # If headers is set, we have to hide the output here because it may contain |
| # credentials that we don't want to show in buildbot logs. |
| raise failed_exception('%r failed' % cmd if headers else e.result.error) |
| |
| if headers is not None and result is not None: |
| assert redirect_stdout if get_headers_from_stdout else redirect_stderr |
| # Parse headers that look like this: |
| # header: x-goog-generation: 1359148994758000 |
| # header: x-goog-metageneration: 1 |
| headers_source = result.output if get_headers_from_stdout else result.error |
| for line in headers_source.splitlines(): |
| if line.startswith('header: '): |
| header, _, value = line.partition(': ')[-1].partition(': ') |
| headers[header.replace('x-goog-', '')] = value |
| |
| # Strip out stderr entirely to avoid showing credentials in logs; for |
| # commands that dump credentials to stdout, clobber that as well. |
| result.error = '<stripped>' |
| if get_headers_from_stdout: |
| result.output = '<stripped>' |
| |
| return result |
| |
| |
| def ValidateGsutilWorking(bucket): |
| """Validate that gsutil is working correctly. |
| |
| There is a failure mode for gsutil in which all operations fail, and this |
| is indistinguishable from all gsutil ls operations matching nothing. We |
| check that there is at least one file in the root of the bucket. |
| |
| Args: |
| bucket: bucket we are about to test. |
| |
| Raises: |
| ValidateGsutilFailure: If we are unable to find any files in the bucket. |
| """ |
| url = 'gs://%s/' % bucket |
| if not List(url): |
| raise ValidateGsutilFailure('Unable to find anything in: %s' % url) |
| |
| |
| @RetryGSLib |
| def MD5Sum(gs_uri): |
| """Read the gsutil md5 sum from etag and gsutil ls -L. |
| |
| Note that because this relies on 'gsutil ls -L' it suffers from the |
| eventual consistency issue, meaning this function could fail to find |
| the MD5 value for a recently created file in Google Storage. |
| |
| Args: |
| gs_uri: An absolute Google Storage URI that refers directly to an object. |
| No globs are supported. |
| |
| Returns: |
| A string that is an md5sum, or None if no object found. |
| |
| Raises: |
| GSLibError if the gsutil command fails. If there is no object at that path |
| that is not considered a failure. |
| """ |
| gs_md5_regex = re.compile(r'.*?Hash \(md5\):\s+(.*)', re.IGNORECASE) |
| args = ['ls', '-L', gs_uri] |
| |
| result = RunGsutilCommand(args, error_code_ok=True) |
| |
| # If object was not found then output is completely empty. |
| if not result.output: |
| return None |
| |
| for line in result.output.splitlines(): |
| match = gs_md5_regex.match(line) |
| if match: |
| # gsutil now prints the MD5 sum in base64, but we want it in hex. |
| return base64.b16encode(base64.b64decode(match.group(1))).lower() |
| |
| # This means there was some actual failure in the command. |
| raise GSLibError('Unable to determine MD5Sum for %r' % gs_uri) |
| |
| |
| @RetryGSLib |
| def Cmp(path1, path2): |
| """Return True if paths hold identical files, according to MD5 sum. |
| |
| Note that this function relies on MD5Sum, which means it also can only |
| promise eventual consistency. A recently uploaded file in Google Storage |
| may behave badly in this comparison function. |
| |
| If either file is missing then always return False. |
| |
| Args: |
| path1: URI to a file. Local paths also supported. |
| path2: URI to a file. Local paths also supported. |
| |
| Returns: |
| True if files are the same, False otherwise. |
| """ |
| md5_1 = MD5Sum(path1) if IsGsURI(path1) else filelib.MD5Sum(path1) |
| if not md5_1: |
| return False |
| |
| md5_2 = MD5Sum(path2) if IsGsURI(path2) else filelib.MD5Sum(path2) |
| |
| return md5_1 == md5_2 |
| |
| |
| @RetryGSLib |
| def Copy(src_path, dest_path, acl=None, **kwargs): |
| """Run gsutil cp src_path dest_path supporting GS globs. |
| |
| e.g. |
| gsutil cp /etc/* gs://etc/ where /etc/* is src_path with a glob and |
| gs://etc is dest_path. |
| |
| This assumes that the src or dest path already exist. |
| |
| Args: |
| src_path: The src of the path to copy, either a /unix/path or gs:// uri. |
| dest_path: The dest of the path to copy, either a /unix/path or gs:// uri. |
| acl: an ACL argument (predefined name or XML file) to pass to gsutil |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Raises: |
| CopyFail: If the copy fails for any reason. |
| """ |
| args = ['cp'] |
| if acl: |
| args += ['-a', acl] |
| args += [src_path, dest_path] |
| RunGsutilCommand(args, failed_exception=CopyFail, **kwargs) |
| |
| |
| @RetryGSLib |
| def Move(src_path, dest_path, **kwargs): |
| """Run gsutil mv src_path dest_path supporting GS globs. |
| |
| Note that the created time is changed to now for the moved object(s). |
| |
| Args: |
| src_path: The src of the path to move, either a /unix/path or gs:// uri. |
| dest_path: The dest of the path to move, either a /unix/path or gs:// uri. |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Raises: |
| MoveFail: If the move fails for any reason. |
| """ |
| args = ['mv', src_path, dest_path] |
| RunGsutilCommand(args, failed_exception=MoveFail, **kwargs) |
| |
| |
| @RetryGSLib |
| def Remove(*paths, **kwargs): # pylint: disable=docstring-misnamed-args |
| """Run gsutil rm on path supporting GS globs. |
| |
| Args: |
| paths: Local path or gs URI, or list of same. |
| ignore_no_match: If True, then do not complain if anything was not |
| removed because no URI match was found. Like rm -f. Defaults to False. |
| recurse: Remove recursively starting at path. Same as rm -R. Defaults |
| to False. |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Raises: |
| RemoveFail: If the remove fails for any reason. |
| """ |
| ignore_no_match = kwargs.pop('ignore_no_match', False) |
| recurse = kwargs.pop('recurse', False) |
| |
| args = ['rm'] |
| |
| if recurse: |
| args.append('-R') |
| |
| args.extend(paths) |
| |
| try: |
| RunGsutilCommand(args, failed_exception=RemoveFail, **kwargs) |
| except RemoveFail as e: |
| should_raise = True |
| msg = str(e.args[0]) |
| |
| # Sometimes Google Storage glitches and complains about failing to remove a |
| # specific revision of the file. It ends up getting removed anyway, but it |
| # throws a NotFoundException. |
| if (ignore_no_match and (('No URLs matched' in msg) or |
| ('NotFoundException:' in msg))): |
| should_raise = False |
| |
| if should_raise: |
| raise |
| |
| def RemoveDirContents(gs_dir_uri): |
| """Remove all contents of a directory. |
| |
| Args: |
| gs_dir_uri: directory to delete contents of. |
| """ |
| Remove(os.path.join(gs_dir_uri, '**'), ignore_no_match=True) |
| |
| |
| def CreateWithContents(gs_uri, contents, **kwargs): |
| """Creates the specified file with specified contents. |
| |
| Args: |
| gs_uri: The URI of a file on Google Storage. |
| contents: Contents to write to the file. |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Raises: |
| CopyFail: If it fails for any reason. |
| """ |
| with utils.CreateTempFileWithContents(contents) as content_file: |
| Copy(content_file.name, gs_uri, **kwargs) |
| |
| |
| @RetryGSLib |
| def Cat(gs_uri, **kwargs): |
| """Return the contents of a file at the given GS URI |
| |
| Args: |
| gs_uri: The URI of a file on Google Storage. |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Raises: |
| CatFail: If the cat fails for any reason. |
| """ |
| args = ['cat', gs_uri] |
| result = RunGsutilCommand(args, failed_exception=CatFail, **kwargs) |
| return result.output |
| |
| |
| def Stat(gs_uri, **kwargs): |
| """Stats a file at the given GS URI (returns nothing). |
| |
| Args: |
| gs_uri: The URI of a file on Google Storage. |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Raises: |
| StatFail: If the stat fails for any reason. |
| """ |
| args = ['stat', gs_uri] |
| # IMPORTANT! With stat, header information is dumped to standard output, |
| # rather than standard error, as with other gsutil commands. Hence, |
| # get_headers_from_stdout must be True to ensure both correct parsing of |
| # output and stripping of sensitive information. |
| RunGsutilCommand(args, failed_exception=StatFail, |
| get_headers_from_stdout=True, **kwargs) |
| |
| |
| def IsGsURI(path): |
| """Returns true if the path begins with gs:// |
| |
| Args: |
| path: An absolute Google Storage URI. |
| |
| Returns: |
| True if path is really a google storage uri that begins with gs:// |
| False otherwise. |
| """ |
| return path and path.startswith(PROTOCOL + '://') |
| |
| |
| # TODO(mtennant): Rename this "Size" for consistency. |
| @RetryGSLib |
| def FileSize(gs_uri, **kwargs): |
| """Return the size of the given gsutil file in bytes. |
| |
| Args: |
| gs_uri: Google Storage URI (beginning with 'gs://') pointing |
| directly to a single file. |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Returns: |
| Size of file in bytes. |
| |
| Raises: |
| URIError: Raised when URI is unknown to Google Storage or when |
| URI matches more than one file. |
| """ |
| headers = {} |
| try: |
| Stat(gs_uri, headers=headers, **kwargs) |
| except StatFail as e: |
| raise URIError('Unable to stat file at URI %r: %s' % (gs_uri, e)) |
| |
| size_str = headers.get('stored-content-length') |
| if size_str is None: |
| raise URIError('Failed to get size of %r' % gs_uri) |
| |
| return int(size_str) |
| |
| |
| def Exists(gs_uri, **kwargs): |
| """Return True if object exists at given GS URI. |
| |
| Args: |
| gs_uri: Google Storage URI. Must be a fully-specified URI with |
| no glob expression. Even if a glob expression matches this |
| method will return False. |
| kwargs: Additional options to pass directly to RunGsutilCommand, beyond the |
| explicit ones above. See RunGsutilCommand itself. |
| |
| Returns: |
| True if gs_uri points to an existing object, and False otherwise. |
| """ |
| try: |
| Stat(gs_uri, **kwargs) |
| except StatFail: |
| return False |
| |
| return True |
| |
| |
| @RetryGSLib |
| def List(root_uri, recurse=False, filepattern=None, sort=False): |
| """Return list of file and directory paths under given root URI. |
| |
| Args: |
| root_uri: e.g. gs://foo/bar |
| recurse: Look in subdirectories, as well |
| filepattern: glob pattern to match against basename of path |
| sort: If True then do a default sort on paths |
| |
| Returns: |
| List of GS URIs to paths that matched |
| """ |
| gs_uri = root_uri |
| if recurse: |
| # In gs file patterns '**' absorbs any number of directory names, |
| # including none. |
| gs_uri = gs_uri.rstrip('/') + '/**' |
| |
| # Now match the filename itself at the end of the URI. |
| if filepattern: |
| gs_uri = gs_uri.rstrip('/') + '/' + filepattern |
| |
| args = ['ls', gs_uri] |
| |
| try: |
| result = RunGsutilCommand(args) |
| paths = [path for path in result.output.splitlines() if path] |
| |
| if sort: |
| paths = sorted(paths) |
| |
| return paths |
| |
| except GSLibError as e: |
| # The ls command will fail under normal operation if there was just |
| # nothing to be found. That shows up like this to stderr: |
| # CommandException: One or more URLs matched no objects. |
| if 'CommandException: One or more URLs matched no objects.' not in str(e): |
| raise |
| |
| # Otherwise, assume a normal error. |
| # TODO(mtennant): It would be more functionally correct to return this |
| # if and only if the error is identified as a "file not found" error. |
| # We simply have to determine how to do that reliably. |
| return [] |
| |
| |
| def ListFiles(root_uri, recurse=False, filepattern=None, sort=False): |
| """Return list of file paths under given root URI. |
| |
| Directories are intentionally excluded. |
| |
| Args: |
| root_uri: e.g. gs://foo/bar |
| recurse: Look for files in subdirectories, as well |
| filepattern: glob pattern to match against basename of file |
| sort: If True then do a default sort on paths |
| |
| Returns: |
| List of GS URIs to files that matched |
| """ |
| paths = List(root_uri, recurse=recurse, filepattern=filepattern, sort=sort) |
| |
| # Directory paths should be excluded from output, per ListFiles guarantee. |
| return [path for path in paths if not path.endswith('/')] |