| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import re |
| |
| from recipe_engine import recipe_api |
| |
class GSUtilApi(recipe_api.RecipeApi):
  """Recipe API that runs gsutil commands as build steps.

  Every public helper (upload, download, copy, ...) assembles a gsutil
  argument list and runs it through __call__, which launches gsutil.py,
  by default via the gsutil_smart_retry.py wrapper resource.
  """

  @property
  def gsutil_py_path(self):
    """Path of the gsutil.py launcher, as resolved by repo_resource()."""
    return self.repo_resource('gsutil.py')

  def __call__(self, cmd, name=None, use_retry_wrapper=True, version=None,
               parallel_upload=False, multithreaded=False, infra_step=True,
               **kwargs):
    """A step to run arbitrary gsutil commands.

    On LUCI this should automatically use the ambient task account
    credentials. On Buildbot, this assumes that gsutil authentication
    environment variables (AWS_CREDENTIAL_FILE and BOTO_CONFIG) are already
    set, though if you want to set them to something else you can always do
    so using the env={} kwarg.

    Note also that gsutil does its own wildcard processing, so wildcards are
    valid in file-like portions of the cmd. See 'gsutil help wildcards'.

    Arguments:

      * cmd (List[str|Path]) - Arguments to pass to gsutil. Include
        gsutil-level options first (see 'gsutil help options').
      * name (str) - Name of the step to use. Defaults to the first non-flag
        token in the cmd.
      * use_retry_wrapper (bool) - If true, run gsutil.py through the
        gsutil_smart_retry.py resource instead of invoking it directly.
      * version (str) - If set, passed to gsutil.py as '--force-version'.
      * parallel_upload (bool) - If true, pass
        '-o GSUtil:parallel_composite_upload_threshold=50M' to gsutil.
      * multithreaded (bool) - If true, pass '-m' to gsutil.
      * infra_step (bool) - Forwarded to self.m.python().
      * kwargs - Forwarded unchanged to self.m.python().

    Returns: the result of self.m.python() for the step.
    """
    if name:
      full_name = 'gsutil ' + name
    else:
      full_name = 'gsutil'  # our fall-through name
      # Find first cmd token not starting with '-'
      for itm in cmd:
        token = str(itm)  # it could be a Path
        if not token.startswith('-'):
          full_name = 'gsutil ' + token
          break

    gsutil_path = self.gsutil_py_path
    cmd_prefix = []

    if use_retry_wrapper:
      # We pass the real gsutil_path to the wrapper so it doesn't have to do
      # brittle path logic.
      cmd_prefix = ['--', gsutil_path]
      gsutil_path = self.resource('gsutil_smart_retry.py')

    if version:
      cmd_prefix.extend(['--force-version', version])

    if parallel_upload:
      cmd_prefix.extend([
          '-o',
          'GSUtil:parallel_composite_upload_threshold=50M',
      ])

    if multithreaded:
      cmd_prefix.append('-m')

    if use_retry_wrapper:
      # The -- argument for the wrapped gsutil.py is escaped as ---- as
      # python 2.7.3 removes all occurrences of --, not only the first. It is
      # unescaped in gsutil_wrapper.py and then passed as -- to gsutil.py.
      # Note, that 2.7.6 doesn't have this problem, but it doesn't hurt.
      cmd_prefix.append('----')
    else:
      cmd_prefix.append('--')

    # list(cmd) lets callers pass any sequence (e.g. a tuple), not only a
    # list, without changing the result for list inputs.
    return self.m.python(full_name, gsutil_path, cmd_prefix + list(cmd),
                         infra_step=infra_step, **kwargs)

  @staticmethod
  def _arg_list(args):
    """Returns a fresh list built from the optional `args` sequence.

    Always returning a new list guarantees that the caller's sequence is
    never mutated and that tuples can be concatenated with list literals.
    (`list` here is the builtin; the `list` method defined on this class is
    not visible from inside method bodies.)
    """
    return list(args) if args else []

  def upload(self, source, bucket, dest, args=None, link_name='gsutil.upload',
             metadata=None, unauthenticated_url=False, **kwargs):
    """Runs 'gsutil cp' from `source` to gs://<bucket>/<dest>.

    Arguments:

      * source (str|Path) - What to upload.
      * bucket (str) - Destination bucket name.
      * dest (str) - Destination object name inside the bucket.
      * args (List[str]) - Extra options placed right after 'cp'.
      * link_name (str) - If truthy, a link with this name pointing at the
        uploaded object is added to the step presentation.
      * metadata (dict) - Object metadata, emitted as '-h' options.
      * unauthenticated_url (bool) - Selects which host the link uses (see
        _http_url).
      * kwargs - Forwarded to __call__; 'name' overrides the default step
        name 'upload'.

    Returns: the step result.
    """
    args = self._arg_list(args)
    # Note that metadata arguments have to be passed before the command cp.
    metadata_args = self._generate_metadata_args(metadata)
    full_dest = 'gs://%s/%s' % (bucket, dest)
    cmd = metadata_args + ['cp'] + args + [source, full_dest]
    name = kwargs.pop('name', 'upload')

    result = self(cmd, name, **kwargs)

    if link_name:
      result.presentation.links[link_name] = self._http_url(
          bucket, dest, unauthenticated_url=unauthenticated_url)
    return result

  def download(self, bucket, source, dest, args=None, **kwargs):
    """Runs 'gsutil cp' from gs://<bucket>/<source> to `dest`.

    `args` are extra options placed right after 'cp'. The default step name
    is 'download'; override it with the 'name' kwarg.
    """
    args = self._arg_list(args)
    full_source = 'gs://%s/%s' % (bucket, source)
    cmd = ['cp'] + args + [full_source, dest]
    name = kwargs.pop('name', 'download')
    return self(cmd, name, **kwargs)

  def download_url(self, url, dest, args=None, **kwargs):
    """Runs 'gsutil cp' from `url` to `dest`.

    `url` may be any form accepted by _normalize_url. The default step name
    is 'download_url'; override it with the 'name' kwarg.
    """
    args = self._arg_list(args)
    url = self._normalize_url(url)
    cmd = ['cp'] + args + [url, dest]
    name = kwargs.pop('name', 'download_url')
    return self(cmd, name, **kwargs)

  def cat(self, url, args=None, **kwargs):
    """Runs 'gsutil cat' on `url` (any form accepted by _normalize_url).

    The default step name is 'cat'; override it with the 'name' kwarg.
    """
    args = self._arg_list(args)
    url = self._normalize_url(url)
    cmd = ['cat'] + args + [url]
    name = kwargs.pop('name', 'cat')
    return self(cmd, name, **kwargs)

  def copy(self, source_bucket, source, dest_bucket, dest, args=None,
           link_name='gsutil.copy', metadata=None, unauthenticated_url=False,
           **kwargs):
    """Runs 'gsutil cp' between two Google Storage objects.

    Copies gs://<source_bucket>/<source> to gs://<dest_bucket>/<dest>.

    Arguments:

      * args (List[str]) - Extra options placed right after 'cp'. The
        sequence passed in is never modified.
      * link_name (str) - If truthy, a link with this name pointing at the
        destination object is added to the step presentation.
      * metadata (dict) - Object metadata, emitted as '-h' options.
      * unauthenticated_url (bool) - Selects which host the link uses (see
        _http_url).
      * kwargs - Forwarded to __call__; 'name' overrides the default step
        name 'copy'.

    Returns: the step result.
    """
    args = self._arg_list(args)
    # '-h' is a gsutil-level option, so (exactly as in upload) the metadata
    # arguments have to be passed before the command cp, not after it.
    metadata_args = self._generate_metadata_args(metadata)
    full_source = 'gs://%s/%s' % (source_bucket, source)
    full_dest = 'gs://%s/%s' % (dest_bucket, dest)
    cmd = metadata_args + ['cp'] + args + [full_source, full_dest]
    name = kwargs.pop('name', 'copy')

    result = self(cmd, name, **kwargs)

    if link_name:
      result.presentation.links[link_name] = self._http_url(
          dest_bucket, dest, unauthenticated_url=unauthenticated_url)
    return result

  def list(self, url, args=None, **kwargs):
    """Runs 'gsutil ls' on `url` (any form accepted by _normalize_url).

    The default step name is 'list'; override it with the 'name' kwarg.
    """
    args = self._arg_list(args)
    url = self._normalize_url(url)
    cmd = ['ls'] + args + [url]
    name = kwargs.pop('name', 'list')
    return self(cmd, name, **kwargs)

  def signurl(self, private_key_file, bucket, dest, args=None, **kwargs):
    """Runs 'gsutil signurl' for gs://<bucket>/<dest>.

    `private_key_file` is passed to gsutil right before the object URL. The
    default step name is 'signurl'; override it with the 'name' kwarg.
    """
    args = self._arg_list(args)
    full_source = 'gs://%s/%s' % (bucket, dest)
    cmd = ['signurl'] + args + [private_key_file, full_source]
    name = kwargs.pop('name', 'signurl')
    return self(cmd, name, **kwargs)

  def remove_url(self, url, args=None, **kwargs):
    """Runs 'gsutil rm' on `url` (any form accepted by _normalize_url).

    The default step name is 'remove'; override it with the 'name' kwarg.
    """
    args = self._arg_list(args)
    url = self._normalize_url(url)
    cmd = ['rm'] + args + [url]
    name = kwargs.pop('name', 'remove')
    return self(cmd, name, **kwargs)

  def _generate_metadata_args(self, metadata):
    """Turns a metadata dict into a flat list of '-h' options.

    Entries are emitted in key order so the resulting command line is
    deterministic. A value of None yields just the field name; any other
    value yields '<field>:<value>'.

    Returns: (List[str]) e.g. ['-h', 'Content-Type:text/plain']; empty when
    `metadata` is None or empty.
    """
    result = []
    if metadata:
      # items() plus an index-based key works on both Python 2 and 3; the
      # tuple-unpacking lambda form is a syntax error on Python 3.
      for k, v in sorted(metadata.items(), key=lambda item: item[0]):
        field = self._get_metadata_field(k)
        param = field if v is None else '%s:%s' % (field, v)
        result += ['-h', param]
    return result

  def _normalize_url(self, url):
    """Rewrites a recognized Google Storage URL to its 'gs://' form.

    Raises: AssertionError if `url` starts with none of the known prefixes.
    """
    gs_prefix = 'gs://'
    # URL prefixes that can be rewritten to (or already are) 'gs://'.
    for prefix in (
        gs_prefix,
        'https://storage.cloud.google.com/',
        'https://storage.googleapis.com/',
    ):
      if url.startswith(prefix):
        return gs_prefix + url[len(prefix):]
    raise AssertionError("%s cannot be normalized" % url)

  @classmethod
  def _http_url(cls, bucket, dest, unauthenticated_url=False):
    """Returns the https URL for object `dest` in `bucket`.

    Uses storage.googleapis.com when `unauthenticated_url` is true and
    storage.cloud.google.com otherwise.
    """
    if unauthenticated_url:
      base = 'https://storage.googleapis.com/%s/%s'
    else:
      base = 'https://storage.cloud.google.com/%s/%s'
    return base % (bucket, dest)

  @staticmethod
  def _get_metadata_field(name, provider_prefix=None):
    """Returns: (str) the metadata field to use with Google Storage

    The Google Storage specification for metadata can be found at:
    https://developers.google.com/storage/docs/gsutil/addlhelp/WorkingWithObjectMetadata

    Names starting with 'x-' (case-insensitive) and the natively supported
    header names listed below are returned unchanged; anything else is
    prefixed with `provider_prefix`, which defaults to 'x-goog-meta'.
    """
    # Already contains custom provider prefix
    if name.lower().startswith('x-'):
      return name

    # See if it's innately supported by Google Storage
    if name in (
        'Cache-Control',
        'Content-Disposition',
        'Content-Encoding',
        'Content-Language',
        'Content-MD5',
        'Content-Type',
    ):
      return name

    # Add provider prefix
    if not provider_prefix:
      provider_prefix = 'x-goog-meta'
    return '%s-%s' % (provider_prefix, name)