# blob: 0c4c47199e0594f30565152d1db3429cc0b799a8 [file] [log] [blame]
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import re
from recipe_engine import recipe_api
class GSUtilApi(recipe_api.RecipeApi):
  """Recipe API for running gsutil commands as build steps.

  Every public helper (upload, download, cat, ...) only assembles a gsutil
  command line and hands it to __call__, which runs it as a step.
  """

  @property
  def gsutil_py_path(self):
    """The 'gsutil.py' entry, as resolved by self.repo_resource()."""
    return self.repo_resource('gsutil.py')

  def __call__(self, cmd, name=None, use_retry_wrapper=True, version=None,
               parallel_upload=False, multithreaded=False, infra_step=True,
               **kwargs):
    """A step to run arbitrary gsutil commands.

    On LUCI this should automatically use the ambient task account credentials.
    On Buildbot, this assumes that gsutil authentication environment variables
    (AWS_CREDENTIAL_FILE and BOTO_CONFIG) are already set, though if you want to
    set them to something else you can always do so using the env={} kwarg.

    Note also that gsutil does its own wildcard processing, so wildcards are
    valid in file-like portions of the cmd. See 'gsutil help wildcards'.

    Arguments:
      * cmd (List[str|Path]) - Arguments to pass to gsutil. Include gsutil-level
        options first (see 'gsutil help options').
      * name (str) - Name of the step to use. Defaults to the first non-flag
        token in the cmd.
      * use_retry_wrapper (bool) - If true, run the 'gsutil_smart_retry.py'
        resource script and pass it the gsutil.py path, instead of running
        gsutil.py directly.
      * version (str) - If set, passed along as '--force-version <version>'.
      * parallel_upload (bool) - If true, adds
        '-o GSUtil:parallel_composite_upload_threshold=50M'.
      * multithreaded (bool) - If true, adds '-m'.
      * infra_step (bool) - Forwarded to self.m.python.
      * **kwargs - Forwarded unchanged to self.m.python.

    Returns the result of self.m.python for the assembled command.
    """
    if name:
      full_name = 'gsutil ' + name
    else:
      full_name = 'gsutil'  # our fall-through name
      # Find first cmd token not starting with '-'
      for itm in cmd:
        token = str(itm)  # it could be a Path
        if not token.startswith('-'):
          full_name = 'gsutil ' + token
          break

    gsutil_path = self.gsutil_py_path
    cmd_prefix = []

    if use_retry_wrapper:
      # We pass the real gsutil_path to the wrapper so it doesn't have to do
      # brittle path logic.
      cmd_prefix = ['--', gsutil_path]
      gsutil_path = self.resource('gsutil_smart_retry.py')

    if version:
      cmd_prefix.extend(['--force-version', version])

    if parallel_upload:
      cmd_prefix.extend([
          '-o',
          'GSUtil:parallel_composite_upload_threshold=50M',
      ])

    if multithreaded:
      cmd_prefix.extend(['-m'])

    if use_retry_wrapper:
      # The -- argument for the wrapped gsutil.py is escaped as ---- as python
      # 2.7.3 removes all occurrences of --, not only the first. It is
      # unescaped by the wrapper script and then passed as -- to gsutil.py.
      # Note, that 2.7.6 doesn't have this problem, but it doesn't hurt.
      cmd_prefix.append('----')
    else:
      cmd_prefix.append('--')

    return self.m.python(full_name, gsutil_path, cmd_prefix + cmd,
                         infra_step=infra_step, **kwargs)

  def upload(self, source, bucket, dest, args=None, link_name='gsutil.upload',
             metadata=None, unauthenticated_url=False, **kwargs):
    """Runs 'gsutil cp' from a source to gs://<bucket>/<dest>.

    Arguments:
      * source - Source argument handed to 'cp'.
      * bucket (str) - Destination bucket name.
      * dest (str) - Destination path inside the bucket.
      * args (List[str]) - Extra 'cp' arguments; the list is not modified.
      * link_name (str) - If truthy, a link with this name pointing at the
        uploaded location is added to the step presentation.
      * metadata (dict) - Optional metadata, turned into '-h' flags.
      * unauthenticated_url (bool) - Use the anonymous URL form for the link.
      * **kwargs - Forwarded to __call__; 'name' overrides the step name
        (default 'upload').

    Returns the step result.
    """
    args = [] if args is None else args[:]
    # Note that metadata arguments have to be passed before the command cp.
    metadata_args = self._generate_metadata_args(metadata)
    full_dest = 'gs://%s/%s' % (bucket, dest)
    cmd = metadata_args + ['cp'] + args + [source, full_dest]
    name = kwargs.pop('name', 'upload')

    result = self(cmd, name, **kwargs)

    if link_name:
      is_dir = '-r' in args or '--recursive' in args
      result.presentation.links[link_name] = self._http_url(
          bucket, dest, is_directory=is_dir, is_anonymous=unauthenticated_url)
    return result

  def download(self, bucket, source, dest, args=None, **kwargs):
    """Runs 'gsutil cp' from gs://<bucket>/<source> to dest.

    'name' in kwargs overrides the step name (default 'download'); remaining
    kwargs are forwarded to __call__. Returns the step result.
    """
    args = [] if args is None else args[:]
    full_source = 'gs://%s/%s' % (bucket, source)
    cmd = ['cp'] + args + [full_source, dest]
    name = kwargs.pop('name', 'download')
    return self(cmd, name, **kwargs)

  def download_url(self, url, dest, args=None, **kwargs):
    """Runs 'gsutil cp' from a Google Storage URL to dest.

    The url is first rewritten to gs:// form by _normalize_url, which raises
    AssertionError for unrecognized URLs. 'name' in kwargs overrides the step
    name (default 'download_url'). Returns the step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['cp'] + args + [url, dest]
    name = kwargs.pop('name', 'download_url')
    return self(cmd, name, **kwargs)

  def cat(self, url, args=None, **kwargs):
    """Runs 'gsutil cat' on a Google Storage URL.

    The url is first rewritten to gs:// form by _normalize_url. 'name' in
    kwargs overrides the step name (default 'cat'). Returns the step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['cat'] + args + [url]
    name = kwargs.pop('name', 'cat')
    return self(cmd, name, **kwargs)

  def stat(self, url, args=None, **kwargs):
    """Runs 'gsutil stat' on a Google Storage URL.

    The url is first rewritten to gs:// form by _normalize_url. 'name' in
    kwargs overrides the step name (default 'stat'). Returns the step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['stat'] + args + [url]
    name = kwargs.pop('name', 'stat')
    return self(cmd, name, **kwargs)

  def copy(self, source_bucket, source, dest_bucket, dest, args=None,
           link_name='gsutil.copy', metadata=None, unauthenticated_url=False,
           **kwargs):
    """Runs 'gsutil cp' between two Google Storage locations.

    Copies gs://<source_bucket>/<source> to gs://<dest_bucket>/<dest>.

    Arguments:
      * args (List[str]) - Extra 'cp' arguments; the list is not modified.
      * link_name (str) - If truthy, a link with this name pointing at the
        destination is added to the step presentation.
      * metadata (dict) - Optional metadata, turned into '-h' flags.
      * unauthenticated_url (bool) - Use the anonymous URL form for the link.
      * **kwargs - Forwarded to __call__; 'name' overrides the step name
        (default 'copy').

    Returns the step result.
    """
    # Work on a copy: the '+=' below would otherwise append the metadata
    # flags to the caller's own list.
    args = args[:] if args else []
    # NOTE(review): upload() places the '-h' metadata flags before 'cp',
    # whereas here they end up after it. Confirm against
    # 'gsutil help options' whether this ordering is intended.
    args += self._generate_metadata_args(metadata)
    full_source = 'gs://%s/%s' % (source_bucket, source)
    full_dest = 'gs://%s/%s' % (dest_bucket, dest)
    cmd = ['cp'] + args + [full_source, full_dest]
    name = kwargs.pop('name', 'copy')

    result = self(cmd, name, **kwargs)

    if link_name:
      is_dir = '-r' in args or '--recursive' in args
      result.presentation.links[link_name] = self._http_url(
          dest_bucket, dest, is_directory=is_dir,
          is_anonymous=unauthenticated_url)
    return result

  def list(self, url, args=None, **kwargs):
    """Runs 'gsutil ls' on a Google Storage URL.

    The url is first rewritten to gs:// form by _normalize_url. 'name' in
    kwargs overrides the step name (default 'list'). Returns the step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['ls'] + args + [url]
    name = kwargs.pop('name', 'list')
    return self(cmd, name, **kwargs)

  def signurl(self, private_key_file, bucket, dest, args=None, **kwargs):
    """Runs 'gsutil signurl' for gs://<bucket>/<dest>.

    The private key file is passed as the first positional argument of
    'signurl'. 'name' in kwargs overrides the step name (default 'signurl').
    Returns the step result.
    """
    args = args or []
    full_source = 'gs://%s/%s' % (bucket, dest)
    cmd = ['signurl'] + args + [private_key_file, full_source]
    name = kwargs.pop('name', 'signurl')
    return self(cmd, name, **kwargs)

  def remove_url(self, url, args=None, **kwargs):
    """Runs 'gsutil rm' on a Google Storage URL.

    The url is first rewritten to gs:// form by _normalize_url. 'name' in
    kwargs overrides the step name (default 'remove'). Returns the step
    result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['rm'] + args + [url]
    name = kwargs.pop('name', 'remove')
    return self(cmd, name, **kwargs)

  def _generate_metadata_args(self, metadata):
    """Turns a metadata dict into a flat list of '-h' flags.

    Each entry becomes '-h', '<field>:<value>', or '-h', '<field>' when the
    value is None, where <field> comes from _get_metadata_field. Entries are
    emitted in key order. Returns [] when metadata is empty or None.
    """
    result = []
    if metadata:
      # Sort on the key only. The pair is indexed rather than unpacked in the
      # lambda signature, because tuple parameters are a syntax error on
      # Python 3.
      for k, v in sorted(metadata.items(), key=lambda item: item[0]):
        field = self._get_metadata_field(k)
        param = field if v is None else ('%s:%s' % (field, v))
        result += ['-h', param]
    return result

  def _normalize_url(self, url):
    """Rewrites a recognized Google Storage URL into gs:// form.

    Accepts URLs that already start with gs://, or with one of the two https
    storage hosts below. Raises AssertionError for anything else.
    """
    gs_prefix = 'gs://'
    # Prefixes that are recognized and replaced by gs_prefix.
    for prefix in (
        gs_prefix,
        'https://storage.cloud.google.com/',
        'https://storage.googleapis.com/',
    ):
      if url.startswith(prefix):
        return gs_prefix + url[len(prefix):]
    raise AssertionError("%s cannot be normalized" % (url,))

  @classmethod
  def _http_url(cls, bucket, dest, is_directory=False, is_anonymous=False):
    """Returns an https URL for viewing <dest> in <bucket>.

    is_directory takes precedence over is_anonymous when both are set.
    """
    if is_directory:
      # Use GCP console.
      url_template = 'https://console.cloud.google.com/storage/browser/%s/%s'
    elif is_anonymous:
      # Use unauthenticated object viewer.
      url_template = 'https://storage.googleapis.com/%s/%s'
    else:
      # Use authenticated object viewer.
      url_template = 'https://storage.cloud.google.com/%s/%s'
    return url_template % (bucket, dest)

  @staticmethod
  def _get_metadata_field(name, provider_prefix=None):
    """Returns: (str) the metadata field to use with Google Storage

    The Google Storage specification for metadata can be found at:
    https://developers.google.com/storage/docs/gsutil/addlhelp/WorkingWithObjectMetadata

    Names starting with 'x-' (case-insensitive) and the standard header names
    listed below are returned unchanged. Any other name is prefixed with
    '<provider_prefix>-', where provider_prefix defaults to 'x-goog-meta'.
    """
    # Already contains custom provider prefix
    if name.lower().startswith('x-'):
      return name

    # See if it's innately supported by Google Storage
    if name in (
        'Cache-Control',
        'Content-Disposition',
        'Content-Encoding',
        'Content-Language',
        'Content-MD5',
        'Content-Type',
    ):
      return name

    # Add provider prefix
    if not provider_prefix:
      provider_prefix = 'x-goog-meta'
    return '%s-%s' % (provider_prefix, name)