blob: 7b1f509e5d5a3ac22af3b3962042049a6971333c [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities for standard operations on URIs of different kinds."""
# pylint: disable=bad-continuation
from __future__ import print_function
import re
import sys
import urllib
import urllib2
import fixup_path
fixup_path.FixupPath()
from chromite.lib.paygen import filelib
from chromite.lib.paygen import gslib
# This module lets files kept on different kinds of storage be handled in one
# common way, for the operations each storage type supports.

# URI protocols (schemes) known to this module.
PROTOCOL_GS = gslib.PROTOCOL
PROTOCOL_HTTP = 'http'
PROTOCOL_HTTPS = 'https'
PROTOCOLS = (PROTOCOL_GS, PROTOCOL_HTTP, PROTOCOL_HTTPS)

# Text separating the protocol from the remainder of a URI.
PROTOCOL_SEP = '://'

# Matches a leading "<protocol>://"; group 1 is the protocol.
EXTRACT_PROTOCOL_RE = re.compile(r'^(\w+)%s' % PROTOCOL_SEP)
# Matches a complete "<protocol>://<path>"; groups are (protocol, path).
SPLIT_URI_RE = re.compile(r'^(\w+)%s(.*)$' % PROTOCOL_SEP)

# URI types as returned by GetUriType. Anything without a protocol prefix is
# reported as TYPE_LOCAL.
TYPE_LOCAL = 'file'
TYPE_GS = PROTOCOL_GS
TYPE_HTTP = PROTOCOL_HTTP
TYPE_HTTPS = PROTOCOL_HTTPS
class NotSupportedForType(RuntimeError):
  """Raised when an operation is not supported for a particular URI type."""

  def __init__(self, uri_type, extra_msg=None):
    """Build the error message, naming the function that raised.

    Args:
      uri_type: The URI type the operation does not support.
      extra_msg: Optional text appended to the message after a comma.
    """
    # Frame 1 is whoever constructed this exception; this lookup must stay
    # directly inside __init__ or the reported function name changes.
    # pylint: disable=W0212
    caller_name = sys._getframe(1).f_code.co_name
    suffix = ', ' + extra_msg if extra_msg else ''
    text = 'Function %s not supported for %s URIs' % (caller_name, uri_type)
    super(NotSupportedForType, self).__init__(text + suffix)
class NotSupportedForTypes(RuntimeError):
  """Raised when an operation is not supported for a set of URI types."""

  def __init__(self, extra_msg=None, *uri_types):
    """Build the error message, naming the function that raised.

    Note that extra_msg is the FIRST positional parameter: callers that only
    want to list URI types must pass None (or '') ahead of them.

    Args:
      extra_msg: Optional text appended to the message after a comma.
      *uri_types: The URI type strings that were involved.
    """
    # Frame 1 is whoever constructed this exception; this lookup must stay
    # directly inside __init__ or the reported function name changes.
    # pylint: disable=W0212
    caller_name = sys._getframe(1).f_code.co_name
    type_list = ', '.join(uri_types)
    text = ('Function %s not supported for set of URIs with types: %s' %
            (caller_name, type_list))
    suffix = ', ' + extra_msg if extra_msg else ''
    super(NotSupportedForTypes, self).__init__(text + suffix)
class NotSupportedBetweenTypes(RuntimeError):
  """Raised when an operation is not supported between two URI types."""

  def __init__(self, uri_type1, uri_type2, extra_msg=None):
    """Build the error message, naming the function that raised.

    Args:
      uri_type1: URI type of the first operand.
      uri_type2: URI type of the second operand.
      extra_msg: Optional text appended to the message after a comma.
    """
    # Frame 1 is whoever constructed this exception; this lookup must stay
    # directly inside __init__ or the reported function name changes.
    # pylint: disable=W0212
    caller_name = sys._getframe(1).f_code.co_name
    suffix = ', ' + extra_msg if extra_msg else ''
    text = ('Function %s not supported between %s and %s URIs' %
            (caller_name, uri_type1, uri_type2))
    super(NotSupportedBetweenTypes, self).__init__(text + suffix)
class MissingURLError(RuntimeError):
  """Raised when a URL cannot be downloaded or nothing exists at it."""
def ExtractProtocol(uri):
  """Return the protocol prefix of a URI, or None when it has none.

  Examples:
    'gs://some/path' ==> 'gs'
    'file:///some/path' ==> 'file'
    '/some/path' ==> None

  Args:
    uri: The URI (or plain path) to inspect.

  Returns:
    The protocol string preceding '://', or None if uri does not start with
    a protocol.
  """
  found = EXTRACT_PROTOCOL_RE.search(uri)
  return found.group(1) if found else None
def GetUriType(uri):
  """Return the type of a URI.

  The type is the URI's protocol when it has one (see the TYPE_* constants
  for the types this module acts on); anything without a protocol prefix is
  treated as a local file.

  Args:
    uri: The URI (or plain path) to classify.

  Returns:
    The protocol string of uri, or TYPE_LOCAL if it has no protocol.
  """
  return ExtractProtocol(uri) or TYPE_LOCAL
def SplitURI(uri):
  """Split a URI into its protocol and path.

  Examples:
    'gs://some/path' ==> ('gs', 'some/path')
    'file:///some/path' ==> ('file', '/some/path')
    '/some/path' ==> (None, '/some/path')

  Args:
    uri: The URI (or plain path) to split.

  Returns:
    Tuple (protocol, path); protocol is None when uri has no protocol prefix,
    in which case path is uri unchanged.
  """
  found = SPLIT_URI_RE.search(uri)
  if found is None:
    return (None, uri)
  # The pattern has exactly two groups: the protocol and everything after it.
  return found.groups()
def IsGsURI(uri):
  """Return True if uri starts with the Google Storage protocol."""
  return ExtractProtocol(uri) == PROTOCOL_GS
def IsFileURI(uri):
  """Return True if uri refers to a local file.

  That is the case when uri uses the 'file' protocol or has no protocol
  prefix at all (a plain path).

  Args:
    uri: Any URI or path.

  Returns:
    True if the type of uri is TYPE_LOCAL, False otherwise.
  """
  return GetUriType(uri) == TYPE_LOCAL
def IsHttpURI(uri, https_ok=False):
  """Tell whether uri uses the http (or, optionally, https) protocol.

  Args:
    uri: The URI to check.
    https_ok: If true, an https URI is accepted as well.

  Returns:
    True for an http URI. Otherwise the result of
    `https_ok and <uri is https>`, which is a bool whenever https_ok is one.
  """
  kind = GetUriType(uri)
  if kind == TYPE_HTTP:
    return True
  return https_ok and kind == TYPE_HTTPS
def IsHttpsURI(uri):
  """Return True if uri uses the https protocol."""
  return GetUriType(uri) == TYPE_HTTPS
def MD5Sum(uri):
  """Compute or retrieve the MD5 sum of the file at uri.

  Supported for: local files, GS files.

  Args:
    uri: The /unix/path or gs:// uri to get the md5sum of.

  Returns:
    For local files, whatever filelib.MD5Sum returns. For GS files, whatever
    gslib.MD5Sum returns, or None if it raises gslib.GSLibError.

  Raises:
    NotSupportedForType: if uri is neither a local path nor a GS URI.
  """
  kind = GetUriType(uri)

  if kind == TYPE_LOCAL:
    return filelib.MD5Sum(uri)

  if kind != TYPE_GS:
    # Other storage types have no MD5 command. The file could be copied to
    # local disk and summed there, but saying explicitly that this is not
    # supported seems better.
    raise NotSupportedForType(kind)

  try:
    return gslib.MD5Sum(uri)
  except gslib.GSLibError:
    return None
def Cmp(uri1, uri2):
  """Return True if the two URIs hold identical files.

  Comparison is delegated to gslib.Cmp when at least one URI is a GS URI
  (the other being GS or local) and to filelib.Cmp when both are local.
  Those helpers are expected to return False if either file is missing.

  Args:
    uri1: URI to a file.
    uri2: URI to a file.

  Returns:
    True if files are the same, False otherwise.

  Raises:
    NotSupportedBetweenTypes: if Cmp cannot be done between the two URI
      types provided.
  """
  first_type = GetUriType(uri1)
  second_type = GetUriType(uri2)

  # Only GS and local files can be compared, in any combination.
  comparable = (TYPE_GS, TYPE_LOCAL)
  if first_type in comparable and second_type in comparable:
    if TYPE_GS in (first_type, second_type):
      return gslib.Cmp(uri1, uri2)
    return filelib.Cmp(uri1, uri2)

  raise NotSupportedBetweenTypes(first_type, second_type)
class URLopener(urllib.FancyURLopener):
  """FancyURLopener that will actually complain when a download fails."""

  # urllib.urlretrieve looks like a good fit for downloading, but it does not
  # give access to the error code, hence this subclass.
  def http_error_default(self, *args, **kwargs):
    """Handle an HTTP error with the plain urllib.URLopener handler.

    This bypasses the FancyURLopener override of the same method.
    """
    base_handler = urllib.URLopener.http_error_default
    base_handler(self, *args, **kwargs)
def URLRetrieve(src_url, dest_path):
  """Download the file at a URL to a local file path.

  Args:
    src_url: URL to download from.
    dest_path: Local path to download to.

  Raises:
    MissingURLError: if the URL cannot be downloaded.
    IOError: for any other IOError (one with a numeric errno), e.g. an
      ordinary failure writing to dest_path.
  """
  downloader = URLopener()
  try:
    downloader.retrieve(src_url, dest_path)
  except IOError as e:
    # A valid domain whose download failed shows up with errno None.
    if e.errno is None:
      raise MissingURLError('Unable to download %s' % src_url)

    # An invalid domain shows up, weirdly, with errno 'socket error', which
    # cannot be converted to an integer.
    try:
      int(e.errno)
    except ValueError:
      raise MissingURLError('Unable to download %s (bad domain?)' % src_url)
    else:
      # A numeric errno means a normal error, such as one writing to
      # dest_path; let the original IOError propagate.
      raise
def Copy(src_uri, dest_uri):
  """Copy one URI to another.

  Supported combinations: GS and/or local files in either direction (via
  gslib.Copy when GS is involved, filelib.Copy otherwise), and http/https
  source to a local destination (via URLRetrieve).

  Args:
    src_uri: URI to copy from.
    dest_uri: URI or path to copy to.

  Returns:
    Whatever the underlying copy helper returns.

  Raises:
    NotSupportedBetweenTypes: if Copy cannot be done between the two URI
      types provided.
  """
  src_type = GetUriType(src_uri)
  dest_type = GetUriType(dest_uri)

  # GS and local files can be copied between each other in any combination.
  gs_or_local = (TYPE_GS, TYPE_LOCAL)
  if src_type in gs_or_local and dest_type in gs_or_local:
    if TYPE_GS in (src_type, dest_type):
      return gslib.Copy(src_uri, dest_uri)
    return filelib.Copy(src_uri, dest_uri)

  # A URL can only be a source, and only for a local destination.
  if dest_type == TYPE_LOCAL and src_type in (TYPE_HTTP, TYPE_HTTPS):
    return URLRetrieve(src_uri, dest_uri)

  raise NotSupportedBetweenTypes(src_type, dest_type)
def Remove(*args, **kwargs):
  """Delete the file(s) at uris, or directory(s) with recurse set.

  All positional and keyword arguments are forwarded unchanged to
  gslib.Remove (when any URI is a GS URI; the rest may only be local) or to
  filelib.Remove (when every URI is local).

  Args:
    args: One or more URIs.
    ignore_no_match: If True, then do not complain if anything was not
      removed because no URI match was found. Like rm -f. Defaults to False.
    recurse: Remove recursively starting at path. Same as rm -R. Defaults
      to False.

  Returns:
    Whatever the underlying remove helper returns.

  Raises:
    NotSupportedForTypes: if the URIs are not all local, or not a mix of
      only GS and local URIs.
  """
  uri_types = set(GetUriType(uri) for uri in args)

  if TYPE_GS in uri_types:
    # GS support only allows local files among list.
    if len(uri_types) == 1 or (TYPE_LOCAL in uri_types and len(uri_types) == 2):
      return gslib.Remove(*args, **kwargs)

  if TYPE_LOCAL in uri_types and len(uri_types) == 1:
    return filelib.Remove(*args, **kwargs)

  # The first positional parameter of NotSupportedForTypes is extra_msg, so
  # pass None explicitly; otherwise one of the URI types would be consumed as
  # the extra message. Sorting keeps the message deterministic.
  raise NotSupportedForTypes(None, *sorted(uri_types))
def Size(uri):
  """Return size of file at URI in bytes.

  For http/https URIs the size is taken from the Content-Length header of a
  response with status 200.

  Args:
    uri: URI to consider

  Returns:
    Size of file at given URI in bytes.

  Raises:
    MissingURLError: if uri is a URL and the request fails with a 4xx error
      or the response status is not 200.
    urllib2.HTTPError: for HTTP errors outside the 4xx range.
    NotSupportedForType: if the URI type is not GS, local, http or https.
  """
  uri_type = GetUriType(uri)

  if TYPE_GS == uri_type:
    return gslib.FileSize(uri)

  if TYPE_LOCAL == uri_type:
    return filelib.Size(uri)

  if TYPE_HTTP == uri_type or TYPE_HTTPS == uri_type:
    try:
      response = urllib2.urlopen(uri)
      try:
        if response.getcode() == 200:
          # NOTE(review): presumably every 200 response of interest carries a
          # Content-Length header; int() fails if getheader returns None.
          return int(response.headers.getheader('Content-Length'))
      finally:
        # Always release the connection, whether or not a size was returned.
        response.close()
    except urllib2.HTTPError as e:
      # Interpret 4** errors as our own MissingURLError.
      if e.code < 400 or e.code >= 500:
        raise

    raise MissingURLError('No such file at URL %s' % uri)

  raise NotSupportedForType(uri_type)
def Exists(uri, as_dir=False):
  """Return True if file exists at given URI.

  If URI is a directory and as_dir is False then this will return False.

  Args:
    uri: URI to consider
    as_dir: If True then check URI as a directory, otherwise check as a file.

  Returns:
    True if file (or directory) exists at URI, False otherwise. For GS URIs
    with as_dir=True the result is always False. For http/https URIs the
    result is True only when the response status is 200; any HTTPError
    yields False.

  Raises:
    NotSupportedForType: if the URI type is not GS, local, http or https, or
      if as_dir is True for an http/https URI.
  """
  uri_type = GetUriType(uri)

  if TYPE_GS == uri_type:
    if as_dir:
      # GS does not contain directories.
      return False
    return gslib.Exists(uri)

  if TYPE_LOCAL == uri_type:
    return filelib.Exists(uri, as_dir=as_dir)

  if TYPE_HTTP == uri_type or TYPE_HTTPS == uri_type:
    if as_dir:
      raise NotSupportedForType(uri_type, extra_msg='with as_dir=True')

    try:
      response = urllib2.urlopen(uri)
    except urllib2.HTTPError:
      return False

    try:
      return response.getcode() == 200
    finally:
      # Release the connection; only the status code is needed.
      response.close()

  raise NotSupportedForType(uri_type)
def ListFiles(root_path, recurse=False, filepattern=None, sort=False):
  """Return list of file paths under the given root path.

  The listing is delegated to gslib.ListFiles for GS URIs and to
  filelib.ListFiles for local paths, with all options passed through.
  Results are expected to hold files only; directories are intentionally
  excluded.

  Args:
    root_path: A local directory path or a GS directory URI.
    recurse: Look for files in subdirectories, as well.
    filepattern: Glob pattern to match against the basename of each file.
    sort: If True then do a default sort on paths.

  Returns:
    List of paths to files that matched.

  Raises:
    NotSupportedForType: if root_path is neither a GS URI nor a local path.
  """
  kind = GetUriType(root_path)

  if kind == TYPE_GS:
    lister = gslib.ListFiles
  elif kind == TYPE_LOCAL:
    lister = filelib.ListFiles
  else:
    raise NotSupportedForType(kind)

  return lister(root_path, recurse=recurse,
                filepattern=filepattern, sort=sort)
def CopyFiles(src_dir, dst_dir):
  """Recursively copy all files from src_dir into dst_dir.

  This leverages the Copy method, so the restrictions there for what
  copies are supported apply here.

  Args:
    src_dir: A local or GS directory to copy from.
    dst_dir: A local or GS directory to copy into.

  Returns:
    A list of the destination paths of all copied files.
  """
  dst_paths = []
  for src_path in ListFiles(src_dir, recurse=True):
    # Swap only the first occurrence of src_dir (the root of the listed
    # path). An unbounded replace would also rewrite any later path component
    # that happens to contain the same text, corrupting the destination.
    dst_path = src_path.replace(src_dir, dst_dir, 1)
    Copy(src_path, dst_path)
    dst_paths.append(dst_path)

  return dst_paths
def RemoveDirContents(base_dir):
  """Remove all contents of a directory.

  Delegates to gslib.RemoveDirContents for GS URIs and to
  filelib.RemoveDirContents for local paths.

  Args:
    base_dir: Directory to delete the contents of.

  Returns:
    Whatever the underlying helper returns.

  Raises:
    NotSupportedForType: if base_dir is neither a GS URI nor a local path.
  """
  kind = GetUriType(base_dir)

  if kind == TYPE_GS:
    remover = gslib.RemoveDirContents
  elif kind == TYPE_LOCAL:
    remover = filelib.RemoveDirContents
  else:
    raise NotSupportedForType(kind)

  return remover(base_dir)