# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utilities for standard operations on URIs of different kinds."""

from __future__ import print_function

import re
import sys
import urllib
import urllib2

import fixup_path
fixup_path.FixupPath()

from chromite.lib.paygen import filelib
from chromite.lib.paygen import gslib


# This module allows files from different storage types to be handled
# in a common way, for supported operations.


# URI protocol (scheme) names recognized by this module.  The Google
# Storage protocol name is taken from gslib so the two modules agree.
PROTOCOL_GS = gslib.PROTOCOL
PROTOCOL_HTTP = 'http'
PROTOCOL_HTTPS = 'https'

# All of the protocols named above, collected in one tuple.
PROTOCOLS = (PROTOCOL_GS,
             PROTOCOL_HTTP,
             PROTOCOL_HTTPS,
             )

# Text that separates the protocol from the rest of a URI.
PROTOCOL_SEP = '://'

# Both patterns are anchored at the start of the string and accept only
# word characters (\w) as the protocol.  EXTRACT_PROTOCOL_RE captures just
# the protocol; SPLIT_URI_RE also captures everything after the separator.
EXTRACT_PROTOCOL_RE = re.compile(r'^(\w+)%s' % PROTOCOL_SEP)
SPLIT_URI_RE = re.compile(r'^(\w+)%s(.*)$' % PROTOCOL_SEP)

# URI type names, as returned by GetUriType.  For URIs with a protocol the
# type is the protocol name itself; TYPE_LOCAL is used for URIs that have
# no protocol at all (plain paths).
TYPE_GS = PROTOCOL_GS
TYPE_HTTP = PROTOCOL_HTTP
TYPE_HTTPS = PROTOCOL_HTTPS
TYPE_LOCAL = 'file'


class NotSupportedForType(RuntimeError):
  """Raised when an operation is not supported for a particular URI type.

  The message names the function that constructed the exception, so the
  exception should be instantiated directly inside the function that does
  not support the given type.
  """

  def __init__(self, uri_type, extra_msg=None):
    """Compose the error message.

    Args:
      uri_type: The URI type that the calling function does not support.
      extra_msg: Optional text appended to the message after a comma.
    """
    # Frame 1 is whoever called this constructor, i.e. the function that
    # is reporting the unsupported type.
    # pylint: disable-msg=W0212
    caller_name = sys._getframe(1).f_code.co_name

    pieces = [
        'Function %s not supported for %s URIs' % (caller_name, uri_type),
    ]
    if extra_msg:
      pieces.append(extra_msg)

    RuntimeError.__init__(self, ', '.join(pieces))


class NotSupportedForTypes(RuntimeError):
  """Raised when an operation is not supported for a set of URI types.

  The message names the function that constructed the exception, so the
  exception should be instantiated directly inside the function that does
  not support the given combination of types.
  """

  def __init__(self, extra_msg=None, *uri_types):
    """Compose the error message.

    Note that extra_msg is the FIRST positional argument; callers that
    only have URI types to report must pass None (or '') ahead of them.

    Args:
      extra_msg: Optional text appended to the message after a comma.
      uri_types: The URI types (strings) involved in the unsupported call.
    """
    # Frame 1 is whoever called this constructor, i.e. the function that
    # is reporting the unsupported types.
    # pylint: disable-msg=W0212
    caller_name = sys._getframe(1).f_code.co_name

    pieces = [
        'Function %s not supported for set of URIs with types: %s' %
        (caller_name, ', '.join(uri_types)),
    ]
    if extra_msg:
      pieces.append(extra_msg)

    RuntimeError.__init__(self, ', '.join(pieces))


class NotSupportedBetweenTypes(RuntimeError):
  """Raised when an operation is not supported between two URI types.

  The message names the function that constructed the exception, so the
  exception should be instantiated directly inside the function that does
  not support the given pairing.
  """

  def __init__(self, uri_type1, uri_type2, extra_msg=None):
    """Compose the error message.

    Args:
      uri_type1: URI type of the first operand.
      uri_type2: URI type of the second operand.
      extra_msg: Optional text appended to the message after a comma.
    """
    # Frame 1 is whoever called this constructor, i.e. the function that
    # is reporting the unsupported pairing.
    # pylint: disable-msg=W0212
    caller_name = sys._getframe(1).f_code.co_name

    pieces = [
        'Function %s not supported between %s and %s URIs' %
        (caller_name, uri_type1, uri_type2),
    ]
    if extra_msg:
      pieces.append(extra_msg)

    RuntimeError.__init__(self, ', '.join(pieces))


class MissingURLError(RuntimeError):
  """Raised when nothing exists at URL.

  Within this module it is raised by URLRetrieve when a download fails and
  by Size when an http(s) URL cannot be found.
  """


def ExtractProtocol(uri):
  """Return the protocol at the front of a URI, if it has one.

  A protocol is a run of word characters immediately followed by '://'
  at the very start of the string.

  Examples:
  'gs://some/path' ==> 'gs'
  'file:///some/path' ==> 'file'
  '/some/path' ==> None
  '/cns/some/colossus/path' ==> None

  Args:
    uri: The URI string to inspect.

  Returns:
    The protocol string, or None when the URI does not start with one.
  """
  found = EXTRACT_PROTOCOL_RE.search(uri)
  return found.group(1) if found else None


def GetUriType(uri):
  """Classify a URI by type.

  The type is simply the protocol of the URI when it has one (including
  protocols this module has no TYPE_* constant for).  A URI with no
  protocol is treated as a local path.

  Args:
    uri: The URI to classify.

  Returns:
    The protocol string of the URI, or TYPE_LOCAL if it has no protocol.
  """
  # ExtractProtocol yields either a non-empty string or None, so a plain
  # "or" is enough to supply the local-path fallback.
  return ExtractProtocol(uri) or TYPE_LOCAL


def SplitURI(uri):
  """Break a URI into its protocol and the remainder after '://'.

  Examples:
  'gs://some/path' ==> ('gs', 'some/path')
  'file:///some/path' ==> ('file', '/some/path')
  '/some/path' ==> (None, '/some/path')
  '/cns/some/colossus/path' ==> (None, '/cns/some/colossus/path')

  Args:
    uri: The URI string to split.

  Returns:
    Tuple (protocol, path).  The protocol is None and the path is the
    unmodified input when the URI does not start with a protocol.
  """
  parsed = SPLIT_URI_RE.search(uri)
  if parsed is None:
    return (None, uri)

  # The pattern has exactly two groups: the protocol and the rest.
  return parsed.groups()


def IsGsURI(uri):
  """Tell whether a URI uses the Google Storage protocol.

  Args:
    uri: The URI to check.

  Returns:
    True if the protocol of the URI is PROTOCOL_GS, False otherwise.
  """
  return ExtractProtocol(uri) == PROTOCOL_GS


def IsFileURI(uri):
  """Tell whether a URI refers to a local file.

  This holds both for URIs that explicitly use the 'file' protocol and
  for plain paths that carry no protocol at all.

  Args:
    uri: Any URI or path.

  Returns:
    True if the type of the URI is TYPE_LOCAL, False otherwise.
  """
  return GetUriType(uri) == TYPE_LOCAL


def IsHttpURI(uri, https_ok=False):
  """Tell whether a URI uses http or, when allowed, https.

  Args:
    uri: The URI to check.
    https_ok: If True, then an https URI is accepted as well.

  Returns:
    True for an http URI.  Otherwise the result of
    "https_ok and <URI is https>", which is falsy unless https_ok is set
    and the URI uses https.
  """
  scheme = GetUriType(uri)
  if scheme == TYPE_HTTP:
    return True

  return https_ok and scheme == TYPE_HTTPS


def IsHttpsURI(uri):
  """Tell whether a URI uses the https protocol.

  Args:
    uri: The URI to check.

  Returns:
    True if the type of the URI is TYPE_HTTPS, False otherwise.
  """
  return GetUriType(uri) == TYPE_HTTPS


def MD5Sum(uri):
  """Compute or retrieve the MD5 sum of the file at a URI.

  Supported for: local files, GS files.

  Args:
    uri: The /unix/path or gs:// uri to get the md5sum of.

  Returns:
    For local files, whatever filelib.MD5Sum returns for the path.  For GS
    files, the result of gslib.MD5Sum, or None if that call raises
    gslib.GSLibError.

  Raises:
    NotSupportedForType if the URI is neither a local path nor a GS URI.
  """
  uri_type = GetUriType(uri)

  if uri_type == TYPE_GS:
    try:
      return gslib.MD5Sum(uri)
    except gslib.GSLibError:
      # A failure on the GS side is reported as "no md5sum available".
      return None

  if uri_type == TYPE_LOCAL:
    return filelib.MD5Sum(uri)

  # Other storage types could be handled by downloading the file and
  # hashing it locally, but it seems better to say explicitly that they
  # are not supported.
  raise NotSupportedForType(uri_type)


def Cmp(uri1, uri2):
  """Tell whether two URIs hold identical files.

  Local/local comparisons are delegated to filelib.Cmp; any comparison that
  involves a GS file (GS/GS or GS/local) is delegated to gslib.Cmp.

  If either file is missing then always return False.

  Args:
    uri1: URI to a file.
    uri2: URI to a file.

  Returns:
    True if files are the same, False otherwise.

  Raises:
    NotSupportedBetweenTypes if Cmp cannot be done between the two
      URIs provided.
  """
  type1 = GetUriType(uri1)
  type2 = GetUriType(uri2)
  type_pair = (type1, type2)

  if type_pair == (TYPE_LOCAL, TYPE_LOCAL):
    return filelib.Cmp(uri1, uri2)

  # GS is only supported against other GS files or local files.
  only_gs_or_local = all(t in (TYPE_GS, TYPE_LOCAL) for t in type_pair)
  if TYPE_GS in type_pair and only_gs_or_local:
    return gslib.Cmp(uri1, uri2)

  raise NotSupportedBetweenTypes(type1, type2)


class URLopener(urllib.FancyURLopener):
  """URLopener that will actually complain when download fails.

  The fallback HTTP error handler is routed to the plain urllib.URLopener
  implementation rather than the one inherited from urllib.FancyURLopener,
  so that a failed download surfaces to the caller (URLRetrieve handles it
  as an IOError).
  """
  # The urllib.urlretrieve function, which seems like a good fit for this,
  # does not give access to error code.
  def http_error_default(self, *args, **kwargs):
    """Delegate to urllib.URLopener.http_error_default with the same args."""
    urllib.URLopener.http_error_default(self, *args, **kwargs)


def URLRetrieve(src_url, dest_path):
  """Download the file at a URL into a local file.

  Args:
    src_url: URL to download from.
    dest_path: Local path to write the download to.

  Raises:
    MissingURLError if URL cannot be downloaded.
    IOError for any other failure, e.g. an error writing to dest_path.
  """
  try:
    URLopener().retrieve(src_url, dest_path)
  except IOError as e:
    # If the domain is valid but download failed errno shows up as None.
    if e.errno is None:
      raise MissingURLError('Unable to download %s' % src_url)

    # If the domain is invalid the errno shows up as 'socket error', weirdly,
    # so an errno that cannot be read as a number means a bad domain.
    try:
      int(e.errno)
    except ValueError:
      raise MissingURLError('Unable to download %s (bad domain?)' % src_url)

    # A numeric errno means there was some normal error writing to the
    # dest_path; let the original IOError through untouched.
    raise


def Copy(src_uri, dest_uri):
  """Copy the file at one URI to another URI.

  Supported combinations: local to local (filelib.Copy), anything involving
  GS where the other side is GS or local (gslib.Copy), and http/https to
  local (URLRetrieve).

  Args:
    src_uri: URI to copy from.
    dest_uri: URI or path to copy to.

  Returns:
    Whatever the underlying copy implementation returns.

  Raises:
    NotSupportedBetweenTypes if Copy cannot be done between the two
      URIs provided.
  """
  src_type = GetUriType(src_uri)
  dest_type = GetUriType(dest_uri)
  type_pair = (src_type, dest_type)

  if type_pair == (TYPE_LOCAL, TYPE_LOCAL):
    return filelib.Copy(src_uri, dest_uri)

  # GS is only supported against other GS files or local files.
  only_gs_or_local = all(t in (TYPE_GS, TYPE_LOCAL) for t in type_pair)
  if TYPE_GS in type_pair and only_gs_or_local:
    return gslib.Copy(src_uri, dest_uri)

  if dest_type == TYPE_LOCAL and src_type in (TYPE_HTTP, TYPE_HTTPS):
    # Download file from URL.
    return URLRetrieve(src_uri, dest_uri)

  raise NotSupportedBetweenTypes(src_type, dest_type)

def Remove(*args, **kwargs):
  """Delete the file(s) at uris, or directory(s) with recurse set.

  If any URI is a GS URI then all URIs (which may otherwise only be local
  paths) are handed to gslib.Remove; if all URIs are local they are handed
  to filelib.Remove.  Keyword arguments are forwarded unchanged.

  Args:
    args: One or more URIs.
    ignore_no_match: If True, then do not complain if anything was not
      removed because no URI match was found.  Like rm -f.  Defaults to False.
    recurse: Remove recursively starting at path.  Same as rm -R.  Defaults
      to False.

  Returns:
    Whatever the underlying remove implementation returns.

  Raises:
    NotSupportedForTypes if the combination of URI types is not supported
      (including the case where no URIs are given).
  """
  uri_types = set(GetUriType(u) for u in args)

  if TYPE_GS in uri_types:
    # GS support only allows local files among list.
    gs_only = len(uri_types) == 1
    gs_and_local = TYPE_LOCAL in uri_types and len(uri_types) == 2
    if gs_only or gs_and_local:
      return gslib.Remove(*args, **kwargs)

  if TYPE_LOCAL in uri_types and len(uri_types) == 1:
    return filelib.Remove(*args, **kwargs)

  # The first positional parameter of NotSupportedForTypes is extra_msg, so
  # it must be filled explicitly; otherwise the first URI type would be
  # swallowed as the extra message.  Sorting keeps the message stable.
  raise NotSupportedForTypes(None, *sorted(uri_types))


def Size(uri):
  """Return size of file at URI in bytes.

  GS and local files are delegated to gslib.FileSize and filelib.Size.  For
  http/https URLs the size is read from the Content-Length header of the
  response.

  Args:
    uri: URI to consider

  Returns:
    Size of file at given URI in bytes.

  Raises:
    MissingURLError if uri is a URL and cannot be found (the server
      answered with a 4xx error, or with a status other than 200).
    urllib2.HTTPError for HTTP errors outside of the 4xx range.
    NotSupportedForType if the URI type is not GS, local, http or https.
  """
  uri_type = GetUriType(uri)

  if TYPE_GS == uri_type:
    return gslib.FileSize(uri)

  if TYPE_LOCAL == uri_type:
    return filelib.Size(uri)

  if uri_type in (TYPE_HTTP, TYPE_HTTPS):
    try:
      response = urllib2.urlopen(uri)
      try:
        if response.getcode() == 200:
          # NOTE(review): presumably getheader returns None when the server
          # sends no Content-Length, which would make int() raise
          # TypeError -- confirm whether callers need that handled.
          return int(response.headers.getheader('Content-Length'))
      finally:
        # Only the headers are needed; release the connection right away
        # instead of leaving it open until garbage collection.
        response.close()

    except urllib2.HTTPError as e:
      # Interpret 4** errors as our own MissingURLError.
      if e.code < 400 or e.code >= 500:
        raise

    raise MissingURLError('No such file at URL %s' % uri)

  raise NotSupportedForType(uri_type)


def Exists(uri, as_dir=False):
  """Return True if file exists at given URI.

  If URI is a directory and as_dir is False then this will return False.

  Args:
    uri: URI to consider
    as_dir: If True then check URI as a directory, otherwise check as a file.

  Returns:
    True if file (or directory) exists at URI, False otherwise.  For GS
    URIs with as_dir=True the result is always False.  For http/https URLs
    the result is True only if the server answers with status 200; any
    urllib2.HTTPError counts as "does not exist".

  Raises:
    NotSupportedForType if the URI type is not GS, local, http or https, or
      if as_dir=True is used with an http/https URL.
  """
  uri_type = GetUriType(uri)

  if TYPE_GS == uri_type:
    if as_dir:
      # GS does not contain directories.
      return False

    return gslib.Exists(uri)

  if TYPE_LOCAL == uri_type:
    return filelib.Exists(uri, as_dir=as_dir)

  if uri_type in (TYPE_HTTP, TYPE_HTTPS):
    if as_dir:
      raise NotSupportedForType(uri_type, extra_msg='with as_dir=True')

    try:
      response = urllib2.urlopen(uri)
    except urllib2.HTTPError:
      return False

    try:
      return response.getcode() == 200
    finally:
      # Only the status is needed; release the connection right away
      # instead of leaving it open until garbage collection.
      response.close()

  raise NotSupportedForType(uri_type)


def ListFiles(root_path, recurse=False, filepattern=None, sort=False):
  """List the files found under a root directory.

  Directories are intentionally excluded from results.  The root_path
  argument can be a local directory path or a Google Storage directory
  URI; the listing is delegated to filelib.ListFiles or gslib.ListFiles
  accordingly.

  Args:
    root_path: A local path or GS path to a directory.
    recurse: Look for files in subdirectories, as well
    filepattern: glob pattern to match against basename of file
    sort: If True then do a default sort on paths

  Returns:
    List of paths to files that matched

  Raises:
    NotSupportedForType if root_path is neither a local path nor a GS URI.
  """
  uri_type = GetUriType(root_path)

  if uri_type == TYPE_GS:
    lister = gslib.ListFiles
  elif uri_type == TYPE_LOCAL:
    lister = filelib.ListFiles
  else:
    raise NotSupportedForType(uri_type)

  return lister(root_path, recurse=recurse, filepattern=filepattern,
                sort=sort)


def CopyFiles(src_dir, dst_dir):
  """Recursively copy all files from src_dir into dst_dir

  This leverages the Copy method, so the restrictions there for what
  copies are supported apply here.  The files to copy are found with
  ListFiles, so its restrictions apply to src_dir as well.

  Args:
    src_dir: A directory (path or URI) to copy from.
    dst_dir: A directory (path or URI) to copy into.

  Returns:
    A list of the destination paths of all copied files.
  """
  dst_paths = []
  for src_path in ListFiles(src_dir, recurse=True):
    # Swap only the first occurrence of src_dir (the leading directory).
    # An unlimited replace would also rewrite any later part of the path
    # that happens to contain the same text, e.g. 'a/b/a/b/f' with
    # src_dir='a/b'.
    dst_path = src_path.replace(src_dir, dst_dir, 1)
    Copy(src_path, dst_path)
    dst_paths.append(dst_path)

  return dst_paths


def RemoveDirContents(base_dir):
  """Remove everything inside a directory.

  The work is delegated to gslib.RemoveDirContents for GS URIs and to
  filelib.RemoveDirContents for local paths.

  Args:
    base_dir: directory to delete contents of.

  Returns:
    Whatever the underlying implementation returns.

  Raises:
    NotSupportedForType if base_dir is neither a local path nor a GS URI.
  """
  uri_type = GetUriType(base_dir)

  if uri_type == TYPE_GS:
    remover = gslib.RemoveDirContents
  elif uri_type == TYPE_LOCAL:
    remover = filelib.RemoveDirContents
  else:
    raise NotSupportedForType(uri_type)

  return remover(base_dir)
