#!/usr/bin/python
# Copyright 2012 Google Inc. All Rights Reserved.
# Author: mrdmnd@ (Matt Redmond)
"""A client to pull data from Bartlett.

Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py

The server houses perf.data.gz, board, and chrome version for each upload.
This script first authenticates with a proper @google.com account, then
downloads each requested sample (if it's not already cached) and unzips
its perf.data.

  Authenticate(): Gets login info and returns an auth token
  DownloadSamples(): Download and unzip samples.
  _GetServePage(): Pulls /serve page from the app engine server
  _DownloadSampleFromServer(): Downloads a local compressed copy of a sample
  _UncompressSample(): Decompresses a sample, deleting the compressed version.
"""
import cookielib
import getpass
import gzip
import optparse
import os
import urllib
import urllib2

# Base URL of the App Engine app. HTTPS is used because the auth token is
# passed to <SERVER_NAME>/_ah/login as a query parameter and must not travel
# in clear text.
SERVER_NAME = "https://chromeoswideprofiling.appspot.com"
# Sent as the "source" field of the ClientLogin request.
APP_NAME = "chromeoswideprofiling"
# Separates the fields of each /serve listing entry and of local filenames.
DELIMITER = "~"


def Authenticate(server_name):
  """Gets credentials from user and attempts to retrieve auth token.

  Prompts on stdin for an email and password, installs a cookie-aware
  urllib2 opener globally, and posts the credentials to ClientLogin.
  TODO: Accept OAuth2 instead of password.

  Args:
    server_name: (string) URL that the app engine code is living on.
                 Not used here: the ClientLogin endpoint below is fixed.
  Returns:
    authtoken: (string) The authorization token that can be used
                        to grab other pages, or None if the login request
                        failed or the response carried no Auth field.
  """
  # Grab username and password from user through stdin.
  username = raw_input("Email (must be @google.com account): ")
  password = getpass.getpass("Password: ")
  # Use a cookie to authenticate with GAE. The opener is installed globally
  # so that later urllib2.urlopen() calls share the same cookie jar.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)
  # Get an AuthToken from Google accounts service.
  auth_uri = "https://www.google.com/accounts/ClientLogin"
  authreq_data = urllib.urlencode({"Email": username,
                                   "Passwd": password,
                                   "service": "ah",
                                   "source": APP_NAME,
                                   "accountType": "HOSTED_OR_GOOGLE"})
  auth_req = urllib2.Request(auth_uri, data=authreq_data)
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except urllib2.URLError:
    print("Error logging in to Google accounts service.")
    return None
  try:
    body = auth_resp.read()
  finally:
    auth_resp.close()
  # Auth response contains several NAME=value lines.
  # We care about the part after Auth=
  auth_resp_dict = {}
  for line in body.split("\n"):
    if "=" not in line:
      continue
    # Split on the first "=" only: the value itself may contain "=".
    name, value = line.split("=", 1)
    auth_resp_dict[name.strip()] = value.strip()
  authtoken = auth_resp_dict.get("Auth")
  if not authtoken:
    print("Login response did not contain an Auth token.")
    return None
  return authtoken


def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Download the selected samples and write unzipped versions
     to output directory.

  The /serve page is split on "</br>" into one entry per sample; blank
  entries (such as the one after a trailing "</br>") are ignored. Each
  remaining entry is expected to hold DELIMITER-separated fields:
  key, upload time, md5, board, version.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    output_dir   (string) Filepath to write output to.
    start:       (int)    Index to start downloading from, starting at top.
    stop:        (int)    Index to stop downloading, non-inclusive. -1 for end.
  Returns:
    None
  """
  server_name = server_name.rstrip("/")

  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print("Error getting /serve page.")
    return

  # Keep only non-blank entries so that a trailing "</br>" never yields a
  # bogus empty sample, whatever stop index was requested.
  sample_list = [entry for entry in serve_page_string.split("</br>")
                 if entry.strip()]

  # Indices may arrive as strings (e.g. straight from the command line).
  start = None if start is None else int(start)
  stop = None if stop is None else int(stop)
  if stop == -1:
    # -1 means "through the last sample", not "all but the last".
    stop = None

  sample_list_subset = sample_list[start:stop]
  print("Will download:")
  for sample in sample_list_subset:
    print(sample)
  for sample in sample_list_subset:
    sample_info = [s.strip() for s in sample.split(DELIMITER)]
    key = sample_info[0]
    time = sample_info[1]
    time = time.replace(" ", "_")  # No space between date and time.
    # sample_info[2] is the sample md5; it is currently unused.
    board = sample_info[3]
    version = sample_info[4]

    # Put a compressed copy of the samples in output directory.
    _DownloadSampleFromServer(server_name, authtoken, key, time, board,
                              version, output_dir)
    _UncompressSample(key, time, board, version, output_dir)


def _BuildFilenameFromParams(key, time, board, version):
  """Compose the local filename used for a sample.

  The four fields are concatenated in the order given, separated by
  DELIMITER.

  Args:
    key:  (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
  Returns:
    filename (string)
  """
  parts = (key, time, board, version)
  return DELIMITER.join(parts)


def _DownloadSampleFromServer(server_name, authtoken, key, time, board,
                              version, output_dir):
  """Downloads the compressed sample to <output_dir>/<filename>.gz.

  The download is skipped when the uncompressed file <output_dir>/<filename>
  already exists.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir:  (string) Filepath to write to output to.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  uncompressed_path = os.path.join(output_dir, filename)

  if os.path.exists(uncompressed_path):
    print("Already downloaded %s, skipping." % filename)
    return

  # Go through /_ah/login so the auth token is exchanged for a session
  # cookie before being redirected to the actual /serve/<key> page.
  serv_uri = server_name + "/serve/" + key
  serv_args = {"continue": serv_uri, "auth": authtoken}
  full_serv_uri = server_name + "/_ah/login?%s" % urllib.urlencode(serv_args)
  serv_req = urllib2.Request(full_serv_uri)
  serv_resp = urllib2.urlopen(serv_req)
  # Fetch the payload before creating the file, so a failed download does
  # not leave an empty .gz behind.
  try:
    payload = serv_resp.read()
  finally:
    serv_resp.close()

  # The payload is gzip data: it must be written in binary mode.
  with open(uncompressed_path + ".gz", "wb") as f:
    f.write(payload)


def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  The data is decompressed into a temporary "<filename>.tmp" file that is
  renamed to its final name only once decompression succeeded. A failure
  therefore never leaves a truncated file that later runs would treat as
  an already-downloaded sample.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  out_path = os.path.join(output_dir, filename)
  compressed_path = out_path + ".gz"
  tmp_path = out_path + ".tmp"

  if os.path.exists(out_path):
    print("Already decompressed %s, skipping." % filename)
    return

  finished = False
  try:
    in_file = gzip.open(compressed_path, "rb")
    try:
      out_file = open(tmp_path, "wb")
      try:
        # Copy in chunks rather than holding the whole sample in memory.
        while True:
          chunk = in_file.read(1 << 16)
          if not chunk:
            break
          out_file.write(chunk)
      finally:
        out_file.close()
    finally:
      in_file.close()
    finished = True
  finally:
    # Discard partial output on any failure.
    if not finished and os.path.exists(tmp_path):
      os.remove(tmp_path)

  os.rename(tmp_path, out_path)
  os.remove(compressed_path)


def _DeleteSampleFromServer(server_name, authtoken, key):
  """Requests the /del/<key> page through the login redirect.

  The request goes to /_ah/login with the auth token and a "continue"
  target of <server_name>/del/<key>. The response is not inspected.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken:   (string) Authorization token.
    key:  (string) Key to delete.
  Returns:
    None
  """
  query = urllib.urlencode({"continue": server_name + "/del/" + key,
                            "auth": authtoken})
  login_url = server_name + ("/_ah/login?%s" % query)
  urllib2.urlopen(urllib2.Request(login_url))


def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  The request goes through /_ah/login with the auth token and a "continue"
  target of <server_name>/serve.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken:   (string) Authorization token.
  Returns:
    The text of the /serve page (including HTML tags), or None if the
    page could not be fetched.
  """
  serv_uri = server_name + "/serve"
  serv_args = {"continue": serv_uri, "auth": authtoken}
  full_serv_uri = server_name + "/_ah/login?%s" % urllib.urlencode(serv_args)
  serv_req = urllib2.Request(full_serv_uri)
  try:
    serv_resp = urllib2.urlopen(serv_req)
  except urllib2.URLError:
    # DownloadSamples() checks for None to report the failure.
    return None
  try:
    return serv_resp.read()
  finally:
    serv_resp.close()


def main(argv=None):
  """Parses the command line, authenticates and downloads the samples.

  Args:
    argv: (list of strings) Command-line arguments without the program
          name. Defaults to None, in which case optparse reads sys.argv[1:].
  Returns:
    0 on success; 1 if --output_dir is missing or does not exist, or if
    no auth token could be obtained.
  """
  parser = optparse.OptionParser()
  parser.add_option("--output_dir", dest="output_dir", action="store",
                    help="Path to output perf data files.")
  # type="int" is required: without it user-supplied values stay strings
  # and cannot be used as slice indices.
  parser.add_option("--start", dest="start_ind", action="store", type="int",
                    default=0, help="Start index.")
  parser.add_option("--stop", dest="stop_ind", action="store", type="int",
                    default=-1, help="Stop index.")
  options = parser.parse_args(argv)[0]
  if not options.output_dir:
    print("Must specify --output_dir.")
    return 1
  if not os.path.exists(options.output_dir):
    print("Specified output_dir does not exist.")
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print("Could not obtain authtoken, exiting.")
    return 1
  DownloadSamples(SERVER_NAME, authtoken, options.output_dir,
                  options.start_ind, options.stop_ind)
  print("Downloaded samples.")
  return 0

if __name__ == "__main__":
  # Raise SystemExit directly: the exit() builtin is injected by the site
  # module and is not available when that module is skipped (python -S).
  raise SystemExit(main())
