blob: 65f67940f6de69f001370be2b818a18b44795f93 [file] [log] [blame]
#!/usr/bin/python
# Copyright 2012 Google Inc. All Rights Reserved.
# Author: mrdmnd@ (Matt Redmond)
"""A client to pull data from Bartlett.
Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py
The server houses perf.data.gz, board, chrome version for each upload.
This script first authenticates with a proper @google.com account, then
downloads a sample (if it's not already cached) and unzips perf.data.
Authenticate(): Gets login info and returns an auth token
DownloadSamples(): Download and unzip samples.
_GetServePage(): Pulls /serve page from the app engine server
_DownloadSampleFromServer(): Downloads a local compressed copy of a sample
_UncompressSample(): Decompresses a sample, deleting the compressed version.
"""
import cookielib
import getpass
import gzip
import optparse
import os
import sys
import urllib
import urllib2
# Name of the App Engine application; also sent as the ClientLogin "source".
APP_NAME = "chromeoswideprofiling"
# Base URL of the App Engine server, without a trailing slash.
SERVER_NAME = "http://%s.appspot.com" % APP_NAME
# Separator between the fields of a sample entry and of a local filename.
DELIMITER = "~"
def Authenticate(server_name):
  """Gets credentials from user and attempts to retrieve auth token.

  As a side effect this installs a process-wide urllib2 opener backed by a
  cookie jar, so later urllib2.urlopen() calls share the cookies it collects.

  TODO: Accept OAuth2 instead of password.

  Args:
    server_name: (string) URL that the app engine code is living on.
      Not needed for the login request itself; kept for existing callers.

  Returns:
    authtoken: (string) The authorization token that can be used
      to grab other pages, or None if the login request failed or the
      response did not contain an Auth field.
  """
  # Grab username and password from user through stdin.
  username = raw_input("Email (must be @google.com account): ")
  password = getpass.getpass("Password: ")

  # Use a cookie to authenticate with GAE.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)

  # Get an AuthToken from Google accounts service.
  auth_uri = "https://www.google.com/accounts/ClientLogin"
  authreq_data = urllib.urlencode({"Email": username,
                                   "Passwd": password,
                                   "service": "ah",
                                   "source": APP_NAME,
                                   "accountType": "HOSTED_OR_GOOGLE"})
  auth_req = urllib2.Request(auth_uri, data=authreq_data)
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except urllib2.URLError:
    print("Error logging in to Google accounts service.")
    return None
  try:
    body = auth_resp.read()
  finally:
    auth_resp.close()

  # Auth response contains several NAME=value lines.
  # We care about the part after Auth=
  # Split on the first "=" only: a value that itself contains "=" would
  # otherwise yield more than two pieces and make dict() raise ValueError.
  # Lines without any "=" are ignored for the same reason.
  auth_resp_dict = dict(line.split("=", 1)
                        for line in body.splitlines() if "=" in line)
  # A missing Auth field is reported as None, which callers treat as failure.
  return auth_resp_dict.get("Auth")
def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Download a range of samples and write unzipped versions to output_dir.

  The /serve page is split on "</br>" into one entry per sample. Each entry
  is split on DELIMITER into key, upload time, md5 (unused), board and
  version.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    output_dir: (string) Filepath to write output to.
    start: (int) Index to start downloading from, starting at top.
    stop: (int) Index to stop downloading, non-inclusive. -1 for end.

  Returns:
    None
  """
  server_name = server_name.rstrip("/")
  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print("Error getting /serve page.")
    return

  # Drop blank pieces (for example whatever follows the final "</br>") so
  # they are never mistaken for samples.
  sample_list = [s for s in serve_page_string.split("</br>") if s.strip()]

  # The indices may arrive as strings when they come straight from the
  # command line, and a string cannot be used as a slice bound.
  start = int(start)
  stop = int(stop)
  if stop == -1:
    # -1 is documented as "end"; a literal [start:-1] slice would instead
    # leave out the last sample.
    stop = None
  sample_list_subset = sample_list[start:stop]

  print("Will download:")
  for sample in sample_list_subset:
    print(sample)

  for sample in sample_list_subset:
    sample_info = [s.strip() for s in sample.split(DELIMITER)]
    if len(sample_info) < 5:
      # Explicit check instead of assert, which is stripped under -O.
      print("Skipping malformed sample entry: %s" % sample)
      continue
    key = sample_info[0]
    # No space between date and time.
    timestamp = sample_info[1].replace(" ", "_")
    # sample_info[2] is the sample md5, which is not used here.
    board = sample_info[3]
    version = sample_info[4]
    # Put a compressed copy of the sample in the output directory, then
    # replace it with the uncompressed data.
    _DownloadSampleFromServer(server_name, authtoken, key, timestamp, board,
                              version, output_dir)
    _UncompressSample(key, timestamp, board, version, output_dir)
def _BuildFilenameFromParams(key, time, board, version):
  """Compose the local filename for a sample.

  The name is the four fields joined with DELIMITER, in the order
  key, time, board, version.

  Args:
    key: (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release

  Returns:
    filename (string)
  """
  fields = (key, time, board, version)
  return DELIMITER.join(fields)
def _DownloadSampleFromServer(server_name, authtoken, key, time, board,
                              version, output_dir):
  """Downloads the compressed sample for key into output_dir.

  The data is saved as <filename>.gz, where <filename> is built by
  _BuildFilenameFromParams(). Nothing is downloaded when the uncompressed
  <filename> already exists in output_dir.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to write to output to.

  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  if os.path.exists(os.path.join(output_dir, filename)):
    print("Already downloaded %s, skipping." % filename)
    return

  # Request the sample through the login endpoint, passing the auth token
  # and the page to continue to.
  serv_uri = server_name + "/serve/" + key
  serv_args = {"continue": serv_uri, "auth": authtoken}
  full_serv_uri = server_name + "/_ah/login?%s" % urllib.urlencode(serv_args)
  serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))
  # Read the whole body before creating the local file, so a failed transfer
  # does not leave a truncated .gz behind.
  try:
    payload = serv_resp.read()
  finally:
    serv_resp.close()

  # The payload is compressed data: write it in binary mode so no newline
  # translation can corrupt it.
  compressed_path = os.path.join(output_dir, filename + ".gz")
  with open(compressed_path, "wb") as compressed_file:
    compressed_file.write(payload)
def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  Does nothing if the uncompressed file already exists in output_dir.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.

  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  out_path = os.path.join(output_dir, filename)
  compressed_path = os.path.join(output_dir, filename + ".gz")
  if os.path.exists(out_path):
    print("Already decompressed %s, skipping." % filename)
    return

  # Decompress before creating the output file: if the archive is missing or
  # corrupt, no empty output is left behind to be mistaken for a finished
  # sample by the existence check above on a later run.
  in_file = gzip.open(compressed_path, "rb")
  try:
    data = in_file.read()
  finally:
    in_file.close()

  try:
    with open(out_path, "wb") as out_file:
      out_file.write(data)
  except BaseException:
    # Remove a partially written output for the same reason, then re-raise.
    if os.path.exists(out_path):
      os.remove(out_path)
    raise

  os.remove(compressed_path)
def _DeleteSampleFromServer(server_name, authtoken, key):
  """Requests the /del page for a key to remove that sample.

  The request goes through the login endpoint, which is handed the auth
  token and the /del/<key> address to continue to.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key to delete.

  Returns:
    None
  """
  login_query = urllib.urlencode({"continue": server_name + "/del/" + key,
                                  "auth": authtoken})
  delete_url = server_name + "/_ah/login?%s" % login_query
  urllib2.urlopen(urllib2.Request(delete_url))
def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken: (string) Authorization token.

  Returns:
    The text of the /serve page (including HTML tags), or None if the page
    could not be fetched.
  """
  serv_uri = server_name + "/serve"
  serv_args = {"continue": serv_uri, "auth": authtoken}
  full_serv_uri = server_name + "/_ah/login?%s" % urllib.urlencode(serv_args)
  serv_req = urllib2.Request(full_serv_uri)
  # DownloadSamples() checks for a None result to report a failed fetch, so
  # signal request errors that way instead of letting URLError propagate.
  try:
    serv_resp = urllib2.urlopen(serv_req)
  except urllib2.URLError:
    return None
  try:
    return serv_resp.read()
  finally:
    serv_resp.close()
def main():
  """Parses the command line, authenticates and downloads the samples.

  Returns:
    0 on success; 1 if --output_dir is missing or not a directory, or if
    no auth token could be obtained.
  """
  parser = optparse.OptionParser()
  parser.add_option("--output_dir", dest="output_dir", action="store",
                    help="Path to output perf data files.")
  # type="int" makes optparse convert and validate the indices. Without it,
  # values given on the command line stay strings and cannot slice a list.
  parser.add_option("--start", dest="start_ind", action="store", type="int",
                    default=0, help="Start index.")
  parser.add_option("--stop", dest="stop_ind", action="store", type="int",
                    default=-1, help="Stop index.")
  options = parser.parse_args()[0]

  if not options.output_dir:
    print("Must specify --output_dir.")
    return 1
  # isdir rather than exists: a regular file at that path cannot hold the
  # downloaded samples.
  if not os.path.isdir(options.output_dir):
    print("Specified output_dir does not exist or is not a directory.")
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print("Could not obtain authtoken, exiting.")
    return 1

  DownloadSamples(SERVER_NAME, authtoken, options.output_dir,
                  options.start_ind, options.stop_ind)
  print("Downloaded samples.")
  return 0
if __name__ == "__main__":
  # Use sys.exit() rather than the bare exit() helper: exit() is injected by
  # the site module for interactive use and is absent under "python -S".
  sys.exit(main())