# (Repository-viewer residue, not part of the module:
#  blob: 25ee6c325cd3f854bf17e0c7c4e98e7b0b7b1462 [file] [log] [blame])
# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Library for interacting with gdata (i.e. Google Docs, Tracker, etc)."""
# pylint: disable=bad-continuation
from __future__ import print_function
import functools
import getpass
import os
import pickle
import re
import urllib
import xml.dom.minidom
import gdata.projecthosting.client
import gdata.service
import gdata.spreadsheet
import gdata.spreadsheet.service
from chromite.lib import operation
# pylint: disable=W0201,E0203,E1101
# Per-user file paths, located in the user's home directory.
# os.path.expanduser is used instead of os.environ['HOME'] so that importing
# this module does not raise KeyError when HOME is not set in the environment.
TOKEN_FILE = os.path.expanduser(os.path.join('~', '.gdata_token'))
CRED_FILE = os.path.expanduser(os.path.join('~', '.gdata_cred.txt'))
# Module-wide Operation object; the code in this file reports through its
# Notice/Warning/Error/Die methods.
oper = operation.Operation('gdata_lib')
# Characters that are stripped from column names (space, slash, underscore).
_BAD_COL_CHARS_REGEX = re.compile(r'[ /_]')


def PrepColNameForSS(col):
    """Return |col| in the form used by the spreadsheet interface.

    The spreadsheet interface requires column names to be all lowercase,
    with no spaces or other special characters, so the name is lowercased
    and every space, '/' and '_' is removed.
    """
    lowered = col.lower()
    return _BAD_COL_CHARS_REGEX.sub('', lowered)
# TODO(mtennant): Rename PrepRowValuesForSS
def PrepRowForSS(row):
    """Return a new dict with each value of |row| run through PrepValForSS.

    This makes sure the spreadsheet handles all values in the row as strings.
    Keys are left unchanged and |row| itself is not modified.
    """
    prepped = {}
    for key, val in row.items():
        prepped[key] = PrepValForSS(val)
    return prepped
# Regex to detect values that the spreadsheet will auto-format as numbers.
_NUM_REGEX = re.compile(r'^[\d\.]+$')


def PrepValForSS(val):
    """Return |val| marked, if needed, so the spreadsheet keeps it a string.

    Values made up only of digits and dots (e.g. version strings for portage
    packages) would otherwise be interpreted as numbers by Sheets and mangled,
    so those get a leading single quote.  Empty values and everything else
    are returned unchanged.
    """
    if not val:
        return val
    if _NUM_REGEX.match(val) is None:
        return val
    return "'" + val
def ScrubValFromSS(val):
    """Return |val| without its leading single quote, if it has one.

    The single quote is the string indicator prefix; empty values and values
    that do not start with it are returned unchanged.
    """
    if not val or val[0] != "'":
        return val
    return val[1:]
class Creds(object):
    """Class to manage user/password credentials.

    Holds a user name and password together with the Docs and Tracker auth
    token strings.  The two "dirty" flags record whether the credentials or
    the tokens have been set since they were last stored to a file.
    """

    __slots__ = (
        'docs_auth_token',     # Docs Client auth token string
        'creds_dirty',         # True if user/password set and not, yet, saved
        'password',            # User password
        'token_dirty',         # True if auth token(s) set and not, yet, saved
        'tracker_auth_token',  # Tracker Client auth token string
        'user',                # User account (full email address)
    )

    # Attributes that StoreAuthToken saves and LoadAuthToken restores.
    SAVED_TOKEN_ATTRS = ('docs_auth_token', 'tracker_auth_token', 'user')

    def __init__(self):
        self.user = None
        self.password = None

        self.docs_auth_token = None
        self.tracker_auth_token = None

        self.token_dirty = False
        self.creds_dirty = False

    def SetDocsAuthToken(self, auth_token):
        """Set the Docs auth_token string."""
        self.docs_auth_token = auth_token
        self.token_dirty = True

    def SetTrackerAuthToken(self, auth_token):
        """Set the Tracker auth_token string."""
        self.tracker_auth_token = auth_token
        self.token_dirty = True

    def LoadAuthToken(self, filepath):
        """Load previously saved auth token(s) from |filepath|.

        This first clears both docs_auth_token and tracker_auth_token.  If
        the file cannot be opened or read (IOError) the failure is reported
        through oper.Error and nothing else is changed.
        """
        self.docs_auth_token = None
        self.tracker_auth_token = None

        try:
            # NOTE: unpickling can execute arbitrary code, so |filepath| must
            # only ever be a file written by StoreAuthToken.
            # Binary mode is what pickle expects; the handle is closed by
            # the with-statement even if loading fails.
            with open(filepath, 'rb') as f:
                obj = pickle.load(f)
        except IOError:
            oper.Error('Unable to load auth token file at "%s"' % filepath)
            return

        if 'auth_token' in obj:
            # Backwards compatibility.  Default 'auth_token' is what
            # docs_auth_token used to be saved as.
            self.docs_auth_token = obj['auth_token']
            # Mark dirty so the token gets re-saved under its current name.
            self.token_dirty = True

        for attr in self.SAVED_TOKEN_ATTRS:
            if attr in obj:
                setattr(self, attr, obj[attr])

        oper.Notice('Loaded Docs/Tracker auth token(s) from "%s"' % filepath)

    def StoreAuthTokenIfNeeded(self, filepath):
        """Store auth token(s) to |filepath| if anything changed."""
        if self.token_dirty:
            self.StoreAuthToken(filepath)

    def StoreAuthToken(self, filepath):
        """Store auth token(s) to |filepath|.

        Only the SAVED_TOKEN_ATTRS that currently have a value are written.
        On success token_dirty is cleared; an IOError is reported through
        oper.Error and leaves token_dirty untouched.
        """
        obj = {}

        for attr in self.SAVED_TOKEN_ATTRS:
            val = getattr(self, attr)
            if val:
                obj[attr] = val

        try:
            oper.Notice('Storing Docs and/or Tracker auth token to "%s"' % filepath)
            with open(filepath, 'wb') as f:
                pickle.dump(obj, f)

            self.token_dirty = False
        except IOError:
            oper.Error('Unable to store auth token to file at "%s"' % filepath)

    def SetCreds(self, user, password=None):
        """Set the user and password, prompting for the password if missing.

        |user| User name; a default domain is appended when it has no '@'.
        |password| Password; when empty it is read interactively via getpass.
        """
        if '@' not in user:
            # NOTE(review): the domain literal was lost from this copy of
            # the file (the format string was empty, which raised TypeError).
            # '@chromium.org' is a reconstruction -- confirm.
            user = '%s@chromium.org' % user

        if not password:
            password = getpass.getpass('Docs password for %s:' % user)

        self.user = user
        self.password = password
        self.creds_dirty = True

    def LoadCreds(self, filepath):
        """Load email/password credentials from |filepath|.

        Raises ValueError if the file has fewer than two lines.
        """
        # Read email from first line and password from second.  Any further
        # lines (e.g. a trailing blank line) are ignored.
        with open(filepath, 'r') as f:
            lines = [line.strip() for line in f]
        (self.user, self.password) = lines[:2]
        oper.Notice('Loaded Docs/Tracker login credentials from "%s"' % filepath)

    def StoreCredsIfNeeded(self, filepath):
        """Store email/password credentials to |filepath| if anything changed."""
        if self.creds_dirty:
            self.StoreCreds(filepath)

    def StoreCreds(self, filepath):
        """Store email/password credentials to |filepath|.

        NOTE: the password is written to the file as plain text.
        """
        oper.Notice('Storing Docs/Tracker login credentials to "%s"' % filepath)
        # Simply write email on first line and password on second.
        with open(filepath, 'w') as f:
            f.write(self.user + '\n')
            f.write(self.password + '\n')

        self.creds_dirty = False
class IssueComment(object):
    """One comment on a Tracker issue: a title and the comment text."""

    __slots__ = ['title', 'text']

    def __init__(self, title, text):
        self.text = text
        self.title = title

    def __str__(self):
        """Return the title followed by the text, or a placeholder if empty."""
        if self.text:
            # Put one space after every newline of a multi-line comment.
            body = self.text.replace('\n', '\n ')
        else:
            body = '<no comment>'
        return '%s:\n %s' % (self.title, body)
class Issue(object):
    """Represents one Tracker Issue."""

    SlotDefaults = {
        'comments': [],   # List of IssueComment objects
        'id': 0,          # Issue id number (int)
        'labels': [],     # List of text labels
        'owner': None,    # Current owner (text, account)
        'status': None,   # Current issue status (text) (e.g. Assigned)
        'summary': None,  # Issue summary (first comment)
        'title': None,    # Title text
        'ccs': [],        # Cc list
    }

    __slots__ = tuple(SlotDefaults)

    def __init__(self, **kwargs):
        """Init for one Issue object.

        |kwargs| - key/value arguments to give initial values to
        any additional attributes on |self|.

        Raises ValueError if |kwargs| has a key that is not a known slot.
        """
        # Use SlotDefaults overwritten by kwargs for starting slot values.
        # List defaults are copied so that instances never share (and then
        # mutate) the lists stored in SlotDefaults.
        slotvals = dict((key, list(val) if isinstance(val, list) else val)
                        for key, val in self.SlotDefaults.items())
        slotvals.update(kwargs)
        for slot in self.__slots__:
            setattr(self, slot, slotvals.pop(slot))
        if slotvals:
            raise ValueError('I do not know what to do with %r' % slotvals)

    def __str__(self):
        """Pretty print of issue."""
        lines = ['Issue %d - %s' % (self.id, self.title),
                 'Status: %s, Owner: %s' % (self.status, self.owner),
                 'Labels: %s' % ', '.join(self.labels),
                 ]

        if self.summary:
            lines.append('Summary: %s' % self.summary)

        if self.comments:
            # Comments may be IssueComment objects; str.join needs strings.
            lines.extend(str(comment) for comment in self.comments)

        return '\n'.join(lines)

    def InitFromTracker(self, t_issue, project_name):
        """Initialize |self| from tracker issue |t_issue|"""
        # The __slots__ logic above confuses pylint.
        # pylint: disable=assigning-non-slot

        # NOTE(review): the expression was damaged in this copy of the file;
        # it is reconstructed as "last '/'-separated component of the issue
        # id text" -- confirm.
        self.id = int(t_issue.id.text.split('/')[-1])
        self.labels = [label.text for label in t_issue.label]
        if t_issue.owner:
            self.owner = t_issue.owner.username.text
        self.status = t_issue.status.text
        self.summary = t_issue.content.text
        self.title = t_issue.title.text
        self.comments = self.GetTrackerIssueComments(self.id, project_name)

    def GetTrackerIssueComments(self, issue_id, project_name):
        """Retrieve comments for |issue_id| from comments URL.

        Returns a list with one IssueComment per <entry> element of the feed.
        """
        comments = []

        # NOTE(review): the feed base URL was lost from this copy of the
        # file; this value is a reconstruction -- confirm.
        feeds = 'https://code.google.com/feeds'
        url = '%s/issues/p/%s/issues/%d/comments/full' % (feeds, project_name,
                                                          issue_id)
        stream = urllib.urlopen(url)
        try:
            doc = xml.dom.minidom.parse(stream)
        finally:
            # Always release the connection, even if parsing fails.
            stream.close()

        for entry in doc.getElementsByTagName('entry'):
            title_text_list = []
            for key in ('title', 'content'):
                # An empty element has no child node; record None for it.
                child = entry.getElementsByTagName(key)[0].firstChild
                title_text_list.append(child.nodeValue if child else None)
            comments.append(IssueComment(*title_text_list))

        return comments

    def __eq__(self, other):
        """Compare id, labels, owner, status, summary and title."""
        return (self.id == other.id and self.labels == other.labels and
                self.owner == other.owner and self.status == other.status and
                self.summary == other.summary and self.title == other.title)

    def __ne__(self, other):
        return not self == other
class TrackerError(RuntimeError):
    """Raised for errors in communication with Tracker."""
class TrackerInvalidUserError(TrackerError):
    """Raised when Tracker does not recognize a user."""
class TrackerComm(object):
    """Class to manage communication with Tracker."""

    __slots__ = (
        'author',        # Author when creating/editing Tracker issues
        'it_client',     # Issue Tracker client
        'project_name',  # Tracker project name
    )

    def __init__(self):
        self.author = None
        self.it_client = None
        self.project_name = None

    def Connect(self, creds, project_name, source='chromiumos'):
        """Log in to Tracker for |project_name| using |creds|.

        |creds| Credentials object; its tracker_auth_token is used when set,
                otherwise its user/password are used and the resulting token
                is stored back on |creds|.
        |project_name| Tracker project name
        |source| Name to associate with connecting service
        """
        self.project_name = project_name

        it_client = gdata.projecthosting.client.ProjectHostingClient()
        it_client.source = source

        if creds.tracker_auth_token:
            oper.Notice('Logging into Tracker using previous auth token.')
            it_client.auth_token = gdata.gauth.ClientLoginToken(
                creds.tracker_auth_token)
        else:
            oper.Notice('Logging into Tracker as "%s".' % creds.user)
            # NOTE(review): the end of this call was lost from this copy of
            # the file; any further keyword arguments are unknown -- confirm.
            it_client.ClientLogin(creds.user, creds.password,
                                  source=source, service='code')
            creds.SetTrackerAuthToken(it_client.auth_token.token_string)

        self.author = creds.user
        self.it_client = it_client

    def _QueryTracker(self, query):
        """Query the tracker for a list of issues. Return |None| on failure."""
        try:
            return self.it_client.get_issues(self.project_name, query=query)
        except gdata.client.RequestError:
            return None

    def _CreateIssue(self, t_issue):
        """Create an Issue from a Tracker Issue."""
        issue = Issue()
        issue.InitFromTracker(t_issue, self.project_name)
        return issue

    # TODO(mtennant): This method works today, but is not being actively used.
    # Leaving it in, because a logical use of the method is to verify
    # that a Tracker issue in the package spreadsheet is open, and to add
    # comments to it when new upstream versions become available.
    def GetTrackerIssueById(self, tid):
        """Get tracker issue given |tid| number. Return Issue object if found.

        Returns None when the query fails or matches no issue.
        """
        query = gdata.projecthosting.client.Query(issue_id=str(tid))
        feed = self._QueryTracker(query)

        # _QueryTracker returns None on failure, so check |feed| itself
        # before looking at its entries.
        if feed and feed.entry:
            return self._CreateIssue(feed.entry[0])
        return None

    def GetTrackerIssuesByText(self, search_text, full_text=True,
                               only_open=True):
        """Find all Tracker Issues that contain the text search_text.

        |search_text| Text to search for
        |full_text| If False, restrict the search to the issue summary
        |only_open| If True, only match open issues
        Returns a (possibly empty) list of Issue objects.
        """
        # NOTE(review): the |only_open| parameter line was lost from this
        # copy of the file; the default of True is a reconstruction.
        if not full_text:
            search_text = 'summary:"%s"' % search_text
        if only_open:
            search_text += ' is:open'
        query = gdata.projecthosting.client.Query(text_query=search_text)
        feed = self._QueryTracker(query)
        if feed:
            return [self._CreateIssue(tissue) for tissue in feed.entry]
        return []

    def CreateTrackerIssue(self, issue):
        """Create a new issue in Tracker according to |issue|.

        On success the new id is stored on |issue| and returned.
        Raises TrackerInvalidUserError when Tracker rejects the issue owner;
        any other request error is re-raised unchanged.
        """
        try:
            # NOTE(review): only the first and last keyword arguments
            # survived in this copy of the file; the ones in between are
            # reconstructed from the Issue attributes -- confirm.
            created = self.it_client.add_issue(project_name=self.project_name,
                                               title=issue.title,
                                               content=issue.summary,
                                               author=self.author,
                                               status=issue.status,
                                               owner=issue.owner,
                                               labels=issue.labels,
                                               ccs=issue.ccs)
            issue.id = int(created.id.text.split('/')[-1])
            return issue.id
        except gdata.client.RequestError as ex:
            if ex.body and ex.body.lower() == 'user not found':
                raise TrackerInvalidUserError('Tracker user %s not found' % issue.owner)
            if ex.body and ex.body.lower() == 'issue owner must be a member':
                raise TrackerInvalidUserError('Tracker user %s not a member' %
                                              issue.owner)
            raise

    def AppendTrackerIssueById(self, issue_id, comment, owner=None):
        """Append |comment| to issue |issue_id| in Tracker.

        Returns |issue_id|.
        """
        # NOTE(review): the update call was lost from this copy of the file
        # (only the return statement survived); reconstructed -- confirm.
        self.it_client.update_issue(project_name=self.project_name,
                                    issue_id=issue_id,
                                    author=self.author,
                                    comment=comment,
                                    owner=owner)
        return issue_id
class SpreadsheetRow(dict):
    """Minor semi-immutable extension of dict to keep the original spreadsheet
    row object and spreadsheet row number as attributes.

    No changes are made to equality checking or anything else, so client code
    that wishes to handle this as a pure dict can.
    """

    def __init__(self, ss_row_obj, ss_row_num, mapping=None):
        """Create the row.

        |ss_row_obj| Original spreadsheet row object
        |ss_row_num| Spreadsheet row number
        |mapping| Optional initial contents of the dict
        """
        if mapping:
            dict.__init__(self, mapping)

        self.ss_row_obj = ss_row_obj
        self.ss_row_num = ss_row_num

    def __setitem__(self, key, val):
        raise TypeError('setting item in SpreadsheetRow not supported')

    def __delitem__(self, key):
        raise TypeError('deleting item in SpreadsheetRow not supported')
class SpreadsheetError(RuntimeError):
    """Raised for errors in communication with the spreadsheet."""
def ReadWriteDecorator(func):
    """Raise SpreadsheetError if appropriate.

    Wraps method |func| so that a gdata.service.RequestError raised while it
    runs is re-raised as a SpreadsheetError carrying the same message.  The
    wrapper keeps the name and docstring of |func|.
    """
    @functools.wraps(func)
    def f(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except gdata.service.RequestError as ex:
            raise SpreadsheetError(str(ex))

    return f
class SpreadsheetComm(object):
    """Class to manage communication with one Google Spreadsheet worksheet.

    Column names and rows are fetched on demand and cached; the cache is
    dropped by _ClearCache whenever the data might be stale.

    NOTE(review): this copy of the file had lost the decorator lines, the
    two *_OFFSET constants and several statements.  They are reconstructed
    here from the surrounding comments, docstrings and usages -- confirm,
    in particular which methods carry @ReadWriteDecorator.
    """

    # Row numbering in spreadsheets effectively starts at 2 because row 1
    # has the column headers.
    ROW_NUMBER_OFFSET = 2

    # Spreadsheet column numbers start at 1.
    COLUMN_NUMBER_OFFSET = 1

    __slots__ = (
        '_columns',   # Tuple of translated column names, filled in as needed
        '_rows',      # Tuple of Row dicts in order, filled in as needed
        'gd_client',  # Google Data client
        'ss_key',     # Spreadsheet key
        'ws_name',    # Worksheet name
        'ws_key',     # Worksheet key
    )

    @property
    def columns(self):
        """The columns property is filled in on demand.

        It is a tuple of column names, each run through PrepColNameForSS.
        """
        if self._columns is None:
            query = gdata.spreadsheet.service.CellQuery()
            query['max-row'] = '1'
            feed = self.gd_client.GetCellsFeed(self.ss_key, self.ws_key,
                                               query=query)

            # The use of PrepColNameForSS here looks weird, but the values
            # in row 1 are the unaltered column names, rather than the
            # restricted column names used for interface purposes.  In other
            # words, if the spreadsheet looks like it has a column called
            # "Foo Bar", then the first row will have a value "Foo Bar" but
            # all interaction with that column for other rows will use column
            # key "foobar".  Translate to restricted names now with
            # PrepColNameForSS.
            cols = [PrepColNameForSS(entry.content.text)
                    for entry in feed.entry]
            self._columns = tuple(cols)

        return self._columns

    @property
    def rows(self):
        """The rows property is filled in on demand.

        It is a tuple of SpreadsheetRow objects.
        """
        if self._rows is None:
            rows = []

            feed = self.gd_client.GetListFeed(self.ss_key, self.ws_key)
            for rowIx, rowObj in enumerate(feed.entry,
                                           start=self.ROW_NUMBER_OFFSET):
                row_dict = dict((key, ScrubValFromSS(val.text))
                                for key, val in rowObj.custom.items())
                rows.append(SpreadsheetRow(rowObj, rowIx, row_dict))

            self._rows = tuple(rows)

        return self._rows

    def __init__(self):
        for slot in self.__slots__:
            setattr(self, slot, None)

    def Connect(self, creds, ss_key, ws_name, source='chromiumos'):
        """Login to spreadsheet service and set current worksheet.

        |creds| Credentials object for Google Docs
        |ss_key| Spreadsheet key
        |ws_name| Worksheet name
        |source| Name to associate with connecting service
        """
        self._Login(creds, source)
        self.SetCurrentWorksheet(ws_name, ss_key=ss_key)

    def SetCurrentWorksheet(self, ws_name, ss_key=None):
        """Change the current worksheet. This clears all caches."""
        if ss_key and ss_key != self.ss_key:
            self.ss_key = ss_key
            self._ClearCache()

        self.ws_name = ws_name

        ws_key = self._GetWorksheetKey(self.ss_key, self.ws_name)
        if ws_key != self.ws_key:
            self.ws_key = ws_key
            self._ClearCache()

    def _ClearCache(self, keep_columns=False):
        """Called whenever column/row data might be stale."""
        self._rows = None
        if not keep_columns:
            self._columns = None

    def _Login(self, creds, source):
        """Login to Google doc client using given |creds|."""
        gd_client = RetrySpreadsheetsService()
        gd_client.source = source

        # Login using previous auth token if available, otherwise
        # use email/password from creds.
        if creds.docs_auth_token:
            oper.Notice('Logging into Docs using previous auth token.')
            gd_client.SetClientLoginToken(creds.docs_auth_token)
        else:
            oper.Notice('Logging into Docs as "%s".' % creds.user)
            gd_client.email = creds.user
            gd_client.password = creds.password
            gd_client.ProgrammaticLogin()
            # Remember the new token so later runs can skip the password.
            creds.SetDocsAuthToken(gd_client.GetClientLoginToken())

        self.gd_client = gd_client

    def _GetWorksheetKey(self, ss_key, ws_name):
        """Get the worksheet key with name |ws_name| in spreadsheet |ss_key|."""
        feed = self.gd_client.GetWorksheetsFeed(ss_key)
        # The worksheet key is the last component in the URL (after last '/')
        for entry in feed.entry:
            if ws_name == entry.title.text:
                return entry.id.text.split('/')[-1]

        oper.Die('Unable to find worksheet "%s" in spreadsheet "%s"' %
                 (ws_name, ss_key))

    @ReadWriteDecorator
    def GetColumns(self):
        """Return tuple of column names in worksheet.

        Note that each returned name has been run through PrepColNameForSS.
        """
        return self.columns

    def GetColumnIndex(self, colName):
        """Get the column index (starting at 1) for column |colName|.

        Returns None if there is no such column.
        """
        try:
            # Spreadsheet column indices start at 1, so +1.
            return self.columns.index(colName) + self.COLUMN_NUMBER_OFFSET
        except ValueError:
            return None

    @ReadWriteDecorator
    def GetRows(self):
        """Return tuple of SpreadsheetRow objects in order."""
        return self.rows

    def GetRowCacheByCol(self, column):
        """Return a dict for looking up rows by value in |column|.

        Each row value is a SpreadsheetRow object.
        If more than one row has the same value for |column|, then the
        row objects will be in a list in the returned dict.
        """
        row_cache = {}

        for row in self.GetRows():
            col_val = row[column]

            current_entry = row_cache.get(col_val)
            if current_entry is None:
                # First row seen with this value.
                row_cache[col_val] = row
            elif isinstance(current_entry, list):
                current_entry.append(row)
            else:
                # Second row with this value: switch to a list of rows.
                row_cache[col_val] = [current_entry, row]

        return row_cache

    @ReadWriteDecorator
    def InsertRow(self, row):
        """Insert |row| at end of spreadsheet."""
        self.gd_client.InsertRow(row, self.ss_key, self.ws_key)
        self._ClearCache(keep_columns=True)

    @ReadWriteDecorator
    def UpdateRowCellByCell(self, rowIx, row):
        """Replace cell values in row at |rowIx| with those in |row| dict.

        Keys of |row| that are not column names are skipped.
        """
        for colName in row:
            colIx = self.GetColumnIndex(colName)
            if colIx is not None:
                self.ReplaceCellValue(rowIx, colIx, row[colName])
        self._ClearCache(keep_columns=True)

    @ReadWriteDecorator
    def DeleteRow(self, ss_row):
        """Delete the given |ss_row| (must be original spreadsheet row object)."""
        self.gd_client.DeleteRow(ss_row)
        self._ClearCache(keep_columns=True)

    @ReadWriteDecorator
    def ReplaceCellValue(self, rowIx, colIx, val):
        """Replace cell value at |rowIx| and |colIx| with |val|"""
        self.gd_client.UpdateCell(rowIx, colIx, val, self.ss_key, self.ws_key)
        self._ClearCache(keep_columns=True)

    @ReadWriteDecorator
    def ClearCellValue(self, rowIx, colIx):
        """Clear cell value at |rowIx| and |colIx|"""
        self.ReplaceCellValue(rowIx, colIx, None)

    @ReadWriteDecorator
    def ClearColumnWorksheet(self, colIx):
        """Clear column with index |colIx| from current worksheet."""
        query = gdata.spreadsheet.service.CellQuery()
        query.min_col = str(colIx)
        query.max_col = str(colIx)
        cells = self.gd_client.GetCellsFeed(self.ss_key, wksht_id=self.ws_key,
                                            query=query)
        batchRequest = gdata.spreadsheet.SpreadsheetsCellsFeed()
        for entry in cells.entry:
            entry.cell.inputValue = None
            batchRequest.AddUpdate(entry)
        self.gd_client.ExecuteBatch(batchRequest, cells.GetBatchLink().href)

    @ReadWriteDecorator
    def WriteColumnToWorksheet(self, colIx, data):
        """Clear column index |colIx| from worksheet and write |data| to it."""
        query = gdata.spreadsheet.service.CellQuery()
        query.min_col = str(colIx)
        query.max_col = str(colIx)
        query.min_row = '1'
        query.max_row = str(len(data))
        # Ask for empty cells too, so every row gets an entry to update.
        query.return_empty = 'true'
        cells = self.gd_client.GetCellsFeed(self.ss_key, wksht_id=self.ws_key,
                                            query=query)
        batchRequest = gdata.spreadsheet.SpreadsheetsCellsFeed()
        for entry, value in zip(cells.entry, data):
            entry.cell.inputValue = str(value)
            batchRequest.AddUpdate(entry)
        self.gd_client.ExecuteBatch(batchRequest, cells.GetBatchLink().href)
class RetrySpreadsheetsService(gdata.spreadsheet.service.SpreadsheetsService):
    """Extend SpreadsheetsService to put retry logic around http request method.

    The entire purpose of this class is to remove some flakiness from
    interactions with Google Drive spreadsheet service, in the form of
    certain 40* and 50* http error responses to http requests.

    There are two "request" methods that need to be wrapped in retry logic.
    1) The request method on self.  Original implementation is in
       base class atom.service.AtomService.
    2) The request method on self.http_client.  The class of self.http_client
       can actually vary, so the original implementation of the request
       method can also vary.
    """
    # pylint: disable=R0904

    # Maximum number of times one request is attempted.
    # NOTE(review): the definition was lost from this copy of the file; the
    # value 5 is a reconstruction -- confirm.
    TRY_MAX = 5

    RETRYABLE_STATUSES = (
        403,  # Forbidden (but retries still seem to help).
        500,  # Internal server error.
    )

    def __init__(self, *args, **kwargs):
        gdata.spreadsheet.service.SpreadsheetsService.__init__(self, *args,
                                                               **kwargs)

        # Wrap self.http_client.request with retry wrapper.  This request
        # method is used by ProgrammaticLogin(), at least.
        if hasattr(self, 'http_client'):
            self.http_client.request = functools.partial(
                self._RetryRequest, self.http_client.request)

        self.request = functools.partial(self._RetryRequest, self.request)

    def _RetryRequest(self, func, *args, **kwargs):
        """Retry wrapper for bound |func|, passing |args| and |kwargs|.

        This retry wrapper can be used for any http request |func| that
        provides an http status code via the .status attribute of the
        returned value.

        Retry when the status value on the return object is in
        RETRYABLE_STATUSES, and run up to TRY_MAX times.  If successful
        (whether or not retries were necessary) return the last return value
        returned from base method.  If unsuccessful return the first return
        value returned from base method.
        """
        first_retval = None
        for try_ix in range(1, self.TRY_MAX + 1):
            retval = func(*args, **kwargs)
            if retval.status not in self.RETRYABLE_STATUSES:
                return retval

            oper.Warning('Retry-able HTTP request failure (status=%d), try %d/%d' %
                         (retval.status, try_ix, self.TRY_MAX))
            # Compare against None explicitly so the first response is kept
            # no matter how the response object evaluates as a boolean.
            if first_retval is None:
                first_retval = retval

        oper.Warning('Giving up on HTTP request after %d tries' % self.TRY_MAX)
        return first_retval