| #!/usr/bin/python |
| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Simple client for the Gerrit REST API. |
| |
| Example usage: |
| ./gerrit_client.py -j /tmp/out.json -f json \ |
| -u https://chromium.googlesource.com/chromium/src/+log |
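
Fetch at least 50 commits by following log pages (json format only):
  ./gerrit_client.py -j /tmp/log.json -f json --log-limit 50 \
    -u https://chromium.googlesource.com/chromium/src/+log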
| """ |
| |
| import argparse |
| import json |
| import logging |
| import os |
| import sys |
| import tarfile |
| import time |
| import urllib |
| import urlparse |
| |
| DEPOT_TOOLS = os.path.abspath( |
| os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir, |
| os.pardir)) |
| sys.path.insert(0, DEPOT_TOOLS) |
| |
| from gerrit_util import CreateHttpConn, ReadHttpResponse, ReadHttpJsonResponse |
| |
| |
| def reparse_url(parsed_url, query_params): |
| return urlparse.ParseResult( |
| scheme=parsed_url.scheme, |
| netloc=parsed_url.netloc, |
| path=parsed_url.path, |
| params=parsed_url.params, |
| fragment=parsed_url.fragment, |
| query=urllib.urlencode(query_params, doseq=True)) |
| |
| |
| def gitiles_get(parsed_url, handler, attempts): |
  # CreateHttpConn expects a host and a path (with the query string folded
  # into the path) rather than a parsed URL, so flatten the URL here.
| host = parsed_url.netloc |
| path = parsed_url.path |
| if parsed_url.query: |
| path += '?%s' % (parsed_url.query, ) |
| |
| retry_delay_seconds = 1 |
| attempt = 1 |
| while True: |
| try: |
| return handler(CreateHttpConn(host, path)) |
| except Exception as e: |
| if attempt >= attempts: |
| raise |
| logging.exception('Failed to perform Gitiles operation: %s', e) |
| |
      # Back off exponentially before the next attempt.
| logging.error('Sleeping %d seconds before retry (%d/%d)...', |
| retry_delay_seconds, attempt, attempts) |
| time.sleep(retry_delay_seconds) |
| retry_delay_seconds *= 2 |
| attempt += 1 |
| |
| |
| def fetch_log_with_paging(query_params, limit, fetch): |
| """Fetches log, possibly requesting multiple pages to do so. |
| |
| Args: |
| query_params (dict): Parameters to use in the request. |
    limit (int): Minimum total number of commits to fetch across pages.
| fetch (function): Function to use to make the requests. |
| |
  Returns:
    Dict with key "log" holding the aggregated list of commits and, if more
    pages remain, key "next" with the commit hash to resume from.
| """ |
  # The log API returns {'log': [list of commits], 'next': hash}.
| last_result = fetch(query_params) |
| commits = last_result['log'] |
| while last_result.get('next') and len(commits) < limit: |
| query_params['s'] = last_result.get('next') |
| last_result = fetch(query_params) |
    # The first commit in `last_result` is not necessarily the parent of the
    # last commit fetched so far! The log can be requested for a single file,
    # for example:
    # https://gerrit.googlesource.com/gitiles/+log/1c21279f337da8130/COPYING
    # and even a log of the whole repository can contain merge commits.
| commits.extend(last_result['log']) |
  # Keep the 'next' field (if any) from `last_result`, but return the commits
  # aggregated across all pages; this imitates a single page of at least
  # `limit` commits.
| last_result['log'] = commits |
| logging.debug( |
| 'fetched %d commits, next: %s.', len(commits), |
| last_result.get('next')) |
| return last_result |
| |
| |
| def main(arguments): |
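  """Parses arguments, fetches from gitiles, and writes the result as JSON."""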
| parser = create_argparser() |
| args = parser.parse_args(arguments) |
| |
  if args.extract_to and args.format != 'archive':
    parser.error('--extract-to requires --format=archive')
  if not args.extract_to and args.format == 'archive':
    parser.error('--format=archive requires --extract-to')
| |
| if args.extract_to: |
    # Make sure the path is absolute and ends with '/'.
| args.extract_to = os.path.join(os.path.abspath(args.extract_to), '') |
| os.makedirs(args.extract_to) |
| |
| parsed_url = urlparse.urlparse(args.url) |
  if parsed_url.scheme not in ('http', 'https'):
    parser.error('Invalid URI scheme (expected http or https): %s' % args.url)
| |
| query_params = {} |
| if parsed_url.query: |
| query_params.update(urlparse.parse_qs(parsed_url.query)) |
| # Force the format specified on command-line. |
| if query_params.get('format'): |
| parser.error('URL must not contain format; use --format command line flag ' |
| 'instead.') |
| query_params['format'] = args.format |
| |
| kwargs = {} |
| accept_statuses = frozenset([int(s) for s in args.accept_statuses.split(',')]) |
| if accept_statuses: |
| kwargs['accept_statuses'] = accept_statuses |
| |
| # Choose handler. |
| if args.format == 'json': |
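    # JSON fetching parses the response body and returns the parsed object.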
| def handler(conn): |
| return ReadHttpJsonResponse(conn, **kwargs) |
| elif args.format == 'text': |
    # Text fetching wraps the raw response body in structured JSON for export
    # to the recipe module.
    def handler(conn):
| return { |
| 'value': ReadHttpResponse(conn, **kwargs).read() or None, |
| } |
| elif args.format == 'archive': |
| # Archive fetching hooks result to tarfile extraction. This implementation |
| # is able to do a streaming extraction operation without having to buffer |
| # the entire tarfile. |
| def handler(conn): |
| ret = { |
| 'extracted': { |
| 'filecount': 0, |
| 'bytes': 0, |
| }, |
| 'skipped': { |
| 'filecount': 0, |
| 'bytes': 0, |
| 'names': [], |
| } |
| } |
| fileobj = ReadHttpResponse(conn, **kwargs) |
| with tarfile.open(mode='r|*', fileobj=fileobj) as tf: |
        # Monkeypatch the TarFile object to print a message and collect stats
        # for each extracted file. extractall makes a single linear pass over
        # the tarfile, which is compatible with ReadHttpResponse; other naive
        # approaches (such as `getmembers`) do random access over the file and
        # would require buffering the whole thing in memory.
| em = tf._extract_member |
| def _extract_member(tarinfo, targetpath): |
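        # Refuse to extract entries that would land outside the extraction
        # root (e.g. paths containing '..').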
| if not os.path.abspath(targetpath).startswith(args.extract_to): |
| print('Skipping %s' % (tarinfo.name,)) |
| ret['skipped']['filecount'] += 1 |
| ret['skipped']['bytes'] += tarinfo.size |
| ret['skipped']['names'].append(tarinfo.name) |
| return |
| print('Extracting %s' % (tarinfo.name,)) |
| ret['extracted']['filecount'] += 1 |
| ret['extracted']['bytes'] += tarinfo.size |
| return em(tarinfo, targetpath) |
| tf._extract_member = _extract_member |
| tf.extractall(args.extract_to) |
| return ret |
| |
| if args.log_start: |
| query_params['s'] = args.log_start |
| |
| def fetch(query_params): |
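    """Fetches a single page of results for the given query parameters."""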
| parsed_url_with_query = reparse_url(parsed_url, query_params) |
| result = gitiles_get(parsed_url_with_query, handler, args.attempts) |
| if not args.quiet: |
| logging.info('Read from %s: %s', parsed_url_with_query.geturl(), result) |
| return result |
| |
| if args.log_limit: |
| if args.format != 'json': |
| parser.error('--log-limit works with json format only') |
| result = fetch_log_with_paging(query_params, args.log_limit, fetch) |
| else: |
| # Either not a log request, or don't care about paging. |
| # So, just return whatever is fetched the first time. |
| result = fetch(query_params) |
| |
  if args.json_file:
    with open(args.json_file, 'w') as json_file:
      json.dump(result, json_file)
  return 0
| |
| |
| def create_argparser(): |
| parser = argparse.ArgumentParser() |
| parser.add_argument( |
| '-j', '--json-file', |
      help='Path to the JSON file to write the result to.')
| parser.add_argument( |
| '--extract-to', |
      help='Local directory to extract the archive into. Must not exist.')
| parser.add_argument( |
| '-f', '--format', required=True, choices=('json', 'text', 'archive')) |
| parser.add_argument( |
| '-u', '--url', required=True, |
      help='URL of the gitiles resource to fetch. For example, '
           'https://chromium.googlesource.com/chromium/src/+refs. '
           'Insert "a/" after the domain for authenticated access.')
| parser.add_argument( |
| '-a', '--attempts', type=int, default=1, |
      help='The number of attempts to make (with exponential backoff) before '
           'failing. If several requests are made, this applies to each '
           'request separately.')
| parser.add_argument( |
| '-q', '--quiet', action='store_true', |
| help='Suppress file contents logging output.') |
| parser.add_argument( |
| '--log-limit', type=int, default=None, |
      help='Follow gitiles pages to fetch at least this many commits. By '
           'default, only the first page (whose size the server chooses) is '
           'fetched. Only valid for https://<hostname>/<repo>/+log/... '
           'gitiles requests.')
| parser.add_argument( |
| '--log-start', |
      help='If given, continue fetching the log by paging from this commit '
           'hash. This value can typically be taken from the JSON result of '
           'a previous log call, which returns the start of the next page '
           'under the "next" key. Only valid for '
           'https://<hostname>/<repo>/+log/... gitiles requests.')
| parser.add_argument( |
| '--accept-statuses', type=str, default='200', |
      help='Comma-separated list of status codes to accept as "successful" '
           'HTTP responses.')
| return parser |
| |
| |
| if __name__ == '__main__': |
| logging.basicConfig() |
| logging.getLogger().setLevel(logging.INFO) |
| sys.exit(main(sys.argv[1:])) |