Add retry in downloading Android artifacts.
BUG=chromium:512668
TEST=local test
Change-Id: Ic084079fd9eac71ac4cca40a702f1337d9eca2aa
Reviewed-on: https://chromium-review.googlesource.com/312503
Commit-Ready: Dan Shi <dshi@google.com>
Tested-by: Dan Shi <dshi@google.com>
Reviewed-by: Dan Shi <dshi@google.com>
diff --git a/android_build.py b/android_build.py
index 597d8bb..dd1a931 100644
--- a/android_build.py
+++ b/android_build.py
@@ -9,13 +9,24 @@
import apiclient
import httplib2
import io
+import subprocess
from apiclient import discovery
from oauth2client.client import SignedJwtAssertionCredentials
+import retry
+
+
CREDENTIAL_SCOPE = 'https://www.googleapis.com/auth/androidbuild.internal'
DEFAULT_BUILDER = 'androidbuildinternal'
DEFAULT_CHUNKSIZE = 20*1024*1024
+# Maximum attempts to interact with Launch Control API.
+MAX_ATTEMPTS = 10
+# Total time in minutes to keep retrying a download before giving up.
+DOWNLOAD_TIMEOUT_MINS = 30
+# Total time in minutes to keep retrying an API query before giving up.
+QUERY_TIMEOUT_MINS = 1
+
class AndroidBuildFetchError(Exception):
"""Exception to raise when failed to make calls to Android build server."""
@@ -28,6 +39,7 @@
credential_info = None
@classmethod
+ @retry.retry(Exception, timeout_min=QUERY_TIMEOUT_MINS)
def _GetServiceObject(cls):
"""Returns a service object with given credential information."""
if not cls.credential_info:
@@ -37,8 +49,8 @@
cls.credential_info['client_email'],
cls.credential_info['private_key'], CREDENTIAL_SCOPE)
http_auth = credentials.authorize(httplib2.Http())
- service_obj = discovery.build(DEFAULT_BUILDER, 'v1', http=http_auth)
- return service_obj
+ return discovery.build(DEFAULT_BUILDER, 'v1', http=http_auth)
+
@classmethod
def _VerifyBranch(cls, service_obj, branch, build_id, target):
@@ -56,7 +68,7 @@
"""
builds = service_obj.build().list(
buildType='submitted', branch=branch, buildId=build_id, target=target,
- maxResults=0).execute()
+ maxResults=0).execute(num_retries=MAX_ATTEMPTS)
if not builds:
raise AndroidBuildFetchError(
'Failed to locate build with branch %s, build id %s and target %s.' %
@@ -91,12 +103,13 @@
# Get all artifacts for the given build_id and target.
artifacts = service_obj.buildartifact().list(
buildType='submitted', buildId=build_id, target=target,
- attemptId='latest', maxResults=0).execute()
+ attemptId='latest', maxResults=0).execute(num_retries=MAX_ATTEMPTS)
return artifacts['artifacts']
@classmethod
+ @retry.retry(Exception, timeout_min=DOWNLOAD_TIMEOUT_MINS)
def Download(cls, branch, build_id, target, resource_id, dest_file):
- """Get the list of artifacts for given build id and target.
+ """Download the artifact for the given build id and target.
Args:
branch: branch of the desired build.
@@ -108,6 +121,9 @@
service_obj = cls._GetServiceObject()
cls._VerifyBranch(service_obj, branch, build_id, target)
+ # Delete partially downloaded file if exists.
+ subprocess.call(['rm', '-rf', dest_file])
+
# TODO(dshi): Add retry logic here to avoid API flakes.
download_req = service_obj.buildartifact().get_media(
buildType='submitted', buildId=build_id, target=target,
@@ -117,9 +133,12 @@
fh, download_req, chunksize=DEFAULT_CHUNKSIZE)
done = None
while not done:
- _, done = downloader.next_chunk()
+ _, done = downloader.next_chunk(num_retries=MAX_ATTEMPTS)
+
@classmethod
+ @retry.retry(Exception, timeout_min=QUERY_TIMEOUT_MINS,
+ blacklist=[AndroidBuildFetchError])
def GetLatestBuildID(cls, target, branch):
"""Get the latest build ID for the given target and branch.
@@ -134,7 +153,7 @@
service_obj = cls._GetServiceObject()
builds = service_obj.build().list(
buildType='submitted', branch=branch, target=target, successful=True,
- maxResults=1).execute()
+ maxResults=1).execute(num_retries=MAX_ATTEMPTS)
if not builds or not builds['builds']:
raise AndroidBuildFetchError(
'Failed to locate build with branch %s and target %s.' %
diff --git a/retry.py b/retry.py
new file mode 100644
index 0000000..a0b51fb
--- /dev/null
+++ b/retry.py
@@ -0,0 +1,79 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Basic infrastructure for implementing retries.
+
+This code is adapted from autotest: client/common_lib/cros/retry.py
+This implementation removes the timeout feature as that requires the retry to
+be done in the main thread. For devserver, the call is handled in a thread
+kicked off by cherrypy, so timeout can't be supported.
+"""
+
+from __future__ import print_function
+
+import cherrypy
+import random
+import sys
+import time
+
+
+def retry(ExceptionToCheck, timeout_min=1.0, delay_sec=3, blacklist=None):
+ """Retry calling the decorated function using a delay with jitter.
+
+  Exceptions matching ExceptionToCheck trigger a retry until timeout_min
+  elapses; the last such exception is then re-raised with its original
+  traceback. Exceptions listed in blacklist are raised immediately.
+
+ original from:
+ http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+
+ Args:
+ ExceptionToCheck: the exception to check. May be a tuple of exceptions to
+ check.
+ timeout_min: timeout in minutes until giving up.
+ delay_sec: pre-jittered delay between retries in seconds. Actual delays
+ will be centered around this value, ranging up to 50% off this
+ midpoint.
+ blacklist: a list of exceptions that will be raised without retrying
+ """
+ def deco_retry(func):
+ random.seed()
+
+ def delay():
+ """'Jitter' the delay, up to 50% in either direction."""
+ random_delay = random.uniform(.5 * delay_sec, 1.5 * delay_sec)
+ cherrypy.log('Retrying in %f seconds...' % random_delay)
+ time.sleep(random_delay)
+
+ def func_retry(*args, **kwargs):
+ # Used to cache exception to be raised later.
+ exc_info = None
+ delayed_enabled = False
+ exception_tuple = () if blacklist is None else tuple(blacklist)
+ start_time = time.time()
+ remaining_time = timeout_min * 60
+
+ while remaining_time > 0:
+ if delayed_enabled:
+ delay()
+ else:
+ delayed_enabled = True
+ try:
+ # Clear the cache
+ exc_info = None
+ return func(*args, **kwargs)
+ except exception_tuple:
+ raise
+ except ExceptionToCheck as e:
+ cherrypy.log('%s(%s)' % (e.__class__, e))
+ # Cache the exception to be raised later.
+ exc_info = sys.exc_info()
+
+ remaining_time = int(timeout_min*60 - (time.time() - start_time))
+
+ # Raise the cached exception with original backtrace.
+ raise exc_info[0], exc_info[1], exc_info[2]
+
+ return func_retry # true decorator
+ return deco_retry