# blob: 05be01bcf5e6d2a8a65a849b8cc1be3e4e88171c [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Basic infrastructure for implementing retries."""
from __future__ import print_function
import random
import sys
import time
from chromite.lib import cros_build_lib
from chromite.lib import cros_logging as logging
def _ReraiseExcInfo(exc_info):
  """Re-raise a sys.exc_info() triple while keeping its original traceback.

  The three-expression form of `raise` is a syntax error on Python 3, and
  `with_traceback` does not exist on Python 2.  The Python 2 spelling is
  therefore kept inside a string that is only compiled on that interpreter.

  Args:
    exc_info: A (type, value, traceback) tuple as returned by sys.exc_info().

  Raises:
    The exception described by |exc_info|; this function never returns.
  """
  if sys.version_info[0] >= 3:
    raise exc_info[1].with_traceback(exc_info[2])
  # The tuple form of exec is valid on both Python 2.7 and Python 3, so this
  # line parses everywhere but only runs on Python 2.
  exec('raise exc_type, exc_value, exc_tb',  # pylint: disable=exec-used
       {'exc_type': exc_info[0], 'exc_value': exc_info[1],
        'exc_tb': exc_info[2]})


def GenericRetry(handler, max_retry, functor, *args, **kwargs):
  """Generic retry loop w/ optional break out depending on exceptions.

  To retry based on the return value of |functor| see the timeout_util module.

  Keep in mind that the total sleep time will be the triangular value of
  max_retry multiplied by the sleep value.  e.g. max_retry=5 and sleep=10
  will be T5 (i.e. 5+4+3+2+1) times 10, or 150 seconds total.  Rather than
  use a large sleep value, you should lean more towards large retries and
  lower sleep intervals, or by utilizing backoff_factor.

  Args:
    handler: A functor invoked w/ the exception instance that
      functor(*args, **kwargs) threw.  If it returns True, then a
      retry is attempted.  If False, no further attempt is made and the
      failure is raised.
    max_retry: A non-negative integer representing how many times to retry
      the command before giving up.  Worst case, the command is invoked
      (max_retry + 1) times before failing.
    functor: A callable to pass args and kwargs to.
    args: Positional args passed to functor.
    kwargs: Optional args passed to functor, after the keywords below have
      been removed from it.
    log_all_retries: Optional keyword.  When True, log all retries.
    sleep: Optional keyword.  Multiplier for how long to sleep between
      retries; will delay (1*sleep) the first time, then (2*sleep),
      continuing via attempt * sleep.  Must be >= 0.
    backoff_factor: Optional keyword.  If supplied and > 1, subsequent sleeps
      will be of length (backoff_factor ^ (attempt - 1)) * sleep,
      rather than the default behavior of attempt * sleep.
    raise_first_exception_on_failure: Optional boolean which determines which
      exception is raised upon failure after retries.  If True, the first
      exception that was encountered.  If False, the final one.
      Default: True.
    delay_sec: Optional keyword.  If non-zero, every retry is preceded by a
      sleep of a random length between 0.5 * delay_sec and 1.5 * delay_sec.
      This happens in addition to any |sleep| based delay.
    exception_to_raise: Optional keyword.  If set, it is called with the
      message '<type>: <value>' of the recorded exception and the result is
      raised on failure, instead of re-raising the recorded exception.
    status_callback: Optional callback invoked after each call of |functor|.
      It takes two arguments: |attempt| which is the index of the last
      attempt (0-based), and |success| representing whether the last attempt
      was successfully done or not.  If the callback raises an exception,
      no further retry will be made, and the exception will be propagated to
      the caller.

  Returns:
    Whatever functor(*args, **kwargs) returns.

  Raises:
    ValueError: |sleep| or |max_retry| is negative, or |backoff_factor| < 1.
    Exception: Whatever exception functor(*args, **kwargs) throws that is not
      suppressed by a successful retry.  Which failure is reported is
      controlled by |raise_first_exception_on_failure|, and its type can be
      replaced through |exception_to_raise|.
  """

  def delay():
    """'Jitter' the delay, up to 50% in either direction."""
    random_delay = random.uniform(.5 * delay_sec, 1.5 * delay_sec)
    logging.debug('Retrying in %f seconds...', random_delay)
    time.sleep(random_delay)

  # Strip the retry-control keywords so that only the caller's own keywords
  # are forwarded to |functor|.
  log_all_retries = kwargs.pop('log_all_retries', False)
  sleep = kwargs.pop('sleep', 0)
  if sleep < 0:
    raise ValueError('sleep must be >= 0')

  if max_retry < 0:
    raise ValueError('max_retry needs to be zero or more: %s' % max_retry)

  backoff_factor = kwargs.pop('backoff_factor', 1)
  if backoff_factor < 1:
    raise ValueError('backoff_factor must be 1 or greater: %s'
                     % backoff_factor)

  status_callback = kwargs.pop(
      'status_callback', lambda attempt, success: None)
  raise_first_exception_on_failure = kwargs.pop(
      'raise_first_exception_on_failure', True)
  delay_sec = kwargs.pop('delay_sec', 0)
  exception_to_raise = kwargs.pop('exception_to_raise', None)

  exc_info = None
  for attempt in range(max_retry + 1):
    if attempt > 0 and delay_sec:
      delay()

    if attempt and log_all_retries:
      fname = getattr(functor, '__name__', '<nameless>')
      logging.debug('retrying %s (attempt %d)', fname, attempt + 1)

    if attempt and sleep:
      if backoff_factor > 1:
        sleep_time = sleep * backoff_factor ** (attempt - 1)
      else:
        sleep_time = sleep * attempt
      time.sleep(sleep_time)

    try:
      ret = functor(*args, **kwargs)
    except Exception as e:
      # Note we're not snagging BaseException, so MemoryError/KeyboardInterrupt
      # and friends don't enter this except block.

      # If raise_first_exception_on_failure, we intentionally ignore
      # any failures in later attempts since we'll throw the original
      # failure if all retries fail.
      if exc_info is None or not raise_first_exception_on_failure:
        exc_info = sys.exc_info()

      try:
        status_callback(attempt, False)
      except Exception:
        # In case callback raises an exception, quit the retry.
        # For further investigation, log the original exception here.
        logging.error('Ending retry due to Exception raised by a callback. '
                      'Original exception raised during the attempt is '
                      'as follows: ',
                      exc_info=exc_info)
        # Reraise the exception raised from the status_callback.
        raise

      if not handler(e):
        logging.debug('ending retries with error: %s(%s)', e.__class__, e)
        break
      logging.debug('%s(%s)', e.__class__, e)
    else:
      # Run callback in outside of try's main block, in order to avoid
      # accidental capture of an Exception which may be raised in callback.
      status_callback(attempt, True)
      return ret

  # Did not return, meaning all attempts failed (or the handler declined a
  # retry).  Raise the recorded exception.
  if exception_to_raise:
    raise exception_to_raise('%s: %s' % (exc_info[0], exc_info[1]))
  _ReraiseExcInfo(exc_info)
def RetryException(exc_retry, max_retry, functor, *args, **kwargs):
  """Retry |functor| through GenericRetry when it raises selected exceptions.

  Args:
    exc_retry: An exception class, or a tuple of them.  A raised exception
      that is an instance of the given class(es) triggers a retry; any
      other exception ends the retry loop.
    max_retry: See GenericRetry.
    functor: See GenericRetry.
    *args: See GenericRetry.
    **kwargs: See GenericRetry.

  Returns:
    Whatever GenericRetry returns.

  Raises:
    TypeError: |exc_retry| is neither a class nor a tuple.
  """
  accepted_kinds = (tuple, type)
  if not isinstance(exc_retry, accepted_kinds):
    raise TypeError('exc_retry should be an exception (or tuple), not %r' %
                    exc_retry)

  retriable_classes = exc_retry

  def _IsRetriable(exc):
    """Tell GenericRetry whether |exc| is one of the retriable classes."""
    return isinstance(exc, retriable_classes)

  return GenericRetry(_IsRetriable, max_retry, functor, *args, **kwargs)
def RetryCommand(functor, max_retry, *args, **kwargs):
  """Run |functor| via GenericRetry, retrying on RunCommandError failures.

  Args:
    functor: RunCommand function to run; retries will only occur on
      RunCommandError exceptions being thrown.
    max_retry: How many times to retry the command before giving up; see
      GenericRetry.  Worst case, the command is invoked (max_retry + 1)
      times before failing.
    sleep: Optional keyword.  Forwarded to GenericRetry; see there.
    retry_on: Optional keyword.  If provided, only failures whose exit code
      is in this collection are retried.  If omitted, every non-negative
      exit code is retried, and a negative one (the child was killed by a
      signal) is not.
    error_check: Optional keyword.  Callback invoked with the exception.
      Return None to fall back to |retry_on|, or True/False to decide the
      retry directly.
    log_retries: Optional keyword.  Whether to log a warning when a failure
      is judged retriable through |retry_on|.  Default: True.
    args: Positional args passed to RunCommand; see RunCommand for specifics.
    kwargs: Optional args passed to RunCommand; see RunCommand for specifics.

  Returns:
    A CommandResult object.

  Raises:
    Exception: Raises RunCommandError on error with optional error_message.
  """
  retry_codes = kwargs.pop('retry_on', None)
  error_check = kwargs.pop('error_check', lambda x: None)
  log_retries = kwargs.pop('log_retries', True)

  def _ShouldRetry(exc):
    """Return whether we should retry on a given exception."""
    if not ShouldRetryCommandCommon(exc):
      return False

    retry_any_exit = retry_codes is None
    if retry_any_exit and exc.result.returncode < 0:
      logging.info('Child process received signal %d; not retrying.',
                   -exc.result.returncode)
      return False

    # An explicit True/False from the callback overrides the exit code test.
    verdict = error_check(exc)
    if verdict is not None:
      return verdict

    if not (retry_any_exit or exc.result.returncode in retry_codes):
      return False

    if log_retries:
      logging.warning('Command failed with retriable error.\n%s', exc)
    return True

  return GenericRetry(_ShouldRetry, max_retry, functor, *args, **kwargs)
def ShouldRetryCommandCommon(exc):
  """Check the conditions every RunCommand retry must satisfy.

  Args:
    exc: The exception raised by the failed attempt.

  Returns:
    True if |exc| is a RunCommandError whose result carries a return code.
    False for any other exception, or when the return code is None (which
    is logged as the child process having failed to launch).
  """
  if isinstance(exc, cros_build_lib.RunCommandError):
    if exc.result.returncode is not None:
      return True
    logging.error('Child process failed to launch; not retrying:\n'
                  'command: %s', exc.result.cmdstr)
  return False
def RunCommandWithRetries(max_retry, *args, **kwargs):
  """Invoke cros_build_lib.RunCommand through RetryCommand.

  Args:
    max_retry: See RetryCommand and RunCommand.
    *args: See RetryCommand and RunCommand.
    **kwargs: See RetryCommand and RunCommand.

  Returns:
    A CommandResult object.

  Raises:
    Exception: Raises RunCommandError on error with optional error_message.
  """
  run_command = cros_build_lib.RunCommand
  return RetryCommand(run_command, max_retry, *args, **kwargs)
class DownloadError(Exception):
  """Raised when fetching a file via curl fails."""
def RunCurl(curl_args, *args, **kwargs):
  """Runs curl and wraps around all necessary hacks.

  Args:
    curl_args: Command line to pass to curl.  Must be list of str.
    *args, **kwargs: See RunCommandWithRetries and RunCommand.
      Note that retry_on, error_check, sleep, backoff_factor cannot be
      overwritten.

  Returns:
    A CommandResult object.

  Raises:
    DownloadError: Whenever the curl command ends in a RunCommandError,
      including when no exit code is available.
  """
  cmd = ['curl'] + curl_args

  # These values were discerned via scraping the curl manpage; they're all
  # retry related (dns failed, timeout occurred, etc, see the manpage for
  # exact specifics of each).
  # Note we allow 22 to deal w/ 500's- they're thrown by google storage
  # occasionally.  This is also thrown when getting 4xx, but curl doesn't
  # make it easy to differentiate between them.
  # Note we allow 35 to deal w/ Unknown SSL Protocol error, thrown by
  # google storage occasionally.
  # Finally, we do not use curl's --retry option since it generally doesn't
  # actually retry anything; code 18 for example, it will not retry on.
  retriable_exits = frozenset([5, 6, 7, 15, 18, 22, 26, 28, 35, 52, 56])

  def _CheckExit(exc):
    """Filter out specific error output when getting exit 22.

    Curl will exit(22) for a wide range of HTTP codes -- both the 4xx and 5xx
    set.  The only way to tell them apart is the error output, and the only
    case recognized here as not worth retrying is '404 Not Found'.

    Returns:
      False for an exit 22 whose error output mentions '404 Not Found', True
      for any other exit 22, and None (defer to the exit code filter) for
      every other exit code.
    """
    if exc.result.returncode == 22:
      # The error output may be None (presumably when stderr was not
      # captured); treat that as "no 404 seen" instead of crashing on the
      # substring test.
      return '404 Not Found' not in (exc.result.error or '')

    # We'll let the common exit code filter do the right thing.
    return None

  try:
    return RunCommandWithRetries(
        10, cmd, retry_on=retriable_exits, error_check=_CheckExit,
        sleep=3, backoff_factor=1.6, *args, **kwargs)
  except cros_build_lib.RunCommandError as e:
    if e.result.returncode in (51, 58, 60):
      # These are the return codes of failing certs as per 'man curl'.
      raise DownloadError(
          'Download failed with certificate error? Try "sudo c_rehash".')
    # returncode is None when the child process failed to launch, so format
    # it with %s: %i would raise TypeError and hide the DownloadError.
    raise DownloadError('Curl failed w/ exit code %s: %s' %
                        (e.result.returncode, e.result.error))