| # -*- coding: utf-8 -*- |
| # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Functions for implementing timeouts.""" |
| |
| from __future__ import print_function |
| |
| import contextlib |
| import datetime |
| import functools |
| import signal |
| import threading |
| import time |
| |
| from chromite.lib import cros_logging as logging |
| |
| |
class TimeoutError(Exception):  # pylint: disable=redefined-builtin
  """Signals that guarded code ran for longer than it was allowed to.

  Derives directly from Exception and deliberately reuses the name of the
  builtin, hence the pylint suppression on the class line.
  """
| |
| |
def Timedelta(num, zero_ok=False):
  """Coerce |num| into a validated datetime.timedelta.

  Args:
    num: A duration, given either as a number of seconds or as an existing
      datetime.timedelta (which is used as-is).
    zero_ok: If true, a zero-length duration is accepted; otherwise the
      duration has to be strictly positive.

  Returns:
    The duration as a datetime.timedelta.

  Raises:
    ValueError: If the duration is negative, or if it is zero while |zero_ok|
      is false.
  """
  if isinstance(num, datetime.timedelta):
    delta = num
  else:
    delta = datetime.timedelta(seconds=num)

  seconds = delta.total_seconds()
  if zero_ok and seconds < 0:
    raise ValueError('timing must be >= 0, not %s' % (delta,))
  if not zero_ok and seconds <= 0:
    raise ValueError('timing must be greater than 0, not %s' % (delta,))
  return delta
| |
| |
def _ScheduleTimer(seconds, interval=0):
  """Arm the real-time interval timer so that SIGALRM gets delivered.

  If |seconds| is below the timer's minimum resolution (which includes zero
  and negative values), it is rounded up to that resolution.

  Note: with a very short delay the signal can be delivered almost
  immediately, so the SIGALRM handler may run while this function is still on
  the stack.

  Args:
    seconds: How long to wait before SIGALRM is sent, in seconds.
    interval: (Optional) Interval passed through to signal.setitimer.
  """
  # Minimum resolution of the itimer. See man setitimer(2) for details.
  min_seconds = 0.000001
  delay = max(seconds, min_seconds)
  signal.setitimer(signal.ITIMER_REAL, delay, interval)
| |
| |
def _CancelTimer():
  """Disarm the SIGALRM timer that is currently scheduled, if any.

  Returns:
    The timer that was in effect before the call, as the
    (time until expiry, interval) pair reported by signal.setitimer.
  """
  previous_timer = signal.setitimer(signal.ITIMER_REAL, 0)
  return previous_timer
| |
| |
@contextlib.contextmanager
def Timeout(max_run_time,
            error_message='Timeout occurred- waited %(time)s seconds.',
            reason_message=None):
  """ContextManager that raises TimeoutError if its body runs for too long.

  Timeout can be nested: a timer that was already pending on entry is
  cancelled, the shorter of that timer and |max_run_time| is armed for the
  body, and on exit the earlier timer and SIGALRM handler are reinstated with
  the elapsed time deducted. Timeout can also nest underneath FatalTimeout.

  Args:
    max_run_time: How long to wait before sending SIGALRM. May be a number
      (in seconds, can be fractional) or a datetime.timedelta object.
    error_message: Optional %-format template for the TimeoutError message;
      it is formatted with a mapping that provides the key 'time' (the
      timeout in seconds). If not provided, the default template is used.
    reason_message: Optional string appended verbatim to the formatted
      |error_message|. Provide a custom message here if you want a
      purpose-specific message without overriding the default template in
      |error_message|. It is not %-formatted, so it may contain literal '%'
      characters.

  Raises:
    TimeoutError: From the SIGALRM handler, when the armed timer expires
      while the body is running.
    ValueError: If |max_run_time| is not greater than zero.
  """
  max_run_time = Timedelta(max_run_time).total_seconds()

  # pylint: disable=unused-argument
  def kill_us(sig_num, frame):
    # Format only the template, then append the reason untouched; formatting
    # the concatenation would blow up on any '%' inside |reason_message| and
    # mask the timeout with a formatting error.
    message = error_message % {'time': max_run_time}
    if reason_message:
      message += reason_message
    raise TimeoutError(message)

  # Remember when we took over so that the time spent in here can be deducted
  # from the outer timer when it is re-armed on exit.
  previous_time = time.time()
  previous_timeout, previous_interval = _CancelTimer()
  original_handler = signal.signal(signal.SIGALRM, kill_us)

  try:
    # Signal the min in case the leftover time was smaller than this timeout.
    # This needs to be called in the try block; otherwise the finally clause
    # may not run if the timeout duration is so short that the alarm fires
    # right away.
    _ScheduleTimer(min(previous_timeout or float('inf'), max_run_time))
    yield
  finally:
    # Cancel the alarm request and restore the original handler.
    _CancelTimer()
    signal.signal(signal.SIGALRM, original_handler)

    # Ensure the previous handler will fire if it was meant to.
    if previous_timeout:
      remaining_timeout = previous_timeout - (time.time() - previous_time)
      # It is ok to pass a negative remaining_timeout: _ScheduleTimer rounds
      # it up to the minimum timer resolution, so the alarm fires promptly.
      _ScheduleTimer(remaining_timeout, previous_interval)
| |
| |
@contextlib.contextmanager
def FatalTimeout(max_run_time, display_message=None):
  """ContextManager that exits the program if its body runs for too long.

  This implementation is fairly simple, so multiple timeouts cannot be active
  at the same time: entering while a real-time timer is already pending is
  rejected.

  When the timeout elapses, SystemExit is raised inside the invoking code and
  is expected to propagate past this manager. Should the underlying code
  suppress the SystemExit, it is raised again once a minute until control
  returns to this manager.

  Args:
    max_run_time: How long to wait. May be a number (in seconds, can be
      fractional) or a datetime.timedelta object.
    display_message: Optional string to include in the timeout error message
      if the timeout occurs.

  Raises:
    Exception: If a real-time timer is already pending on entry.
    SystemExit: From the SIGALRM handler, once the timeout has elapsed.
  """
  max_run_time = Timedelta(max_run_time).total_seconds()

  # pylint: disable=unused-argument
  def _on_alarm(sig_num, frame):
    # While this SystemExit *should* crash its way back up the stack to our
    # exit handler, there is live/production code with blanket except
    # statements that could swallow it. So re-arm the alarm before raising
    # and keep doing so until our exit handler runs. This can conflict with
    # run's kill_timeout, which is why the re-arm delay is fairly long.
    _ScheduleTimer(60)

    # The cbuildbot stage that gets aborted by this timeout should be treated
    # as failed by buildbot.
    failure = 'Timeout occurred- waited %i seconds, failing.' % max_run_time
    if display_message:
      failure = failure + ' Timeout reason: %s' % display_message
    logging.PrintBuildbotStepFailure()
    logging.error(failure)
    raise SystemExit(failure)

  pending_seconds = signal.getitimer(signal.ITIMER_REAL)[0]
  if pending_seconds:
    raise Exception('FatalTimeout cannot be used in parallel to other alarm '
                    'handling code; failing')

  saved_handler = signal.signal(signal.SIGALRM, _on_alarm)
  try:
    _ScheduleTimer(max_run_time)
    yield
  finally:
    # Disarm the alarm first, then put the earlier handler back.
    _CancelTimer()
    signal.signal(signal.SIGALRM, saved_handler)
| |
| |
def TimeoutDecorator(max_time):
  """Build a decorator that runs the decorated function under Timeout.

  time.time, signal.signal, signal.setitimer, signal.getitimer,
  signal.SIGALRM and signal.ITIMER_REAL are captured when TimeoutDecorator is
  called. Around every call of the decorated function those captured objects
  are put back while Timeout is entered and exited, so that replacements
  installed later (e.g. by tests mocking them out) do not affect the timeout
  machinery. The decorated function itself runs with whatever was installed
  at the time it was called, and that state is reinstated afterwards.

  Args:
    max_time: Forwarded to Timeout as the maximum run time.

  Returns:
    A decorator; the function it produces forwards its arguments to the
    decorated function and returns that function's result.
  """
  def _Snapshot():
    # Read the attributes Timeout depends on, in a fixed order.
    return (time.time, signal.signal, signal.setitimer, signal.getitimer,
            signal.SIGALRM, signal.ITIMER_REAL)

  def _Install(snapshot):
    # Write a snapshot back, attribute by attribute, in the order it was read.
    now, set_handler, set_timer, get_timer, sigalrm, itimer_real = snapshot
    time.time = now
    signal.signal = set_handler
    signal.setitimer = set_timer
    signal.getitimer = get_timer
    signal.SIGALRM = sigalrm
    signal.ITIMER_REAL = itimer_real

  pristine = _Snapshot()

  def NestedTimeoutDecorator(func):
    @functools.wraps(func)
    def TimeoutWrapper(*args, **kwargs):
      active = _Snapshot()
      try:
        # Enter Timeout with the captured originals in place ...
        _Install(pristine)
        with Timeout(max_time):
          # ... but let |func| see what was active when it was called.
          _Install(active)
          try:
            return func(*args, **kwargs)
          finally:
            # Timeout's exit path must use the originals again.
            _Install(pristine)
      finally:
        _Install(active)

    return TimeoutWrapper

  return NestedTimeoutDecorator
| |
| |
def WaitForReturnTrue(*args, **kwargs):
  """Periodically run a function until it returns True.

  Delegates to WaitForReturnValue with [True] as the accepted values. The
  result of that call is not passed on, so this function returns None.

  Args:
    See WaitForReturnValue([True], ...)

  Raises:
    TimeoutError when the timeout is exceeded.
  """
  accepted_values = [True]
  WaitForReturnValue(accepted_values, *args, **kwargs)
| |
| |
def WaitForReturnValue(values, *args, **kwargs):
  """Periodically run a function until it returns an accepted value.

  The function is retried for as long as its return value is not contained
  in |values|. See WaitForSuccess for more details.

  Args:
    values: A list or set of acceptable return values.
    *args, **kwargs: See WaitForSuccess for remaining arguments.

  Returns:
    The value most recently returned by |func|.

  Raises:
    TimeoutError when the timeout is exceeded.
  """
  # Retry whenever the latest result is not one of the accepted values.
  return WaitForSuccess(lambda result: result not in values, *args, **kwargs)
| |
| |
def WaitForSuccess(retry_check, func, timeout, period=1, side_effect_func=None,
                   func_args=None, func_kwargs=None, fallback_timeout=10):
  """Periodically run a function, waiting in between runs.

  Keeps calling |func| until |retry_check| accepts its return value. |func|
  is always called at least once.

  To retry based on raised exceptions see GenericRetry in retry_util.

  Args:
    retry_check: A functor that will be passed the return value of |func| as
      the only argument. If |func| should be retried |retry_check| should
      return True.
    func: The function to run to test for a value.
    timeout: The maximum amount of time to wait. May be a number (in seconds)
      or a datetime.timedelta object. Zero is allowed.
    period: How long between calls to |func|. May be a number (in seconds) or
      a datetime.timedelta object. Zero is allowed.
    side_effect_func: Optional function to be called between polls of func,
      typically to output logging messages. The remaining time (never
      negative) will be passed as a datetime.timedelta object.
    func_args: Optional list of positional arguments to be passed to |func|.
    func_kwargs: Optional dictionary of keyword arguments to be passed to
      |func|.
    fallback_timeout: We set a secondary timeout based on sigalarm this many
      seconds after the initial timeout. This should NOT be considered
      robust, but can allow timeouts inside blocking methods. Only applied
      when running on the main thread.

  Returns:
    The value most recently returned by |func| that was not flagged for retry.

  Raises:
    TimeoutError when the timeout is exceeded.
  """
  timeout = Timedelta(timeout, zero_ok=True)
  period = Timedelta(period, zero_ok=True)
  fallback_timeout = Timedelta(fallback_timeout)
  func_args = func_args or []
  func_kwargs = func_kwargs or {}

  deadline = datetime.datetime.now() + timeout

  # pylint: disable=protected-access
  # This is how the main thread ('_MainThread') is detected. Without python
  # 3.4, there may be no perfect solution. See this discussion for details:
  # http://stackoverflow.com/questions/23206787.
  on_main_thread = isinstance(threading.current_thread(),
                              threading._MainThread)
  # pylint: enable=protected-access

  def _Poll():
    while True:
      # Calling |func| before any deadline check guarantees at least one run.
      result = func(*func_args, **func_kwargs)
      if not retry_check(result):
        return result

      # Give the user's callback a chance to run, clamping the remaining time
      # at zero.
      if side_effect_func:
        remaining = deadline - datetime.datetime.now()
        if remaining.total_seconds() < 0:
          remaining = datetime.timedelta(seconds=0)
        side_effect_func(remaining)

      # Measured afresh because the callback may have used up time. If the
      # next sleep would carry us to or past the deadline, give up now.
      if deadline - datetime.datetime.now() <= period:
        raise TimeoutError('Timed out after %s' % timeout)

      time.sleep(period.total_seconds())

  if on_main_thread:
    # Use a sigalarm after an extra delay, in case a function we call is
    # blocking for some reason. This should NOT be considered reliable.
    with Timeout(timeout + fallback_timeout):
      return _Poll()

  # Warning: we are not on the main thread here. Since signals only work in
  # the main thread, this may run longer than |timeout| or even hang.
  return _Poll()