blob: cda2da544ef1c1218ace0138183a664d48c61d3e [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging, os, re, time
import common
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, enum
from autotest_lib.client.cros import cros_logging, rtc, sys_power
from autotest_lib.client.cros import flimflam_test_path
import flimflam
class SuspendFailure(error.TestFail):
"""Base class for a failure during a single suspend/resume cycle."""
pass
class SuspendAbort(SuspendFailure):
"""Suspend took too long, got wakeup event (RTC tick) before it was done."""
pass
class HwClockError(SuspendFailure):
"""Known bug with firmware messing up RTC interrupts (crosbug.com/36004)"""
AFFECTED_BOARDS = ['LUMPY', 'STUMPY', 'KIEV']
@staticmethod
def is_affected():
"""Returns True iff the current board is known to be affected."""
return utils.get_board() in HwClockError.AFFECTED_BOARDS
class FirmwareError(SuspendFailure):
"""String 'ERROR' found in firmware log after resume."""
WHITELIST = [r'PNP: 002e\.4 70 irq size: 0x0000000001 not assigned']
class KernelError(SuspendFailure):
"""Kernel WARN_ON() hit during suspend/resume."""
pass
class Suspender(object):
"""Class for suspend/resume measurements.
Public attributes:
disconnect_3G_time: Amount of seconds it took to disable 3G.
successes[]: List of timing measurement dicts from successful suspends.
failures[]: List of SuspendFailure exceptions from failed suspends.
device_times[]: List of individual device suspend/resume time dicts.
Public methods:
suspend: Do a suspend/resume cycle. Return timing measurement dict.
Private attributes:
_log_reader: LogReader that is set to start right before suspending.
_use_dbus: Set to use asynchronous DBus method for suspending.
_throw: Set to have SuspendFailure exceptions raised to the caller.
_reset_pm_print_times: Set to deactivate pm_print_times after the test.
_restart_tlsdated: Set to restart tlsdated after the test.
Private methods:
__init__: Shuts off tlsdated for duration of test, disables 3G
__del__: Restore tlsdated (must run eventually, but GC delay no problem)
_set_pm_print_times: Enable/disable kernel device suspend timing output.
_check_failure_log: Check /sys/.../suspend_stats for new failures.
_ts: Returns a timestamp from /var/run/power_manager/last_resume_timings
_hwclock_ts: Read RTC timestamp left on resume in hwclock-on-resume
_device_resume_time: Read seconds overall device resume took from logs.
_individual_device_times: Reads individual device suspend/resume times.
"""
def __init__(self, use_dbus=False, throw=False, device_times=False):
"""Prepare environment for suspending."""
self.disconnect_3G_time = 0
self.successes = []
self.failures = []
self._use_dbus = use_dbus
self._throw = throw
self._reset_pm_print_times = False
self._restart_tlsdated = False
self._log_reader = cros_logging.LogReader()
if device_times:
self.device_times = []
# stop tlsdated, make sure we/hwclock have /dev/rtc for ourselves
if utils.system_output('initctl status tlsdated').find('start') != -1:
utils.system('initctl stop tlsdated')
self._restart_tlsdated = True
# prime powerd_suspend RTC timestamp saving and make sure hwclock works
utils.open_write_close('/var/run/power_manager/hwclock-on-resume', '')
hwclock_output = utils.system_output('hwclock -r --debug --utc',
ignore_status=True)
if hwclock_output.find('Using /dev interface') == -1:
raise error.TestError('hwclock cannot find rtc: ' + hwclock_output)
# activate device suspend timing debug output
if hasattr(self, 'device_times'):
if not int(utils.read_one_line('/sys/power/pm_print_times')):
self._set_pm_print_times(True)
self._reset_pm_print_times = True
# Shut down 3G to remove its variability from suspend time measurements
flim = flimflam.FlimFlam()
service = flim.FindCellularService(0)
if service:
logging.info('Found 3G interface, disconnecting.')
start_time = time.time()
(success, status) = flim.DisconnectService(
service=service, wait_timeout=60)
if success:
logging.info('3G disconnected successfully.')
self.disconnect_3G_time = time.time() - start_time
else:
logging.error('Could not disconnect: %s.' % status)
self.disconnect_3G_time = -1
def __del__(self):
"""Restore normal environment (not turning 3G back on for now...)"""
os.remove('/var/run/power_manager/hwclock-on-resume')
if self._restart_tlsdated:
utils.system('initctl start tlsdated')
if self._reset_pm_print_times:
self._set_pm_print_times(False)
def _set_pm_print_times(self, on):
"""Enable/disable extra suspend timing output from powerd to syslog."""
if utils.system('echo %s > /sys/power/pm_print_times' % int(bool(on)),
ignore_status=True):
logging.warn('Failed to set pm_print_times to %s' % bool(on))
del self.device_times
self._reset_pm_print_times = False
else:
logging.info('Device resume times set to %s' % bool(on))
def _ts(self, name, retries=50, sleep_seconds=0.2):
"""Searches logs for last timestamp with a given suspend message."""
# Occasionally need to retry due to races from process wakeup order
for retry in xrange(retries + 1):
try:
f = open('/var/run/power_manager/last_resume_timings')
for line in f:
words = line.split('=')
if name == words[0]:
return float(words[1]);
except IOError:
pass
time.sleep(sleep_seconds)
raise error.TestError('Could not find %s entry.' % name)
def _hwclock_ts(self, not_before, retries=3):
"""Read the RTC resume timestamp saved by powerd_suspend."""
path = '/var/run/power_manager/hwclock-on-resume'
for _ in xrange(retries + 1):
if os.path.exists(path):
match = re.search(r'([0-9]+) seconds since .+ (-?[0-9.]+) sec',
utils.read_file(path), re.DOTALL)
if match:
seconds = int(match.group(1)) + float(match.group(2))
if seconds < not_before: continue
logging.debug('RTC resume timestamp read: %f' % seconds)
return seconds
time.sleep(0.2)
logging.debug('HwClock failure, dumping nvram:\n' +
utils.system_output('mosys nvram dump', ignore_status=True))
raise HwClockError('RTC timestamp broken:' + utils.read_file(path))
def _firmware_resume_time(self):
"""Calculate seconds for firmware resume from logged TSC. (x86 only)"""
if utils.get_arch() not in ['i686', 'x86_64']:
# TODO: support this on ARM somehow
return 0
pattern = r'TSC at resume: (\d+)$'
line = self._log_reader.get_last_msg(pattern)
if line:
freq = 1000 * int(utils.read_one_line(
'/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq'))
return float(re.search(pattern, line).group(1)) / freq
raise error.TestError('Failed to find TSC resume value in syslog.')
def _device_resume_time(self):
"""Read amount of seconds for overall device resume from syslog."""
pattern = r'PM: resume of devices complete after ([0-9.]+)'
line = self._log_reader.get_last_msg(pattern)
if line:
return float(re.search(pattern, line).group(1)) / 1000
raise error.TestError('Failed to find device resume time in syslog.')
def _individual_device_times(self, start_resume):
"""Return dict of individual device suspend and resume times."""
self.device_times.append(dict())
regex = re.compile(r'call ([^ ]+)\+ returned 0 after ([0-9]+) usecs')
for line in self._log_reader.read_all_logs():
match = regex.search(line)
if match:
key = 'seconds_dev_' + match.group(1).replace(':', '-')
secs = float(match.group(2)) / 1e6
if cros_logging.extract_kernel_timestamp(line) > start_resume:
key += '_resume'
else:
key += '_suspend'
if key in self.device_times[-1]:
logging.warn('Duplicate entry for %s: +%f' % (key, secs))
self.device_times[-1][key] += secs
else:
logging.debug('%s: %f' % (key, secs))
self.device_times[-1][key] = secs
def suspend(self, duration=10):
"""
Do a single suspend for 'duration' seconds. Returns None on errors, or
raises the exception when _throw is set. Returns a dict of general
measurements, or a tuple (general_measurements, individual_device_times)
when _device_times is set.
"""
iteration = len(self.failures) + len(self.successes) + 1
self._log_reader.set_start_by_current()
# Remove previous last_run_timings file to prevent races
try:
os.remove('/var/run/power_manager/last_resume_timings')
except OSError:
pass
try:
# set the RTC alarm
alarm_time = int(rtc.get_seconds() + duration)
logging.debug('Trying suspend %d for %d seconds... wakealarm = %d' %
(iteration, duration, alarm_time))
rtc.set_wake_alarm(alarm_time)
if len(rtc.get_rtc_devices()) > 1:
backup_wake = rtc.get_seconds(rtc_device='rtc1') + duration + 30
logging.debug('Priming backup RTC at %d' % backup_wake)
rtc.set_wake_alarm(backup_wake, rtc_device='rtc1')
# do the actual suspend
if self._use_dbus:
# asynch suspend, need to wait a while for it to happen...
sys_power.request_suspend()
# TODO: replace sleep with listening for DBus resume message
time.sleep(3)
else:
sys_power.write_wakeup_count(sys_power.read_wakeup_count())
sys_power.suspend_to_ram()
# look for errors
if os.path.exists('/sys/firmware/log'):
for msg in re.findall(r'^.*ERROR.*$',
utils.read_file('/sys/firmware/log'), re.M):
for pattern in FirmwareError.WHITELIST:
if re.search(pattern, msg):
logging.info('Whitelisted firmware error: ' + msg)
break
else:
raise FirmwareError(msg.strip('\r\n '))
warning_regex = re.compile(r' kernel: \[.*WARNING:')
abort_regex = re.compile(r' kernel: \[.*Freezing of tasks aborted|'
r'powerd_suspend\[.*Aborting suspend, wake event received')
unknown_regex = re.compile(r'powerd_suspend\[\d+\]: Error')
for line in self._log_reader.read_all_logs():
if warning_regex.search(line):
raise KernelError(line)
if abort_regex.search(line):
raise SuspendAbort(line)
if unknown_regex.search(line):
raise SuspendFailure('Unidentified suspend failure.')
# calculate general measurements
start_resume = self._ts('start_resume_time')
kernel_down = (self._ts('end_suspend_time') -
self._ts('start_suspend_time'))
kernel_up = self._ts('end_resume_time') - start_resume
devices_up = self._device_resume_time()
total_up = self._hwclock_ts(alarm_time) - alarm_time
firmware_up = self._firmware_resume_time()
board_up = total_up - kernel_up - firmware_up
try:
cpu_up = self._ts('cpu_ready_time', 0) - start_resume
except error.TestError:
# can be missing on non-SMP machines
cpu_up = None
if total_up > 15:
raise SuspendFailure('Duration too short, backup RTC fired!')
logging.debug('Success(%d): %g down, %g up, %g board, %g firmware, '
'%g kernel, %g cpu, %g devices' %
(iteration, kernel_down, total_up, board_up,
firmware_up, kernel_up, cpu_up, devices_up))
self.successes.append({
'seconds_system_suspend': kernel_down,
'seconds_system_resume': total_up,
'seconds_system_resume_firmware': firmware_up + board_up,
'seconds_system_resume_firmware_cpu': firmware_up,
'seconds_system_resume_firmware_ec': board_up,
'seconds_system_resume_kernel': kernel_up,
'seconds_system_resume_kernel_cpu': cpu_up,
'seconds_system_resume_kernel_dev': devices_up,
})
if hasattr(self, 'device_times'):
self._individual_device_times(start_resume)
return (self.successes[-1], self.device_times[-1])
else:
return self.successes[-1]
except SuspendFailure as ex:
logging.error('%s(%d): %s' % (type(ex).__name__, iteration, ex))
self.failures.append(ex)
if self._throw: raise
return None