blob: d664bee125b285b171b62c94c328d2b311fbf207 [file] [log] [blame]
#!/bin/sh
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
. /usr/share/misc/shflags
DEFINE_integer count 10000 "number of iterations" c
DEFINE_integer memory_check_size 0 \
"Amount of memory to allocate (0 means as much as possible)"
DEFINE_integer suspend_max 10 "Max seconds to suspend"
DEFINE_integer suspend_min 5 "Min seconds to suspend"
DEFINE_integer wake_max 10 "Max seconds to stay awake for"
DEFINE_integer wake_min 5 "Min seconds to stay awake for"
DEFINE_integer wake_early_margin 0 "The allowable margin to wake early"
DEFINE_integer wake_worst_case 30 "The worst case time to wake"
DEFINE_boolean backup_rtc ${FLAGS_FALSE} "Use second rtc if present for backup"
DEFINE_boolean bug_fatal ${FLAGS_TRUE} "Abort on BUG dmesg lines"
DEFINE_boolean crc_fatal ${FLAGS_TRUE} "Abort on CRC error dmesg lines"
DEFINE_boolean warn_fatal ${FLAGS_FALSE} "Abort on WARNING dmesg lines"
DEFINE_boolean errors_fatal ${FLAGS_TRUE} "Abort on errors"
DEFINE_boolean fw_errors_fatal ${FLAGS_TRUE} \
"Abort on firmware errors (subset of --errors_fatal)"
DEFINE_boolean memory_check ${FLAGS_FALSE} "Use memory_suspend_test to suspend"
DEFINE_boolean pm_print_times ${FLAGS_TRUE} \
"Print the time taken by devices to suspend and resume"
DEFINE_string ignore_wakeup_source "none" "Wakeup source to ignore" i
DEFINE_string record_dmesg_dir "" "dir to record dmesgs for failed iterations"
DEFINE_string post_resume_command "" "Command to run after each resume"
# Record the dmesg for the last iteration.
# Accepts iteration number as an argument (to name the file dmesg_<iter_num>)
record_dmesg_for_iteration() {
local lines=0
local filename=${FLAGS_record_dmesg_dir}/dmesg_$1
echo -n " (dmesg > ${filename})..."
lines=$(dmesg | tac | grep -m1 -n "PM: Syncing filesystems" \
| cut -f1 -d:)
dmesg | tail -${lines} > ${filename}
}
get_success_count() {
awk '$1 == "success:" { print $2 }' /sys/kernel/debug/suspend_stats
}
get_ec_resume_result() {
local ec_result_path=/sys/kernel/debug/cros_ec/last_resume_result
local result=0
[ -e "${ec_result_path}" ] && result=$(cat "${ec_result_path}")
printf "%s" "${result}"
}
# Return the current event count for the wakeup source.
get_wakeup_source_event_count() {
local wakeup_source=$1
local event_count=0
if [ "${wakeup_source}" -a "none" != "${wakeup_source}" ]; then
# Get the event count field for the wakeup source.
event_count=$(awk '$1 == "'"${wakeup_source}"'" { print $3 }' \
/sys/kernel/debug/wakeup_sources)
if [ -z "$event_count" ]; then
event_count=0
fi
fi
echo "$event_count"
}
random() {
hexdump -n 2 -e '/2 "%u"' /dev/urandom
}
boolean_value() {
if [ $1 -eq ${FLAGS_TRUE} ]; then
echo "true"
else
echo "false"
fi
}
dump_stats_and_exit() {
echo "${preserved_pm_print_times}" > /sys/power/pm_print_times
start tlsdated
echo ""
echo "Finished ${cur} iterations."
echo "Premature wakes: ${premature_wake_count}"
echo "Late wakes: ${late_wake_count}"
echo "Suspend failures: $(( cur -
($(get_success_count) - initial_successes) ))"
echo "Wakealarm errors: ${wakealarm_errors}"
echo "Firmware log errors: ${firmware_errors}"
if [ "${check_s0ix_errors}" = 1 ]; then
echo "s0ix errors: ${s0ix_errors}"
elif
[ "${check_s2idle_errors}" = 1 ]; then
echo "s2idle errors: ${s2idle_errors}"
fi
exit 0
}
has_s2idle() {
if ls /sys/devices/system/cpu/cpu[0-9]*/cpuidle/state[0-9]/s2idle \
> /dev/null 2>&1; then
return 0
else
return 1
fi
}
# Returns the S0ix residency counter as returned by the kernel.
# The driver that provides these counters depends on the platform.
get_s0ix_count() {
if [ -e /sys/kernel/debug/pmc_core/slp_s0_residency_usec ]; then
# Kabylake / Skylake systems.
cat /sys/kernel/debug/pmc_core/slp_s0_residency_usec
elif [ -e /sys/kernel/debug/telemetry/s0ix_residency_usec ]; then
# Apollolake Systems.
cat /sys/kernel/debug/telemetry/s0ix_residency_usec
else
echo 0
fi
}
# Returns the sum of s2idle residency counters as returned by the kernel.
get_s2idle_count() {
if has_s2idle; then
cat /sys/devices/system/cpu/cpu[0-9]*/cpuidle/state[0-9]/s2idle/time \
| paste -sd+ | bc
else
echo 0
fi
}
get_firmware_error_counts() {
echo $(cbmem -1 | grep ERROR | wc -l)
}
FLAGS "$@" || exit 1
# Some environment variables (USER/EUID) are not set in the factory, so use
# 'id' to check if running as root instead.
if [ "$(id -u)" -ne 0 ]; then
echo "This script must be run as root." 1>&2
exit 1
fi
if [ ${FLAGS_backup_rtc} -eq ${FLAGS_TRUE} ] &&
[ ! -e /sys/class/rtc/rtc1/wakealarm ]; then
echo "rtc1 not present, not setting second wakealarm"
FLAGS_backup_rtc=${FLAGS_FALSE}
fi
if [ ${FLAGS_memory_check} -eq ${FLAGS_TRUE} ]; then
# Default size is set to 0, which tells memory_suspend_test to allocate as
# much memory as possible.
suspend_cmd="memory_suspend_test --size=${FLAGS_memory_check_size}"
else
suspend_cmd="powerd_dbus_suspend --delay=0"
fi
if [ -n "${FLAGS_record_dmesg_dir}" ]; then
mkdir -p ${FLAGS_record_dmesg_dir}
touch ${FLAGS_record_dmesg_dir}/dmesg_test
if [ $? -ne 0 ]; then
echo "${FLAGS_record_dmesg_dir} not writable for recording dmesg(s)"
exit 1
fi
rm -rf ${FLAGS_record_dmesg_dir}/dmesg_*
fi
check_s0ix_errors=0
check_s2idle_errors=0
if check_powerd_config --suspend_to_idle; then
if [ -e /sys/kernel/debug/pmc_core/slp_s0_residency_usec ] || \
[ -e /sys/kernel/debug/telemetry/s0ix_residency_usec ]; then
check_s0ix_errors=1
elif has_s2idle; then
check_s2idle_errors=1
else
echo "Warning! platform uses s0ix/s2idle, but we have no way to verify s0ix/s2idle"
fi
fi
echo "Running ${FLAGS_count} iterations with:"
echo " suspend: ${FLAGS_suspend_min}-${FLAGS_suspend_max} seconds"
echo " wake: ${FLAGS_wake_min}-${FLAGS_wake_max} seconds"
echo " backup_rtc: $(boolean_value ${FLAGS_backup_rtc})"
echo " errors_fatal: $(boolean_value ${FLAGS_errors_fatal})"
echo " fw_errors_fatal: $(boolean_value ${FLAGS_fw_errors_fatal})"
echo " bugs fatal: $(boolean_value ${FLAGS_bug_fatal})"
echo " warnings fatal: $(boolean_value ${FLAGS_warn_fatal})"
echo " crcs fatal: $(boolean_value ${FLAGS_crc_fatal})"
echo " suspend command: ${suspend_cmd}"
echo " wakeup source to ignore: ${FLAGS_ignore_wakeup_source}"
echo " record_dmesg_dir: ${FLAGS_record_dmesg_dir}"
echo " pm_print_times: $(boolean_value ${FLAGS_pm_print_times})"
initial_successes=$(get_success_count)
suspend_interval=$(( FLAGS_suspend_max - FLAGS_suspend_min + 1 ))
wake_interval=$(( FLAGS_wake_max - FLAGS_wake_min + 1 ))
preserved_pm_print_times=$(cat /sys/power/pm_print_times)
if [ ${FLAGS_pm_print_times} -eq ${FLAGS_TRUE} ]; then
echo 1 > /sys/power/pm_print_times
else
echo 0 > /sys/power/pm_print_times
fi
trap dump_stats_and_exit INT
# Turn off network sync of time to prevent any spurious RTC wake events
stop tlsdated
cur=0
wakealarm_errors=0
firmware_errors=0
s0ix_errors=0
s2idle_errors=0
last_successes=${initial_successes}
exit_loop=0
cur_ignore_count=0
last_ignore_count=0
premature_wake_count=0
late_wake_count=0
last_firmware_errors=$(get_firmware_error_counts)
while true; do
: $(( cur += 1 ))
printf "Suspend %5d of ${FLAGS_count}: " "${cur}"
# Flush file system buffer before setting wake alarm to make it
# less likely that the kernel's sys_sync() call takes so long
# that the alarm fires early.
sync
sus_time=$(( ( $(random) % suspend_interval ) + FLAGS_suspend_min ))
printf "sleep for %2d seconds..." "${sus_time}"
last_s0ix_count=$(get_s0ix_count)
last_s2idle_count=$(get_s2idle_count)
last_ignore_count=$(get_wakeup_source_event_count \
${FLAGS_ignore_wakeup_source})
wakeup_count=$(cat /sys/power/wakeup_count)
# TODO (harrypan): How to deal with the backup_rtc?
if [ ${FLAGS_backup_rtc} -eq ${FLAGS_TRUE} ]; then
echo 0 > /sys/class/rtc/rtc1/wakealarm
echo "+$(( sus_time + 5 ))" > /sys/class/rtc/rtc1/wakealarm
fi
start_time="$(cat /sys/class/rtc/rtc0/since_epoch)"
${suspend_cmd} --wakeup_count=${wakeup_count} --suspend_for_sec=${sus_time}
if [ -n "${FLAGS_post_resume_command}" ]; then
# Leading space because this will appear on the same line as the
# printf output from above.
echo " running ${FLAGS_post_resume_command}"
${FLAGS_post_resume_command}
fi
end_time="$(cat /sys/class/rtc/rtc0/since_epoch)"
actual_sus_time=$(( end_time - start_time ))
diff_sus_time=$(( actual_sus_time - sus_time ))
sus_time_str="slept for ${actual_sus_time}s, expected ${sus_time}s"
if [ ${diff_sus_time} -lt -${FLAGS_wake_early_margin} ]; then
: $(( premature_wake_count += 1 ))
echo "Premature wake detected: ${sus_time_str} (ignoring)."
fi
if [ ${diff_sus_time} -gt ${FLAGS_wake_worst_case} ]; then
: $(( late_wake_count += 1 ))
echo "Late wake detected: ${sus_time_str} (ignoring)."
fi
# Wakealarm is expected to be cleared after the set wakeup time,
# which is the resume time if the device does not resume due to
# other sources. Although we always clear the wakealarm file before
# setting it, we still report this error if it is not cleared.
main_wakealarm="$(cat /sys/class/rtc/rtc0/wakealarm)"
if [ -n "${main_wakealarm}" ] && [ ${main_wakealarm} -lt ${end_time} ]; then
: $(( wakealarm_errors += 1 ))
echo "Wakealarm is not cleared (ignoring)."
fi
if [ ${FLAGS_backup_rtc} -eq ${FLAGS_TRUE} ]; then
backup_wakealarm="$(cat /sys/class/rtc/rtc1/wakealarm)"
if [ -n "${backup_wakealarm}" ] &&
[ ${backup_wakealarm} -lt ${end_time} ]; then
: $(( wakealarm_errors += 1 ))
echo "Backup wakealarm is not cleared (ignoring)."
fi
fi
# Look for errors in firmware log.
cur_firmware_errors=$(get_firmware_error_counts)
number_of_new_errors=$(( ${cur_firmware_errors} - ${last_firmware_errors} ))
if [ ${number_of_new_errors} -gt 0 ]; then
: $(( firmware_errors += 1 ))
cbmem -1 | grep ERROR | tail -n ${number_of_new_errors}
if [ ${FLAGS_errors_fatal} -eq ${FLAGS_TRUE} -a \
${FLAGS_fw_errors_fatal} -eq ${FLAGS_TRUE} ]; then
exit_loop=1
echo "Firmware error found."
else
echo "Firmware error found (ignoring)."
fi
fi
last_firmware_errors=${cur_firmware_errors}
# Make sure suspend succeeded
cur_successes=$(get_success_count)
cur_ignore_count=$(get_wakeup_source_event_count \
${FLAGS_ignore_wakeup_source})
cur_s0ix_count=$(get_s0ix_count)
cur_s2idle_count=$(get_s2idle_count)
if [ ${cur_successes} -eq ${last_successes} -a \
${cur_ignore_count} -eq ${last_ignore_count} ]; then
if [ ${FLAGS_errors_fatal} -eq ${FLAGS_TRUE} ]; then
echo "Suspend failed."
exit_loop=1
else
if [ -n "${FLAGS_record_dmesg_dir}" ]; then
record_dmesg_for_iteration ${cur}
fi
printf "(suspend failed, ignoring)"
fi
elif [ "${check_s0ix_errors}" = 1 ] && \
[ "${cur_s0ix_count}" = "${last_s0ix_count}" ]; then
echo "Suspend failed, s0ix count did not increment from ${last_s0ix_count}"
: $(( s0ix_errors += 1 ))
if [ ${FLAGS_errors_fatal} -eq ${FLAGS_TRUE} ]; then
exit_loop=1
elif [ -n "${FLAGS_record_dmesg_dir}" ]; then
record_dmesg_for_iteration ${cur}
fi
elif [ "${check_s2idle_errors}" = 1 ] && \
[ "${cur_s2idle_count}" = "${last_s2idle_count}" ]; then
echo "Suspend failed, s2idle count did not increment from ${last_s2idle_count}"
: $(( s2idle_errors += 1 ))
if [ ${FLAGS_errors_fatal} -eq ${FLAGS_TRUE} ]; then
exit_loop=1
elif [ -n "${FLAGS_record_dmesg_dir}" ]; then
record_dmesg_for_iteration ${cur}
fi
elif [ "$(($(get_ec_resume_result) & 0x80000000))" -ne 0 ]; then
echo "EC reported S0ix failure: $(get_ec_resume_result)"
: $(( s0ix_errors += 1 ))
if [ ${FLAGS_errors_fatal} -eq ${FLAGS_TRUE} ]; then
exit_loop=1
elif [ -n "${FLAGS_record_dmesg_dir}" ]; then
record_dmesg_for_iteration ${cur}
fi
fi
last_successes=${cur_successes}
# For BUG and CRC errors counting existing occurrences in dmesg
# is not that useful as dmesg will wrap so we would need to account
# for the count shrinking over time.
# Exit on BUG
if [ ${FLAGS_bug_fatal} -eq ${FLAGS_TRUE} ] &&
dmesg | grep -w BUG; then
echo "BUG found."
exit_loop=1
fi
# Exit on WARNING
if [ ${FLAGS_warn_fatal} -eq ${FLAGS_TRUE} ] &&
dmesg | grep -w WARNING; then
echo "WARNING found."
exit_loop=1
fi
# Exit on CRC error
if [ ${FLAGS_crc_fatal} -eq ${FLAGS_TRUE} ] && dmesg | grep "CRC.*error"; then
echo "CRC error found."
exit_loop=1
fi
# Exit the loop if requested from errors or done with iterations
if [ ${cur} -eq ${FLAGS_count} ] || [ ${exit_loop} -eq 1 ]; then
if [ ${exit_loop} -eq 1 -a -n "${FLAGS_record_dmesg_dir}" ]; then
record_dmesg_for_iteration ${cur}
fi
echo ""
break
fi
wake_time=$(( ( $(random) % wake_interval ) + FLAGS_wake_min ))
printf " wake for %2d seconds..." "${wake_time}"
sleep "${wake_time}"
echo ""
done
dump_stats_and_exit