# blob: 9f2ac850019269903b8bd11903869bd601527964 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
import re
import time
import xmlrpclib
from autotest_lib.client.common_lib import error
from autotest_lib.server.cros.faft.firmware_test import FirmwareTest
class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    run_once() performs three checks in order:
      1. The CPU temperature read through servo and through ectool agree.
      2. With the fan forced off, the fan reports 0 RPM and stressing the
         DUT raises the CPU temperature.
      3. With automatic fan control enabled, the fan target speed is
         consistent with the temperature readings and EC thresholds.
    """
    version = 1

    # Delay for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay for stressing device with fan off to check temperature increase
    STRESS_DELAY_NO_FAN = 12

    # Margin for comparing servo based and ectool based CPU temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # PID of DUT stressing processes. _stress_dut() rebinds this to a fresh
    # per-instance list, so the class-level list is never mutated in place.
    _stress_pid = list()

    # Whether initialize() managed to stop the temp_metrics job. Defaulted
    # here so that cleanup() is safe even when initialize() failed early.
    _has_temp_metrics = False

    def enable_auto_fan_control(self):
        """Enable EC automatic fan speed control"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'auto', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'auto')

    def max_fan(self):
        """Maximize fan speed"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'max', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'max')

    def turn_off_fan(self):
        """Turn off fan"""
        self.servo.set('fan_target_rpm', 'off')

    def _get_setting_for_type(self, type_id):
        """
        Retrieve thermal setting for a given type of sensor

        Runs 'ectool thermalget <type_id> <threshold_id>' for increasing
        threshold IDs until the command fails, collecting every threshold
        found in the output.

        Args:
            type_id: The ID of sensor type.

        Returns:
            A list containing thresholds (in degree C) in the following
            order:
                Warning
                CPU off
                All power off
                Fan speed thresholds
            None is returned if no threshold could be read at all.
        """
        # Compiled once, outside the loop. Raw string so that \d is a regex
        # escape; (\d+) so that an empty capture can never reach int().
        pattern = re.compile(r'Threshold \d* [a-z ]* \d* is (\d+) K.')
        setting = []
        current_id = 0
        while True:
            try:
                lines = self.faft_client.system.run_shell_command_get_output(
                        'ectool thermalget %d %d' % (type_id, current_id))
            except xmlrpclib.Fault:
                # The command failed; treat it as the end of the list.
                break
            for line in lines:
                matched = pattern.match(line)
                if matched is not None:
                    # Convert degree K to degree C
                    setting.append(int(matched.group(1)) - 273)
            current_id += 1
        if not setting:
            return None
        return setting

    def get_fan_steps(self):
        """
        Retrieve fan step config from EC

        Reads the configured fan steps from the EC console command
        'thermalfan 0', then programs each non-zero step through servo and
        reads it back, storing the value servo reports in self._fan_steps.

        Raises:
            error.TestError: Raised if no thermal setting has been
                             retrieved (see get_thermal_setting).
        """
        if not self._thermal_setting:
            raise error.TestError("No thermal setting retrieved from EC")
        # The first three entries of a setting are the warning / CPU off /
        # all power off thresholds; the rest are fan speed thresholds.
        num_steps = len(self._thermal_setting[0]) - 3
        expected_pat = ([r"Lowest speed: ([0-9-]+) RPM"] +
                        [r"\d+ K:\s+([0-9-]+) RPM"] * num_steps)
        match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
        self._fan_steps = [int(m[1]) for m in match]

        # Get the actual value of each fan step
        for i, rpm in enumerate(self._fan_steps):
            if rpm == 0:
                continue
            self.servo.set_nocheck('fan_target_rpm', "%d" % rpm)
            self._fan_steps[i] = int(self.servo.get('fan_target_rpm'))

        logging.info("Actual fan steps: %s", self._fan_steps)

    def get_thermal_setting(self):
        """
        Retrieve thermal engine setting from EC

        Populates self._thermal_setting with one threshold list per sensor
        type, and self._num_temp_sensor with the number of sensors for
        which 'ectool temps <id>' succeeds.
        """
        self._thermal_setting = []
        type_id = 0
        while True:
            setting = self._get_setting_for_type(type_id)
            if setting is None:
                break
            self._thermal_setting.append(setting)
            type_id += 1
        logging.info("Number of tempearture sensor types: %d", type_id)

        # Get the number of temperature sensors
        self._num_temp_sensor = 0
        while True:
            try:
                self.faft_client.system.run_shell_command(
                        'ectool temps %d' % self._num_temp_sensor)
            except xmlrpclib.Fault:
                # First sensor ID for which the command fails ends the count.
                break
            self._num_temp_sensor += 1
        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)

    def initialize(self, host, cmdline_args):
        """
        Prepare the DUT for the thermal test.

        Sends 'chan 0' to the EC console (presumably to silence console
        channels -- confirm), tries to stop the temp_metrics job and
        records whether that succeeded, and, if the EC has the 'thermal'
        capability, reads the thermal setting and fan steps before
        switching the fan back to automatic control.

        Args:
            host: Passed through to FirmwareTest.initialize.
            cmdline_args: Passed through to FirmwareTest.initialize.
        """
        super(firmware_ECThermal, self).initialize(host, cmdline_args)
        self.ec.send_command("chan 0")
        try:
            self.faft_client.system.run_shell_command('stop temp_metrics')
        except xmlrpclib.Fault:
            self._has_temp_metrics = False
        else:
            logging.info('Stopped temp_metrics')
            self._has_temp_metrics = True
        if self.check_ec_capability(['thermal']):
            self.get_thermal_setting()
            self.get_fan_steps()
            self.enable_auto_fan_control()

    def cleanup(self):
        """
        Undo what initialize() and the test changed.

        Re-enables automatic fan control, restarts temp_metrics if this
        test stopped it and sends 'chan 0xffffffff' to the EC console
        (presumably re-enabling all console channels -- confirm). The
        parent class cleanup always runs, even if a restore step raises.
        """
        try:
            if self.check_ec_capability(['thermal']):
                self.enable_auto_fan_control()
            if self._has_temp_metrics:
                logging.info('Starting temp_metrics')
                self.faft_client.system.run_shell_command(
                        'start temp_metrics')
            self.ec.send_command("chan 0xffffffff")
        finally:
            super(firmware_ECThermal, self).cleanup()

    def _find_cpu_sensor_id(self):
        """
        This function finds the CPU temperature sensor using ectool.

        Returns:
            Integer ID of CPU temperature sensor.

        Raises:
            error.TestFail: Raised if we fail to find PECI temperature
                            through ectool.
        """
        pattern = re.compile(r'Sensor name: (.*)')
        for temp_id in range(self._num_temp_sensor):
            lines = self.faft_client.system.run_shell_command_get_output(
                    'ectool tempsinfo %d' % temp_id)
            for line in lines:
                matched = pattern.match(line)
                if matched is not None and matched.group(1) == 'PECI':
                    return temp_id
        raise error.TestFail('Cannot find CPU temperature sensor ID.')

    def _get_temp_reading(self, sensor_id):
        """
        Get temperature reading on a sensor through ectool

        Args:
            sensor_id: Temperature sensor ID.

        Returns:
            Temperature reading in degree C.

        Raises:
            xmlrpclib.Fault: Raised when we fail to read temperature.
            error.TestError: Raised if ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        # (\d+) rather than (\d*): a line without digits must fall through
        # to the TestError below instead of crashing in int('').
        pattern = re.compile(r'Reading temperature...(\d+)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool temps %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                # Convert degree K to degree C
                return int(matched.group(1)) - 273
        # Should never reach here
        raise error.TestError("Unexpected error occurred")

    def check_temp_report(self):
        """
        Checker of temperature reporting.

        This function reads CPU temperature from servo and ectool. If
        the two readings mismatch by more than TEMP_MISMATCH_MARGIN,
        test fails.

        Raises:
            error.TestFail: Raised when temperature reading mismatches by
                            more than TEMP_MISMATCH_MARGIN.
        """
        cpu_temp_id = self._find_cpu_sensor_id()
        logging.info("CPU temperature sensor ID is %d", cpu_temp_id)
        ectool_cpu_temp = self._get_temp_reading(cpu_temp_id)
        servo_cpu_temp = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature from servo: %d C", servo_cpu_temp)
        logging.info("CPU temperature from ectool: %d C", ectool_cpu_temp)
        if abs(ectool_cpu_temp - servo_cpu_temp) > self.TEMP_MISMATCH_MARGIN:
            raise error.TestFail(
                    'CPU temperature readings from servo and ectool differ')

    def _stress_dut(self, threads=4):
        """
        Stress DUT system.

        By reading from /dev/urandom and writing to /dev/null, we can stress
        DUT and cause CPU temperature to go up. We stress the system forever,
        until _stop_stressing is called to kill the stress threads. This
        function is non-blocking.

        Args:
            threads: Number of threads (processes) when stressing forever.

        Returns:
            A list of stress process IDs is returned.
        """
        logging.info("Stressing DUT with %d threads...", threads)
        try:
            self.faft_client.system.run_shell_command('pkill dd')
        except xmlrpclib.Fault:
            # pkill exits non-zero when nothing matched, and elsewhere in
            # this test a failing command surfaces as xmlrpclib.Fault.
            # Having no leftover dd process is the normal case.
            logging.info("No leftover dd process to kill.")
        stress_cmd = 'dd if=/dev/urandom of=/dev/null bs=1M &'
        # Grep for [d]d instead of dd to prevent getting the PID of grep
        # itself.
        pid_cmd = "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'"
        self._stress_pid = []
        for _ in range(threads):
            self.faft_client.system.run_shell_command(stress_cmd)
        lines = self.faft_client.system.run_shell_command_get_output(
                pid_cmd)
        for line in lines:
            if not line.strip():
                # Skip blank output lines; int('') would raise ValueError.
                continue
            logging.info("PID is %s", line)
            self._stress_pid.append(int(line.strip()))
        return self._stress_pid

    def _stop_stressing(self):
        """
        Stop stressing DUT system

        Kills every process recorded by _stress_dut. This is best effort:
        a failure to kill one process is logged and the remaining
        processes are still killed. The recorded PID list is emptied
        afterwards.
        """
        stop_cmd = 'kill -9 %d'
        for pid in self._stress_pid:
            try:
                self.faft_client.system.run_shell_command(stop_cmd % pid)
            except xmlrpclib.Fault as e:
                logging.warning("Failed to kill stress process %d: %s",
                                pid, e)
        self._stress_pid = []

    def check_fan_off(self):
        """
        Checker of fan turned off.

        The function first delays FAN_DELAY seconds to ensure fan stops.
        Then it reads fan speed and fails if fan speed is non-zero.
        Then it stresses the system a bit and checks if the temperature
        goes up by at least TEMP_STRESS_INCREASE.

        Raises:
            error.TestFail: Raised when the fan is still spinning, or when
                            temperature increases by less than
                            TEMP_STRESS_INCREASE.
        """
        time.sleep(self.FAN_DELAY)
        fan_speed = self.servo.get('fan_actual_rpm')
        if int(fan_speed) != 0:
            raise error.TestFail("Fan is not turned off.")
        logging.info("EC reports fan turned off.")
        cpu_temp_before = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature before stressing is %d C",
                     cpu_temp_before)
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY_NO_FAN)
            cpu_temp_after = int(self.servo.get('cpu_temp'))
        finally:
            # Never leave the stress processes running on the DUT, even if
            # reading the temperature fails.
            self._stop_stressing()
        logging.info("CPU temperature after stressing is %d C",
                     cpu_temp_after)
        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
            raise error.TestFail(
                    "CPU temperature did not go up by more than %d degrees" %
                    self.TEMP_STRESS_INCREASE)

    def _get_temp_sensor_type(self, sensor_id):
        """
        Get type of a given temperature sensor

        Args:
            sensor_id: Temperature sensor ID.

        Returns:
            Type ID of the temperature sensor.

        Raises:
            error.TestError: Raised when ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        # (\d+) rather than (\d*): a line without digits must fall through
        # to the TestError below instead of crashing in int('').
        pattern = re.compile(r'Sensor type: (\d+)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool tempsinfo %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                return int(matched.group(1))
        # Should never reach here
        raise error.TestError("Unexpected error occurred")

    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
        """
        Check if the given fan_speed is reasonable from the view of certain
        temperature sensor.

        The fan speed is mapped to its index in self._fan_steps, which
        selects a temperature range from the thermal setting of the
        sensor's type. The sensor reading is then compared to that range.

        NOTE(review): check_auto_fan labels an all-0x00 result "Fan speed
        higher than expected" and a 0x10 result "Fan speed lower than
        expected". A reading above the range would normally suggest the
        fan is slower than this sensor demands, so the mapping below looks
        inverted -- confirm against the EC threshold semantics.

        Args:
            fan_speed: The current fan speed in RPM.
            sensor_id: The ID of temperature sensor.

        Returns:
            0x00: The sensor is ignored, fan stepping is disabled for its
                  type, or its reading is above the range of this fan step.
            0x01: The reading is within the range of this fan step.
            0x10: The reading is below the range of this fan step.

        Raises:
            error.TestError: Raised when getting unexpected fan speed.
        """
        sensor_type = self._get_temp_sensor_type(sensor_id)
        if sensor_type == self.SENSOR_TYPE_IGNORED:
            # This sensor should be ignored
            return 0x00

        if self._thermal_setting[sensor_type][-1] == -273:
            # The fan stepping for this type of sensor is disabled
            # (a threshold of 0 K converts to -273 C).
            return 0x00

        try:
            idx = self._fan_steps.index(fan_speed)
        except ValueError:
            # list.index raises ValueError when the speed is not a known
            # fan step.
            raise error.TestError("Unexpected fan speed: %d" % fan_speed)

        # Fan thresholds start at index 3 of the setting. The lower bound
        # is taken 3 degrees below the threshold of the current step
        # (presumably a hysteresis margin -- confirm).
        if idx == 0:
            lower_bound = -self.INT_MAX
            upper_bound = self._thermal_setting[sensor_type][3]
        elif idx == len(self._fan_steps) - 1:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self.INT_MAX
        else:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self._thermal_setting[sensor_type][idx + 3]

        temp_reading = self._get_temp_reading(sensor_id)
        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
        logging.info(" Expecting %d - %d C", lower_bound, upper_bound)
        if temp_reading > upper_bound:
            return 0x00
        elif temp_reading < lower_bound:
            return 0x10
        else:
            return 0x01

    def check_auto_fan(self):
        """
        Checker of thermal engine automatic fan speed control.

        Stress DUT system for a longer period to make temperature more stable
        and check if fan speed is controlled as expected. The per-sensor
        results of _check_fan_speed_per_sensor are OR-ed together; the test
        passes as long as at least one sensor reports 0x01.

        Raises:
            error.TestFail: Raised when fan speed is not as expected.
            error.TestError: Raised when there is no temperature sensor to
                             check against.
        """
        if self._num_temp_sensor <= 0:
            raise error.TestError("No temperature sensor found")
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY)
            fan_rpm = int(self.servo.get('fan_target_rpm'))
            logging.info('Fan speed is %d RPM', fan_rpm)
            result = 0x00
            for sensor_id in range(self._num_temp_sensor):
                result |= self._check_fan_speed_per_sensor(fan_rpm,
                                                           sensor_id)
        finally:
            # Never leave the stress processes running on the DUT.
            self._stop_stressing()
        if result == 0x00:
            raise error.TestFail("Fan speed higher than expected")
        if result == 0x10:
            raise error.TestFail("Fan speed lower than expected")

    def run_once(self):
        """
        Run the thermal test.

        Raises:
            error.TestNAError: Raised when the EC has no 'thermal'
                               capability.
        """
        if not self.check_ec_capability(['thermal']):
            raise error.TestNAError("Nothing needs to be tested on this device")
        logging.info("Checking host temperature report.")
        self.check_temp_report()
        self.turn_off_fan()
        logging.info("Verifying fan is turned off.")
        self.check_fan_off()
        self.enable_auto_fan_control()
        logging.info("Verifying automatic fan control functionality.")
        self.check_auto_fan()