blob: 3ba5f943ea3a50fa1ab58c83dc8680cb83d84a10 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
"""This file provides core logic for servo verify/repair process."""
import logging
import os
import time
import traceback
import xmlrpclib
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import servo_repair
from autotest_lib.server.hosts import base_servohost
# Keys of the DUT host attributes (as stored in the database) that describe
# the servo attached to that DUT: the servo host name, the servod port, etc.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
# Model is inferred from host labels.
SERVO_MODEL_ATTR = 'servo_model'
SERVO_SERIAL_ATTR = 'servo_serial'

# Attribute keys that are copied verbatim from the host attributes into the
# servo arguments. The model key is deliberately absent: it is filled in
# separately from the host info rather than from the attributes.
SERVO_ATTR_KEYS = (
    SERVO_BOARD_ATTR,
    SERVO_HOST_ATTR,
    SERVO_PORT_ATTR,
    SERVO_SERIAL_ATTR,
)

# Number of seconds to sleep after stopping/starting the servod process.
SERVOD_TEARDOWN_TIMEOUT = 3
SERVOD_QUICK_STARTUP_TIMEOUT = 20
SERVOD_STARTUP_TIMEOUT = 60

_CONFIG = global_config.global_config

# Whether servod should be reached through an ssh tunnel (off by default).
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
    'CROS',
    'enable_ssh_tunnel_for_servo',
    type=bool,
    default=False)

# Installation directory read from the SCHEDULER section of the config.
AUTOTEST_BASE = _CONFIG.get_config_value(
    'SCHEDULER',
    'drone_installation_directory',
    default='/usr/local/autotest')
class ServoHost(base_servohost.BaseServoHost):
    """Host class for a servo host (e.g. beaglebone, labstation) together
    with the servod instance that serves one specific port on it.

    @type _servo: servo.Servo | None
    """

    DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 60

    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    def _initialize(self, servo_host='localhost',
                    servo_port=DEFAULT_PORT, servo_board=None,
                    servo_model=None, servo_serial=None, is_in_lab=None,
                    *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on. Defaults
                           to the SERVOD_PORT environment variable if set,
                           otherwise 9999.
        @param servo_board: Board that the servo is connected to.
        @param servo_model: Model that the servo is connected to.
        @param servo_serial: Serial number of the servo. It is passed to
                             servod as `SERIAL=` on start and handed to the
                             `servo.Servo` object on connect.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.
        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           is_in_lab=is_in_lab,
                                           *args, **dargs)
        self.servo_port = int(servo_port)
        self.servo_board = servo_board
        self.servo_model = servo_model
        self.servo_serial = servo_serial
        self._servo = None
        self._servod_server_proxy = None

        # Path of the servo host lock file.
        self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
                           + self.LOCK_FILE_POSTFIX)
        # File path to declare a reboot request.
        self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
                             + self.REBOOT_FILE_POSTFIX)

        # Lock the servo host if it's an in-lab labstation to prevent other
        # tasks from rebooting it until the current task completes. We also
        # wait here to make sure the labstation is up, in case it is in the
        # middle of a reboot.
        self._is_locked = False
        if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
                and self.is_labstation()):
            self._lock()

        self._repair_strategy = (
                servo_repair.create_servo_repair_strategy())

    def connect_servo(self):
        """Establish a connection to the servod server on this host.

        Initializes `self._servo` and then verifies that all network
        connections are working. This will create an ssh tunnel if
        it's required.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises hosts.AutoservVerifyError if initializing the servo does
                not finish within INITIALIZE_SERVO_TIMEOUT_SECS.
        """
        servo_obj = servo.Servo(servo_host=self,
                                servo_serial=self.servo_serial)
        self._servo = servo_obj
        timeout, _ = retry.timeout(
                servo_obj.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if timeout:
            raise hosts.AutoservVerifyError(
                    'Servo initialize timed out.')

    def disconnect_servo(self):
        """Disconnect our servo if it exists.

        If we've previously successfully connected to our servo,
        disconnect any established ssh tunnel, and set `self._servo`
        back to `None`. The cached servod proxy is dropped as well so
        that the next connection builds a fresh one.
        """
        if self._servo:
            # N.B. This call is safe even without a tunnel:
            # rpc_server_tracker.disconnect() silently ignores
            # unknown ports.
            self.rpc_server_tracker.disconnect(self.servo_port)
            self._servo = None
            # A proxy created through the tunnel is presumably unusable
            # once the tunnel is gone; forget it so that
            # get_servod_server_proxy() creates a new one on demand.
            self._servod_server_proxy = None

    def _create_servod_server_proxy(self):
        """Create a proxy that can be used to communicate with servod server.

        When ssh tunneling is enabled and the host is not localhost, the
        proxy is obtained through `rpc_server_tracker`; otherwise a plain
        XML-RPC proxy to `http://<hostname>:<servo_port>` is returned.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
            return self.rpc_server_tracker.xmlrpc_connect(
                    None, self.servo_port,
                    ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60,
                    request_timeout_seconds=3600)
        else:
            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(remote)

    def get_servod_server_proxy(self):
        """Return a cached proxy if exists; otherwise, create a new one.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        # Single-threaded execution, no race
        if self._servod_server_proxy is None:
            self._servod_server_proxy = self._create_servod_server_proxy()
        return self._servod_server_proxy

    def verify(self, silent=False):
        """Update the servo host and verify it's in a good state.

        On any failure the servo is disconnected and servod is stopped
        before the original exception is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        message = 'Beginning verify for servo host %s port %s serial %s'
        message %= (self.hostname, self.servo_port, self.servo_serial)
        self.record('INFO', None, None, message)
        try:
            self._repair_strategy.verify(self, silent)
        except:
            # Deliberately bare: this is cleanup-and-re-raise, and the
            # cleanup should also run for non-Exception interruptions.
            self.disconnect_servo()
            self.stop_servod()
            raise

    def repair(self, silent=False):
        """Attempt to repair servo host.

        On any failure the servo is disconnected and servod is stopped
        before the original exception is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        message = 'Beginning repair for servo host %s port %s serial %s'
        message %= (self.hostname, self.servo_port, self.servo_serial)
        self.record('INFO', None, None, message)
        try:
            self._repair_strategy.repair(self, silent)
            # If target is a labstation then try to withdraw any existing
            # reboot request created by this servo because it passed repair.
            if self.is_labstation():
                self.withdraw_reboot_request()
        except:
            # Deliberately bare: this is cleanup-and-re-raise, and the
            # cleanup should also run for non-Exception interruptions.
            self.disconnect_servo()
            self.stop_servod()
            raise

    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None when no servo is connected.
        @rtype: autotest_lib.server.cros.servo.servo.Servo
        """
        return self._servo

    def request_reboot(self):
        """Request servohost to be rebooted when it's safe to, by touching
        the reboot flag file for this servo port.
        """
        logging.debug('Request to reboot servohost %s has been created by '
                      'servo with port # %s', self.hostname, self.servo_port)
        self.run('touch %s' % self._reboot_file, ignore_status=True)

    def withdraw_reboot_request(self):
        """Withdraw a servohost reboot request, if one exists, by removing
        the reboot flag file for this servo port.
        """
        logging.debug('Withdrawing request to reboot servohost %s that created'
                      ' by servo with port # %s if exists.',
                      self.hostname, self.servo_port)
        self.run('rm -f %s' % self._reboot_file, ignore_status=True)

    def start_servod(self, quick_startup=False):
        """Start the servod process on servohost.

        @param quick_startup: If true, sleep SERVOD_QUICK_STARTUP_TIMEOUT
                              seconds after starting servod instead of
                              SERVOD_STARTUP_TIMEOUT seconds.
        """
        # Skip if running on the localhost.(crbug.com/1038168)
        if self.is_localhost():
            logging.debug("Servohost is a localhost, skipping start servod.")
            return

        cmd = 'start servod'
        if self.servo_board:
            cmd += ' BOARD=%s' % self.servo_board
            # The model is only passed along when the board is known.
            if self.servo_model:
                cmd += ' MODEL=%s' % self.servo_model
        else:
            logging.warning('Board for DUT is unknown; starting servod'
                            ' assuming a pre-configured board.')

        cmd += ' PORT=%d' % self.servo_port
        if self.servo_serial:
            cmd += ' SERIAL=%s' % self.servo_serial
        self.run(cmd, timeout=60)

        # There's a lag between when `start servod` completes and when
        # the _ServodConnectionVerifier trigger can actually succeed.
        # The call to time.sleep() below gives time to make sure that
        # the trigger won't fail after we return.
        # Normally servod on servo_v3 and labstation takes ~10 seconds to be
        # ready, but in the rare case that all servos on a labstation are in
        # heavy use it may take ~30 seconds. The timeout values double these
        # numbers; the quick start up is used when a servohost is first
        # initialized, and the standard start up timeout is used in repair.
        if quick_startup:
            timeout = SERVOD_QUICK_STARTUP_TIMEOUT
        else:
            timeout = SERVOD_STARTUP_TIMEOUT
        logging.debug('Wait %s seconds for servod process fully up.', timeout)
        time.sleep(timeout)

    def stop_servod(self):
        """Stop the servod process on servohost.

        A failing `stop servod` command is ignored; afterwards this sleeps
        SERVOD_TEARDOWN_TIMEOUT seconds to let the process go away.
        """
        # Skip if running on the localhost.(crbug.com/1038168)
        if self.is_localhost():
            logging.debug("Servohost is a localhost, skipping stop servod.")
            return

        logging.debug('Stopping servod on port %s', self.servo_port)
        self.run('stop servod PORT=%d' % self.servo_port,
                 timeout=60, ignore_status=True)
        logging.debug('Wait %s seconds for servod process fully teardown.',
                      SERVOD_TEARDOWN_TIMEOUT)
        time.sleep(SERVOD_TEARDOWN_TIMEOUT)

    def restart_servod(self, quick_startup=False):
        """Restart the servod process on servohost.

        @param quick_startup: Forwarded to `start_servod()`.
        """
        self.stop_servod()
        self.start_servod(quick_startup)

    def _lock(self):
        """Lock servohost by touching the lock file for this servo port."""
        logging.debug('Locking servohost %s by touching %s file',
                      self.hostname, self._lock_file)
        self.run('touch %s' % self._lock_file, ignore_status=True)
        self._is_locked = True

    def _unlock(self):
        """Unlock servohost by removing the lock file."""
        logging.debug('Unlocking servohost by removing %s file',
                      self._lock_file)
        self.run('rm %s' % self._lock_file, ignore_status=True)
        self._is_locked = False

    def close(self):
        """Close the associated servo and the host object.

        The base class `close()` always runs, even when closing the servo,
        unlocking the host or stopping servod raises; the exception still
        propagates to the caller afterwards.
        """
        try:
            if self._servo:
                # In some cases when we run as lab-tools, the job object is
                # None.
                if self.job and not self._servo.uart_logs_dir:
                    self._servo.uart_logs_dir = self.job.resultdir
                self._servo.close()

            if self._is_locked:
                # Remove the lock if the servohost has been locked.
                try:
                    self._unlock()
                except error.AutoservSSHTimeout:
                    logging.error('Unlock servohost failed due to ssh timeout.'
                                  ' It may caused by servohost went down during'
                                  ' the task.')

            # We always want to stop servod after a task, to minimize the
            # chance that a bad servod process interferes with other servods.
            # (see crbug.com/1028665)
            self.stop_servod()
        finally:
            # Never skip the base-class cleanup because of a failure above.
            super(ServoHost, self).close()
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The first dot-separated label of the DUT hostname gets a `-servo`
    suffix; any remaining domain part is kept as is.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.
    """
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
def servo_host_is_up(servo_hostname):
    """Report whether a servo host answers ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False otherwise.
    """
    # Strictly speaking this repeats the SSH ping done early in the servo
    # proxy initialization code. The difference is speed: when it fails,
    # this ping gives up within a couple of seconds, whereas an SSH ping
    # needs 60 seconds to time out. That slow path is hit when the servo
    # DNS name resolves but no host lives at the resulting IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    result = ping_runner.PingRunner().ping(config)
    return result.received > 0
def _map_afe_board_to_servo_board(afe_board):
    """Translate an AFE board name into the board name servo expects.

    For servo's purposes many boards are the same as some other board.
    An explicit alias wins; otherwise the first known variant suffix
    found at the end of the name is stripped. Any resulting change is
    logged.

    @param afe_board string board name received from AFE.

    @return board we expect servo to have.
    """
    explicit_aliases = {'gizmo': 'panther'}
    variant_suffixes = ('-freon', '_freon', '_moblab', '-cheets')

    servo_board = explicit_aliases.get(afe_board)
    if servo_board is None:
        # Only the first suffix that matches (in declaration order) counts.
        matching = [tail for tail in variant_suffixes
                    if afe_board.endswith(tail)]
        if matching:
            servo_board = afe_board[:-len(matching[0])]
        else:
            servo_board = afe_board

    if servo_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, servo_board)
    return servo_board
def get_servo_args_for_host(dut_host):
    """Return servo data associated with a given DUT.

    The servo attributes listed in SERVO_ATTR_KEYS are copied from the
    DUT's host info attributes, the port is converted to an int, and the
    board/model are filled in from the host info when present.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.

    @return `servo_args` dict with host and an optional port, or None
            when no servo host attribute is available (including the
            case where the recorded servo port is not a valid integer).
    """
    info = dut_host.host_info_store.get()
    # items() rather than iteritems() so this works on Python 2 and 3.
    servo_args = {key: value for key, value in info.attributes.items()
                  if key in SERVO_ATTR_KEYS}

    if SERVO_PORT_ATTR in servo_args:
        try:
            servo_args[SERVO_PORT_ATTR] = int(servo_args[SERVO_PORT_ATTR])
        except (TypeError, ValueError):
            # TypeError covers non-string values such as None.
            logging.error('servo port is not an int: %s',
                          servo_args[SERVO_PORT_ATTR])
            # Drop the host so that the result is None: we don't want to
            # use an invalid port.
            servo_args.pop(SERVO_HOST_ATTR, None)

    if info.board:
        servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                info.board)
    if info.model:
        servo_args[SERVO_MODEL_ATTR] = info.model
    return servo_args if SERVO_HOST_ATTR in servo_args else None
def _tweak_args_for_ssp_moblab(servo_args):
    """Redirect a loopback servo host to the configured container IP.

    When the servo host in `servo_args` is `localhost` or `127.0.0.1`,
    it is replaced in place by the `host_container_ip` value from the
    SSP section of the global config (None if that value is not set).

    @param servo_args: dict of servo arguments; must contain the servo
                       host key. Modified in place.
    """
    if servo_args[SERVO_HOST_ATTR] not in ('localhost', '127.0.0.1'):
        return
    container_ip = _CONFIG.get_config_value(
            'SSP', 'host_container_ip', type=str, default=None)
    servo_args[SERVO_HOST_ATTR] = container_ip
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False, dut_host_info=None):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and then verify or repair a
    `ServoHost` for the servo attached to `dut`. What happens depends
    on the parameters:
      * A non-`None` `servo_args` marks a servo dependency: the servo
        host is created and checked with `repair(silent=True)`, and
        repair failures are raised to the caller. This happens when
        we're called from a test (not special task) control file with
        an explicit dependency on servo; repair must then stay out of
        `status.log` so that test results are not polluted.
      * Otherwise, if `try_servo_repair` is true, the servo host is
        created and checked with `repair()`.
      * Otherwise, the servo host is created and checked with
        `verify()`, but only if it responds to `ping`.
    In the last two cases check failures are logged and discarded, and
    the servo host is still returned.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    The servo parameters (host name, port, board, ...) come from:
      * The servo attributes of `dut`, when `dut` is given and either
        `try_lab_servo` is true or `servo_args` was supplied. If found,
        these take precedence over the caller's `servo_args`.
      * Otherwise, the `servo_args` dict passed in by the caller.

    @param dut               An instance of `Host` from which to take
                             servo parameters (if available).
    @param servo_args        A dictionary with servo parameters to use if
                             they can't be found from `dut`.  If this
                             argument is supplied, unrepaired exceptions
                             from `repair()` will be passed back to the
                             caller.
    @param try_lab_servo     If not true, servo host creation will be
                             skipped unless otherwise required by the
                             caller.
    @param try_servo_repair  If true, check a servo host with
                             `repair()` instead of `verify()`.
    @param dut_host_info     Optional DUT host info handed to the new
                             servo host via `set_dut_host_info()`;
                             skipped when None.

    @returns: A ServoHost object or None. See comments above.
    """
    servo_dependency = servo_args is not None

    # Servo attributes recorded on the DUT override whatever was passed in.
    if dut is not None and (try_lab_servo or servo_dependency):
        host_derived_args = get_servo_args_for_host(dut)
        if host_derived_args is not None:
            if utils.in_moblab_ssp():
                _tweak_args_for_ssp_moblab(host_derived_args)
            logging.debug(
                    'Overriding provided servo_args (%s) with arguments'
                    ' determined from the host (%s)',
                    servo_args,
                    host_derived_args,
            )
            servo_args = host_derived_args

    if servo_args is None:
        logging.debug('No servo_args provided, and failed to find overrides.')
        return None
    if SERVO_HOST_ATTR not in servo_args:
        logging.debug('%s attribute missing from servo_args: %s',
                      SERVO_HOST_ATTR, servo_args)
        return None
    # The ping check only gates the plain verify() path.
    if not (servo_dependency or try_servo_repair or
            servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        logging.debug('ServoHost is not up.')
        return None

    servo_host = ServoHost(**servo_args)
    try:
        servo_host.restart_servod(quick_startup=True)
    except error.AutoservSSHTimeout:
        logging.warning("Restart servod failed due ssh connection "
                        "to servohost timed out. This error is forgiven"
                        " here, we will retry in servo repair process.")
    except error.AutoservRunError as run_error:
        logging.warning("Restart servod failed due to:\n%s\n"
                        "This error is forgiven here, we will retry"
                        " in servo repair process.", str(run_error))

    # TODO(gregorynisbet): Clean all of this up.
    logging.debug('create_servo_host: attempt to set info store on '
                  'servo host')
    try:
        if dut_host_info is not None:
            servo_host.set_dut_host_info(dut_host_info)
            logging.debug('create_servo_host: successfully set info '
                          'store')
        else:
            logging.debug('create_servo_host: dut_host_info is '
                          'None, skipping')
    except Exception:
        logging.error("create_servo_host: (%s)", traceback.format_exc())

    # repair() already covers everything verify() does, so exactly one
    # of the two is called below; never both.
    if servo_dependency:
        # Failures propagate to the caller on purpose.
        servo_host.repair(silent=True)
    elif try_servo_repair:
        try:
            servo_host.repair()
        except Exception:
            logging.exception('servo repair failed for %s',
                              servo_host.hostname)
    else:
        try:
            servo_host.verify()
        except Exception:
            logging.exception('servo verify failed for %s',
                              servo_host.hostname)
    return servo_host