blob: 0bd73b7ad898f86d3fe0a1c003f55380a86ce26e [file] [log] [blame]
# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This file provides core logic for labstation verify/repair process."""
import logging
from autotest_lib.client.common_lib import error
from autotest_lib.server import afe_utils
from autotest_lib.server.hosts import base_label
from autotest_lib.server.hosts import cros_label
from autotest_lib.server.hosts import labstation_repair
from autotest_lib.server.cros import provision
from autotest_lib.server.hosts import base_servohost
from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
from autotest_lib.server.cros.dynamic_suite import tools
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.server import utils as server_utils
from autotest_lib.site_utils.rpm_control_system import rpm_client
class LabstationHost(base_servohost.BaseServoHost):
    """Labstation-specific host class built on top of BaseServoHost."""

    # Age, in minutes, past which an in_use lock file is treated as stale
    # and no longer counts as "servo in use".
    IN_USE_FILE_EXPIRE_MINS = 90

    # Minimum uptime, in hours, before a DUT-originated reboot request is
    # honored. This keeps a broken DUT from rebooting its labstation over
    # and over.
    UP_TIME_THRESH_HOLD_HOURS = 6

    VERSION_PREFIX = provision.CROS_VERSION_PREFIX

    @staticmethod
    def check_host(host, timeout=10):
        """Tell whether the given host is a labstation.

        The check greps /etc/lsb-release on the host for 'labstation'.
        AutoservRunError and AutoservSSHTimeout raised by the command are
        swallowed and reported as "not a labstation".

        @param host: An ssh host representing a device.
        @param timeout: The timeout for the run command.

        @return: True if the grep exited with status 0, False otherwise
                 (including when the command raised one of the errors
                 listed above).
        """
        try:
            probe = host.run('grep -q labstation /etc/lsb-release',
                             ignore_status=True, timeout=timeout)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        else:
            return probe.exit_status == 0

    def _initialize(self, hostname, *args, **dargs):
        """Initialize the base host, then attach labstation helpers.

        @param hostname: Hostname of the labstation; forwarded to the
                         parent class together with *args and **dargs.
        """
        super(LabstationHost, self)._initialize(hostname=hostname,
                                                *args, **dargs)
        strategy = labstation_repair.create_labstation_repair_strategy()
        self._repair_strategy = strategy
        self.labels = base_label.LabelRetriever(cros_label.LABSTATION_LABELS)

    def is_reboot_requested(self):
        """Check whether a reboot of this labstation has been requested.

        A request can come from the labstation itself (an update that is
        pending a reboot) or from DUTs (reboot request files). DUT
        requests are only honored once the uptime exceeds
        UP_TIME_THRESH_HOLD_HOURS, so that a broken servo cannot keep its
        labstation in a reboot cycle.

        @returns True if a reboot is required, otherwise False.
        """
        if self._check_update_status() == self.UPDATE_STATE.PENDING_REBOOT:
            logging.info('Labstation reboot requested from labstation for'
                         ' update image')
            return True

        if not self._validate_uptime():
            logging.info('Ignoring DUTs reboot request because %s was'
                         ' rebooted in last %d hours.',
                         self.hostname, self.UP_TIME_THRESH_HOLD_HOURS)
            return False

        # Every file matching <TEMP_FILE_DIR>*<REBOOT_FILE_POSTFIX> stands
        # for one pending request.
        find_cmd = 'find %s*%s' % (self.TEMP_FILE_DIR,
                                   self.REBOOT_FILE_POSTFIX)
        found = self.run(find_cmd, ignore_status=True).stdout
        if not found:
            return False

        request_files = found.strip().split('\n')
        logging.info('%s DUT(s) are currently requesting to'
                     ' reboot labstation.', len(request_files))
        return True

    def try_reboot(self):
        """Reboot the labstation if it is safe, then clean up.

        Nothing is done when a servo is in use or an update is currently
        running. Otherwise the labstation is rebooted (through the
        post-update path when an update is pending a reboot), the
        cros-version label is refreshed and the reboot control files are
        removed.
        """
        if self._is_servo_in_use():
            logging.info('Aborting reboot action because some DUT(s) are'
                         ' currently using servo(s).')
            return

        state = self._check_update_status()
        if state == self.UPDATE_STATE.RUNNING:
            logging.info('Aborting reboot action because an update process'
                         ' is running.')
            return

        if state == self.UPDATE_STATE.PENDING_REBOOT:
            reboot = self._post_update_reboot
        else:
            reboot = self._servo_host_reboot
        reboot()

        self.update_cros_version_label()
        logging.info('Cleaning up reboot control files.')
        self._cleanup_post_reboot()

    def get_labels(self):
        """Return the labels detected on this host."""
        return self.labels.get_labels(self)

    def get_os_type(self):
        """Return the OS type string of this host, 'labstation'."""
        return 'labstation'

    def verify_job_repo_url(self, tag=''):
        """Stage the autotest packages named by this host's job_repo_url.

        The job_repo_url attribute is read from the host info store. When
        it is empty a warning is logged and nothing else happens.
        Otherwise the url is split into a devserver url and an image name
        and that devserver is asked to stage 'autotest_packages' for the
        image.

        Eg: a job_repo_url of the form
        "http://lmn.cd.ab.xyx:8080/static/lumpy-release/R29-4279.0.0/
        autotest/packages" refers to build lumpy-release/R29-4279.0.0.

        No exception is handled here, so errors raised by the devserver
        helpers propagate to the caller.

        @param tag: The tag from the server job, in the format
                    <job_id>-<user>/<hostname>, or <hostless> for a server
                    job. Not used by this implementation.
        """
        host_info = self.host_info_store.get()
        repo_url = host_info.attributes.get(ds_constants.JOB_REPO_URL, '')
        if not repo_url:
            logging.warning('No job repo url set on host %s', self.hostname)
            return

        logging.info('Verifying job repo url %s', repo_url)
        parsed = tools.get_devserver_build_from_package_url(repo_url)
        devserver_url, image_name = parsed
        image_server = dev_server.ImageServer(devserver_url)
        logging.info('Staging autotest artifacts for %s on devserver %s',
                     image_name, image_server.url())
        image_server.stage_artifacts(image_name, ['autotest_packages'])

    def host_version_prefix(self, image):
        """Return the version label prefix for an image.

        The lookup is delegated to provision.py so that provisioning
        versions other than the standard CrOS version (e.g. CrOS TH
        version) get their own prefix.

        @param image: The image name to find its version prefix.

        @returns: A prefix string for the image type.
        """
        return provision.get_version_label_prefix(image)

    def stage_server_side_package(self, image=None):
        """Stage the autotest server-side package on a devserver.

        The build is taken from `image` when given, otherwise from the
        host's job_repo_url attribute, otherwise from the build recorded
        in the host info.

        @param image: Full path of an OS image to install or a build name.

        @return: A url to the autotest server-side package.

        @raise: error.AutoservError if fail to locate the build to test
                with, or fail to stage server-side package.
        """
        # Only pass the hostname to devserver resolution when drones live
        # in the restricted subnet; with None, a devserver in the
        # non-restricted subnet is picked so the drone can download from it.
        resolve_host = self.hostname
        if not server_utils.ENABLE_DRONE_IN_RESTRICTED_SUBNET:
            resolve_host = None

        if image:
            image_name = tools.get_build_from_image(image)
            if not image_name:
                raise error.AutoservError(
                        'Failed to parse build name from %s' % image)
            image_server = dev_server.ImageServer.resolve(image_name,
                                                          resolve_host)
        else:
            host_info = self.host_info_store.get()
            repo_url = host_info.attributes.get(ds_constants.JOB_REPO_URL,
                                                '')
            if repo_url:
                parsed = tools.get_devserver_build_from_package_url(repo_url)
                devserver_url, image_name = parsed
                # Reuse the devserver from the url when drones are in the
                # restricted subnet; otherwise resolve a fresh one in the
                # non-restricted subnet.
                if server_utils.ENABLE_DRONE_IN_RESTRICTED_SUBNET:
                    image_server = dev_server.ImageServer(devserver_url)
                else:
                    image_server = dev_server.ImageServer.resolve(image_name)
            elif host_info.build is not None:
                image_server = dev_server.ImageServer.resolve(
                        host_info.build, resolve_host)
                image_name = host_info.build
            else:
                raise error.AutoservError(
                        'Failed to stage server-side package. The host has '
                        'no job_repo_url attribute or cros-version label.')

        image_server.stage_artifacts(image_name, ['autotest_server_package'])
        return '%s/static/%s/%s' % (image_server.url(), image_name,
                                    'autotest_server_package.tar.bz2')

    def repair(self):
        """Record the start of a repair and run the repair strategy."""
        host_info = self.host_info_store.get()
        message = 'Beginning repair for host %s board %s model %s' % (
                self.hostname, host_info.board, host_info.model)
        self.record('INFO', None, None, message)
        self._repair_strategy.repair(self)

    def update_cros_version_label(self):
        """Bring the cros-version label in line with the running image.

        When no release path can be obtained, the existing version labels
        carrying VERSION_PREFIX are cleared from the host info store
        instead.
        """
        release_path = self.get_full_release_path()
        if release_path:
            afe_utils.add_provision_labels(self, self.VERSION_PREFIX,
                                           release_path)
            return

        logging.info('Could not get labstation version, it could be'
                     ' the labstation is running a customized image.')
        host_info = self.host_info_store.get()
        host_info.clear_version_labels(version_prefix=self.VERSION_PREFIX)
        self.host_info_store.commit(host_info)

    def _validate_uptime(self):
        """Tell whether the uptime is above the reboot threshold.

        The value from check_uptime() is compared against
        UP_TIME_THRESH_HOLD_HOURS converted to seconds, so it is
        presumably expressed in seconds as well.

        @returns True if the uptime is strictly greater than the
                 threshold, otherwise False.
        """
        uptime = float(self.check_uptime())
        return uptime > self.UP_TIME_THRESH_HOLD_HOURS * 3600

    def _is_servo_in_use(self):
        """Determine whether any DUT is currently running a servo task.

        Only lock files modified within the last IN_USE_FILE_EXPIRE_MINS
        minutes are counted.

        @returns True if any DUTs is using servos, otherwise False.
        """
        find_cmd = 'find %s*%s -mmin -%s' % (self.TEMP_FILE_DIR,
                                             self.LOCK_FILE_POSTFIX,
                                             self.IN_USE_FILE_EXPIRE_MINS)
        found = self.run(find_cmd, ignore_status=True).stdout
        return bool(found)

    def _cleanup_post_reboot(self):
        """Remove every reboot request file once the reboot is done."""
        rm_cmd = 'rm %s*%s' % (self.TEMP_FILE_DIR, self.REBOOT_FILE_POSTFIX)
        self.run(rm_cmd, ignore_status=True)

    def rpm_power_on_and_wait(self, _rpm_client=None):
        """Power on a labstation through RPM and wait for it to come up."""
        return self.change_rpm_state_and_wait("ON", _rpm_client=_rpm_client)

    def rpm_power_off_and_wait(self, _rpm_client=None):
        """Power off a labstation through RPM and wait for it to go down."""
        return self.change_rpm_state_and_wait("OFF", _rpm_client=_rpm_client)

    def change_rpm_state_and_wait(self, state, _rpm_client=None):
        """Switch the labstation's RPM power state and wait for the result.

        @param state: "ON" or "OFF"; any other value fails the table
                      lookup with a KeyError.
        @param _rpm_client: rpm_client module, to support testing.

        @raises Exception: If the labstation did not reach the requested
                           state within the timeout for that state.
        """
        client = _rpm_client or rpm_client
        # Each target state has its own wait function and timeout.
        wait, timeout = {
                "ON": (self.wait_up, self.BOOT_TIMEOUT),
                "OFF": (self.wait_down, self.WAIT_DOWN_REBOOT_TIMEOUT),
        }[state]

        client.set_power(self, state)
        if wait(timeout=timeout):
            return

        msg = "%s didn't enter %s state in %s seconds" % (
                getattr(self, 'hostname', None),
                state,
                timeout,
        )
        raise Exception(msg)