#!/usr/bin/python -u
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2
"""
Run a control file through the server side engine
"""
import sys, os, re, traceback, signal, time, logging, getpass, urllib2
import common
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import global_config
require_atfork = global_config.global_config.get_config_value(
'AUTOSERV', 'require_atfork_module', type=bool, default=True)
# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 10
try:
import atfork
atfork.monkeypatch_os_fork_functions()
import atfork.stdlib_fixer
# Fix the Python standard library for threading+fork safety with its
# internal locks. http://code.google.com/p/python-atfork/
import warnings
warnings.filterwarnings('ignore', 'logging module already imported')
atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # require_atfork was already read from the config above; honor it here
    # instead of re-querying with a different default.
    if require_atfork:
        print >>sys.stderr, 'Please run utils/build_externals.py'
        print >>sys.stderr, e
        sys.exit(1)
from autotest_lib.server import frontend
from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.server import utils as server_utils
from autotest_lib.client.common_lib import pidfile, logging_manager
from autotest_lib.client.common_lib.cros.graphite import stats
def log_alarm(signum, frame):
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)
def run_autoserv(pid_file_manager, results, parser):
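    """Run a job as specified by the already-parsed command line.

    @param pid_file_manager: PidFileManager to report status through, or None.
    @param results: Absolute path of the results directory, or None.
    @param parser: autoserv_parser with options and args already parsed.
    """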
# send stdin to /dev/null
dev_null = os.open(os.devnull, os.O_RDONLY)
os.dup2(dev_null, sys.stdin.fileno())
os.close(dev_null)
# Create separate process group
os.setpgrp()
# Implement SIGTERM handler
def handle_sigterm(signum, frame):
logging.debug('Received SIGTERM')
if pid_file_manager:
pid_file_manager.close_file(1, signal.SIGTERM)
logging.debug('Finished writing to pid_file. Killing process.')
# TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
# This sleep allows the pending output to be logged before the kill
# signal is sent.
time.sleep(.1)
os.killpg(os.getpgrp(), signal.SIGKILL)
# Set signal handler
signal.signal(signal.SIGTERM, handle_sigterm)
    # faulthandler is only needed for debugging in the lab and is not
    # available in the chroot during VMTest, so guard the import.
try:
import faulthandler
faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
logging.debug('faulthandler registered on SIGTERM.')
except ImportError:
pass
# Ignore SIGTTOU's generated by output from forked children.
signal.signal(signal.SIGTTOU, signal.SIG_IGN)
    # If we receive a SIGALRM, log it loudly and exit.
    signal.signal(signal.SIGALRM, log_alarm)
    # Server side tests that call shell scripts often depend on $USER being
    # set, but depending on how the autotest scheduler was launched it may not
    # be, so set it explicitly.
os.environ['USER'] = getpass.getuser()
if parser.options.machines:
machines = parser.options.machines.replace(',', ' ').strip().split()
else:
machines = []
machines_file = parser.options.machines_file
label = parser.options.label
group_name = parser.options.group_name
user = parser.options.user
client = parser.options.client
server = parser.options.server
install_before = parser.options.install_before
install_after = parser.options.install_after
verify = parser.options.verify
repair = parser.options.repair
cleanup = parser.options.cleanup
provision = parser.options.provision
reset = parser.options.reset
job_labels = parser.options.job_labels
no_tee = parser.options.no_tee
parse_job = parser.options.parse_job
execution_tag = parser.options.execution_tag
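    # Default the execution tag to the parse job tag when none is given.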
if not execution_tag:
execution_tag = parse_job
host_protection = parser.options.host_protection
ssh_user = parser.options.ssh_user
ssh_port = parser.options.ssh_port
ssh_pass = parser.options.ssh_pass
collect_crashinfo = parser.options.collect_crashinfo
control_filename = parser.options.control_filename
test_retry = parser.options.test_retry
verify_job_repo_url = parser.options.verify_job_repo_url
skip_crash_collection = parser.options.skip_crash_collection
ssh_verbosity = int(parser.options.ssh_verbosity)
ssh_options = parser.options.ssh_options
    # A test can't be both a client side and a server side test.
    if client and server:
        parser.parser.error("Cannot specify a test as both server and client!")
if provision and client:
parser.parser.error("Cannot specify provisioning and client!")
is_special_task = (verify or repair or cleanup or collect_crashinfo or
provision or reset)
if len(parser.args) < 1 and not is_special_task:
parser.parser.error("Missing argument: control file")
if ssh_verbosity > 0:
# ssh_verbosity is an integer between 0 and 3, inclusive
ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
else:
ssh_verbosity_flag = ''
    # We have a control file unless this is only a special task (verify,
    # repair, cleanup, etc.).
if len(parser.args) > 0:
control = parser.args[0]
else:
control = None
if machines_file:
machines = []
for m in open(machines_file, 'r').readlines():
# remove comments, spaces
m = re.sub('#.*', '', m).strip()
if m:
machines.append(m)
print "Read list of machines from file: %s" % machines_file
print ','.join(machines)
if machines:
for machine in machines:
            if not machine or re.search(r'\s', machine):
parser.parser.error("Invalid machine: %s" % str(machine))
machines = list(set(machines))
machines.sort()
if group_name and len(machines) < 2:
parser.parser.error("-G %r may only be supplied with more than one machine."
% group_name)
kwargs = {'group_name': group_name, 'tag': execution_tag,
'disable_sysinfo': parser.options.disable_sysinfo}
if control_filename:
kwargs['control_filename'] = control_filename
job = server_job.server_job(control, parser.args[1:], results, label,
user, machines, client, parse_job,
ssh_user, ssh_port, ssh_pass,
ssh_verbosity_flag, ssh_options,
test_retry, **kwargs)
job.logging.start_logging()
job.init_parser()
# perform checks
job.precheck()
# run the job
exit_code = 0
try:
try:
if repair:
job.repair(host_protection, job_labels)
elif verify:
job.verify(job_labels)
elif provision:
job.provision(job_labels)
elif reset:
job.reset(job_labels)
elif cleanup:
job.cleanup(job_labels)
else:
job.run(install_before, install_after,
verify_job_repo_url=verify_job_repo_url,
only_collect_crashinfo=collect_crashinfo,
skip_crash_collection=skip_crash_collection,
job_labels=job_labels)
finally:
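            # Always close any hosts the job opened, even if it failed.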
while job.hosts:
host = job.hosts.pop()
host.close()
except:
exit_code = 1
traceback.print_exc()
if pid_file_manager:
pid_file_manager.num_tests_failed = job.num_tests_failed
pid_file_manager.close_file(exit_code)
job.cleanup_parser()
sys.exit(exit_code)
def main():
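    """Parse arguments, set up results dir and logging, then run autoserv."""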
    # Whitelist of tests that have run-time measurement enabled.
measure_run_time_tests_names = global_config.global_config.get_config_value(
'AUTOSERV', 'measure_run_time_tests', type=str)
if measure_run_time_tests_names:
measure_run_time_tests = [t.strip() for t in
measure_run_time_tests_names.split(',')]
else:
measure_run_time_tests = []
# grab the parser
parser = autoserv_parser.autoserv_parser
parser.parse_args()
if len(sys.argv) == 1:
parser.parser.print_help()
sys.exit(1)
if parser.options.no_logging:
results = None
else:
results = parser.options.results
if not results:
results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
results = os.path.abspath(results)
resultdir_exists = False
for filename in ('control.srv', 'status.log', '.autoserv_execute'):
if os.path.exists(os.path.join(results, filename)):
resultdir_exists = True
if not parser.options.use_existing_results and resultdir_exists:
error = "Error: results directory already exists: %s\n" % results
sys.stderr.write(error)
sys.exit(1)
    # Now that we've verified there is no leftover results dir from previous
    # jobs, create the results dir, since the logging system needs to create
    # the log file there.
if not os.path.isdir(results):
os.makedirs(results)
logging_manager.configure_logging(
server_logging_config.ServerLoggingConfig(), results_dir=results,
use_console=not parser.options.no_tee,
verbose=parser.options.verbose,
no_console_prefix=parser.options.no_console_prefix)
if results:
logging.info("Results placed in %s" % results)
    # Wait until now to perform this check, so it gets properly logged.
if parser.options.use_existing_results and not resultdir_exists:
logging.error("No existing results directory found: %s", results)
sys.exit(1)
logging.debug('autoserv command was: %s', ' '.join(sys.argv))
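    # If requested, record this process in a pidfile under the results dir.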
if parser.options.write_pidfile:
pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
results)
pid_file_manager.open_file()
else:
pid_file_manager = None
autotest.BaseAutotest.set_install_in_tmpdir(
parser.options.install_in_tmpdir)
timer = None
try:
# Take the first argument as control file name, get the test name from
# the control file. If the test name exists in the list of tests with
# run time measurement enabled, start a timer to begin measurement.
if (len(parser.args) > 0 and parser.args[0] != '' and
parser.options.machines):
try:
test_name = control_data.parse_control(parser.args[0],
raise_warnings=True).name
except control_data.ControlVariableException:
logging.debug('Failed to retrieve test name from control file.')
test_name = None
if test_name in measure_run_time_tests:
machines = parser.options.machines.replace(',', ' '
).strip().split()
try:
afe = frontend.AFE()
board = server_utils.get_board_from_afe(machines[0], afe)
timer = stats.Timer('autoserv_run_time.%s.%s' %
(board, test_name))
timer.start()
except (urllib2.HTTPError, urllib2.URLError):
# Ignore error if RPC failed to get board
pass
except control_data.ControlVariableException as e:
logging.error(str(e))
exit_code = 0
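    # Run autoserv normally, or just sleep if testing mode is enabled, and
    # translate any failure into a non-zero exit code.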
try:
try:
if not global_config.global_config.get_config_value(
'AUTOSERV', 'testing_mode', type=bool, default=False):
run_autoserv(pid_file_manager, results, parser)
else:
# TODO(beeps): Extend this to cover different failure modes.
time.sleep(TESTING_MODE_SLEEP_SECS)
except SystemExit as e:
exit_code = e.code
if exit_code:
logging.exception(e)
except Exception as e:
# If we don't know what happened, we'll classify it as
# an 'abort' and return 1.
logging.exception(e)
exit_code = 1
finally:
if pid_file_manager:
pid_file_manager.close_file(exit_code)
if timer:
timer.stop()
sys.exit(exit_code)
if __name__ == '__main__':
main()