blob: f315195ab715abe48324ea5157c8a73f177b9101 [file] [log] [blame] [edit]
#!/usr/bin/python
#
# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import datetime as datetime_base
import logging
from datetime import datetime
import common
from autotest_lib.server import utils
from autotest_lib.server.cros.dynamic_suite import reporting_utils
class JobTimer(object):
"""Utility class capable of measuring job timeouts.
"""
# Format used in datetime - string conversion.
time_format = '%m-%d-%Y [%H:%M:%S]'
def __init__(self, job_created_time, timeout_mins):
"""JobTimer constructor.
@param job_created_time: float representing the time a job was
created. Eg: time.time()
@param timeout_mins: float representing the timeout in minutes.
"""
self.job_created_time = datetime.fromtimestamp(job_created_time)
self.timeout_hours = datetime_base.timedelta(hours=timeout_mins/60.0)
self.past_halftime = False
@classmethod
def format_time(cls, datetime_obj):
"""Get the string formatted version of the datetime object.
@param datetime_obj: A datetime.datetime object.
Eg: datetime.datetime.now()
@return: A formatted string containing the date/time of the
input datetime.
"""
return datetime_obj.strftime(cls.time_format)
def elapsed_time(self):
"""Get the time elapsed since this job was created.
@return: A timedelta object representing the elapsed time.
"""
return datetime.now() - self.job_created_time
def is_suite_timeout(self):
"""Check if the suite timed out.
@return: True if more than timeout_hours has elapsed since the suite job
was created.
"""
if self.elapsed_time() >= self.timeout_hours:
logging.info('Suite timed out. Started on %s, timed out on %s',
self.format_time(self.job_created_time),
self.format_time(datetime.now()))
return True
return False
def first_past_halftime(self):
"""Check if we just crossed half time.
This method will only return True once, the first time it is called
after a job's elapsed time is past half its timeout.
@return True: If this is the first call of the method after halftime.
"""
if (not self.past_halftime and
self.elapsed_time() > self.timeout_hours/2):
self.past_halftime = True
return True
return False
class RPCHelper(object):
"""A class to help diagnose a suite run through the rpc interface.
"""
def __init__(self, rpc_interface):
"""Constructor for rpc helper class.
@param rpc_interface: An rpc object, eg: A RetryingAFE instance.
"""
self.rpc_interface = rpc_interface
def diagnose_pool(self, board, pool, time_delta_hours, limit=5):
"""Log diagnostic information about a timeout for a board/pool.
@param board: The board for which the current suite was run.
@param pool: The pool against which the current suite was run.
@param time_delta_hours: The time from which we should log information.
This is a datetime.timedelta object, as stored by the JobTimer.
@param limit: The maximum number of jobs per host, to log.
@raises proxy.JSONRPCException: For exceptions thrown across the wire.
"""
hosts = self.rpc_interface.get_hosts(
multiple_labels=('pool:%s' % pool, 'board:%s' % board))
if not hosts:
logging.warning('No hosts found for board:%s in pool:%s',
board, pool)
return
cutoff = datetime.now() - time_delta_hours
for host in hosts:
jobs = self.rpc_interface.get_host_queue_entries(
host__id=host.id, started_on__gte=str(cutoff))
job_info = ''
for job in jobs[-limit:]:
job_info += ('%s %s started on: %s status %s\n' %
(job.id, job.job.name, job.started_on, job.status))
logging.error('host:%s, status:%s, locked: %s'
'\nlabels: %s\nLast %s jobs within %s:\n%s',
host.hostname, host.status, host.locked, host.labels,
limit, time_delta_hours, job_info)
def check_dut_availability(self, board, pool, minimum_duts=0):
"""Check if DUT availability for a given board and pool is less than
minimum.
@param board: The board to check DUT availability.
@param pool: The pool to check DUT availability.
@param minimum_duts: Minimum Number of available machines required to
run the suite. Default is set to 0, which means do
not force the check of available machines before
running the suite.
@raise: TestLabException if DUT availability is lower than minimum,
or failed to get host information from rpc interface.
"""
if minimum_duts == 0:
return
hosts = self.rpc_interface.get_hosts(
invalid=False,
multiple_labels=('pool:%s' % pool, 'board:%s' % board))
if not hosts:
raise utils.TestLabException(
'No hosts found for board:%s in pool:%s' %
(board, pool))
if len(hosts) < minimum_duts:
logging.debug('The total number of DUTs for %s in pool:%s is %d, '
'which is no more than the required minimum number of'
' available DUTS of %d. Minimum available DUT rule is'
' not enforced.', board, pool, len(hosts),
minimum_duts)
return
# TODO(dshi): Replace the hard coded string with enum value,
# models.Host.Status.REPAIRING and REPAIR_FAILED
# setup_django_environment can't be imported now as paygen server does
# not have django package.
bad_statuses = ('Repair Failed', 'Repairing')
available_hosts = [host for host in hosts
if not host.status in bad_statuses]
logging.debug('%d of %d DUTs are available for board %s pool %s.',
len(available_hosts), len(hosts), board, pool)
if len(available_hosts) < minimum_duts:
raise utils.TestLabException(
'Number of available DUTs for board %s pool %s is %d, which'
' is less than the minimum value %d.' %
(board, pool, len(available_hosts), minimum_duts))
def diagnose_job(self, job_id):
"""Diagnose a suite job.
Logs information about the jobs that are still to run in the suite.
@param job_id: The id of the suite job to get information about.
No meaningful information gets logged if the id is for a sub-job.
"""
incomplete_jobs = self.rpc_interface.get_jobs(
parent_job_id=job_id, summary=True,
hostqueueentry__complete=False)
if incomplete_jobs:
logging.info('\n%s printing summary of incomplete jobs (%s):\n',
JobTimer.format_time(datetime.now()),
len(incomplete_jobs))
for job in incomplete_jobs:
logging.info('%s: %s', job.testname[job.testname.rfind('/')+1:],
reporting_utils.link_job(job.id))
else:
logging.info('All jobs in suite have already completed.')