site_utils/dut_status.py - mirrors/cros/chromiumos/third_party/autotest - Git at Google

 #!/usr/bin/env python
 # Copyright 2014 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import argparse
 import sys
 import time

 import common

 from autotest_lib.client.common_lib import global_config
 from autotest_lib.client.common_lib import time_utils
 from autotest_lib.client.common_lib.cros.graphite import es_utils


 # Values used to describe the diagnosis of a DUT.  These values
 # are used to indicate both DUT status after a single state
 # transition, and also diagnosis of whether the DUT was working
 # at the end of a given time interval.
 #
 # _NO_STATUS:  Used when there are no state transitions recorded in
 #     a given time interval.
 # _UNKNOWN:  For an individual transition, indicates that the DUT
 #     status is unchanged from the previous transition.  For a time
 #     interval, indicates that the DUT's status can't be determined
 #     from the transition history.
 # _WORKING:  Indicates that the DUT was working normally after the
 #     transition, or at the end of the time interval.
 # _BROKEN:  Indicates that the DUT needed manual repair after the
 #     transition, or at the end of the time interval.
 #
 _NO_STATUS = 0
 _UNKNOWN = 1
 _WORKING = 2
 _BROKEN = 3

 # List of string values to display for the diagnosis values above,
 # indexed by those values.
 _DIAGNOSIS_IDS = ['??', '--', 'OK', 'NO']


 # Default time interval for the --duration option when a value isn't
 # specified on the command line.
 _DEFAULT_DURATION = 12


 def _parse_time(time_string):
     return int(time_utils.date_string_to_epoch_time(time_string))


 class StateTransition(object):
     """Information about a state transistion in host history.

     This remembers the relevant data from a host history object
     in the elastic search database.  This include primary data from
     the ES database such as the hostname, the AFE host state of the
     transition, and the time of the transition.  It also includes
     secondary data calculated from the primary data.

     State transitions can be caused by either a regular test job or
     a special task.  The specific kind of transition is determined
     by the `job_id` and `task_id` member fields as follows:
       * `self.job_id is not None and self.task_id is None` -
         This indicates a regular job with id `self.job_id`.
       * `self.job_id is None and self.task_id is not None` -
         This indicates a 'Repair' special task with id
         `self.task_id`, triggered on a failed DUT.
       * `self.job_id is not None and self.task_id is not None` -
         This indicates a special task with id `self.task_id`
         associated with an HQE (such as a 'Provision' job).  The HQE
         job has the id `self.job_id`.
     The `job_id` and `task_id` fields cannot both be `None`.

     Each state transition implies a diagnosis about whether the DUT
     was working at the time, based on the `status` value:
       * 'Ready' - The device was working normally at the time of the
         transition.
       * 'Repair Failed' - The device probably needed manual
         intervention at the time of the transition.
       * All other status values - The device's state is unchanged
         from the previous transition.

     @property hostname    Host name of the DUT.
     @property status      DUT state after the transition; valid
                           values are the same as for
                           `afe_hosts.status`.
     @property timestamp   Time when the scheduler recorded the
                           transition.
     @property job_id      ID in the AFE database for the job that
                           triggered the transition.  `None` if the
                           transition was for a special task without
                           an associated HQE.
     @property task_id     ID in the AFE database for the special
                           task that triggered the transition.
                           `None` if the transition was for a regular
                           job.
     @property job_url     URL to the logs for the job that triggered
                           the transition.
     @property diagnosis   Working status of the DUT, derived from
                           `status`.

     """

     get_config_value = global_config.global_config.get_config_value
     _AFE_HOSTNAME = get_config_value('SERVER', 'hostname')
     _LOG_URL_PATTERN = get_config_value('CROS', 'log_url_pattern')

     @classmethod
     def get_transitions(cls, hostname, start_time, end_time):
         """Get a list of StateTransition objects from ES host history.

         The returned list includes all transitions on the given host
         in the given time interval.

         @param hostname    Host for the transitions in the host
                            history.
         @param start_time  Start of the time interval to search.
         @param end_time    End of the time interval to search.

         """
         equality_constraints = [('_type', 'host_history'),
                                 ('hostname', hostname)]
         range_constraints = [('time_recorded', start_time, end_time)]
         query = es_utils.create_range_eq_query_multiple(
                     fields_returned=None,
                     equality_constraints=equality_constraints,
                     range_constraints=range_constraints,
                     size=end_time - start_time,
                     sort_specs=[{'time_recorded': 'desc'}])
         result = es_utils.execute_query(query)
         return [cls(o['_source']) for o in result['hits']['hits']]


     def __init__(self, transition_data):
         self.hostname = transition_data['hostname']
         self.status = transition_data['status']
         self.timestamp = transition_data['time_recorded']
         self.job_id = transition_data.get('job_id')
         self.task_id = transition_data.get('task_id')
         if self.task_id is not None:
             logdir = ('hosts/%s/%s-%s' %
                       (self.hostname, self.task_id,
                        transition_data['task_name'].lower()))
         else:
             logdir = '%s-%s' % (self.job_id, transition_data['owner'])
         self.job_url = StateTransition._LOG_URL_PATTERN % (
                 StateTransition._AFE_HOSTNAME, logdir)
         self.diagnosis = _UNKNOWN
         if self.status == 'Repair Failed':
             self.diagnosis = _BROKEN
         elif self.status == 'Ready':
             self.diagnosis = _WORKING


 class HostStateHistory(object):
     """Class to query and remember DUT state transition history.

     This class is responsible for querying the elastic search
     database to determine the history of a single DUT in a time
     interval of interest, and for remembering the query results for
     reporting.

     @property hostname    Host name of the DUT.
     @property start_time  Start of the requested time interval.
     @property end_time    End of the requested time interval.
     @property history     A list of state transitions on the DUT
                           during the given time interval, ordered
                           from most to least recent.

     """

     def __init__(self, hostname, start_time, end_time):
         self.hostname = hostname
         self.start_time = start_time
         self.end_time = end_time
         self.history = self._get_history(start_time, end_time)

     def __iter__(self):
         return self.history.__iter__()

     def _get_history(self, start_time, end_time):
         return StateTransition.get_transitions(
                 self.hostname, start_time, end_time)

     def last_diagnosis(self):
         """Return the most recent diagnosis for the DUT.

         This searches the DUT's state history from most to least
         recent, looking for transitions that indicate whether the
         DUT was working.  Return a tuple of `(diagnosis, transition)`.

         The `diagnosis` entry in the tuple is one of these values:
           * _NO_STATUS - The state transition history is empty.
           * _UNKNOWN - No state in the history indicated a
               positive diagnosis.
           * _WORKING - At last check, the DUT was working.
           * _BROKEN - At last check, the DUT likely required manual
               intervention.

         The `transition` entry in the tuple is the entry that led to
         the diagnosis.  The transition will be `None` if the value
         is `_NO_STATUS` or `_UNKNOWN`.

         @return A tuple with the DUT's status and the transition that
                 determined the diagnosis.

         """
         if not self.history:
             return _NO_STATUS, None
         for transition in self:
             diagnosis = transition.diagnosis
             if diagnosis == _BROKEN or diagnosis == _WORKING:
                 return diagnosis, transition
         return _UNKNOWN, None


 def _print_simple_status(arguments):
     fmt = '%-28s %-2s  %-19s  %s'
     print fmt % ('hostname', 'S', 'last checked', 'URL')
     for hostname in arguments.hostnames:
         history = HostStateHistory(hostname,
                                    arguments.since, arguments.until)
         status, transition = history.last_diagnosis()
         if transition is not None:
             url = transition.job_url
             datestr = time_utils.epoch_time_to_date_string(
                     transition.timestamp)
         else:
             url = '---'
             datestr = '---'
         print fmt % (history.hostname,
                      _DIAGNOSIS_IDS[status],
                      datestr,
                      url)


 def _print_host_transitions(arguments):
     for hostname in arguments.hostnames:
         print hostname
         history = HostStateHistory(hostname,
                                    arguments.since, arguments.until)
         for transition in history:
             start_time = time_utils.epoch_time_to_date_string(
                     transition.timestamp)
             print '    %s  %s %s' % (
                     start_time,
                     _DIAGNOSIS_IDS[transition.diagnosis],
                     transition.job_url)


 def _validate_command(arguments):
     if (arguments.duration is not None and
             arguments.since is not None and arguments.until is not None):
         print >>sys.stderr, ('Can specify at most two of '
                              '--since, --until, and --duration')
         sys.exit(1)
     if (arguments.until is None and (arguments.since is None or
                                      arguments.duration is None)):
         arguments.until = int(time.time())
     if arguments.since is None:
         if arguments.duration is None:
             arguments.duration = _DEFAULT_DURATION
         arguments.since = (arguments.until -
                            arguments.duration * 60 * 60)
     elif arguments.until is None:
         arguments.until = (arguments.since +
                            arguments.duration * 60 * 60)


 def _parse_command(argv):
     parser = argparse.ArgumentParser(
             prog=argv[0],
             description='Display DUT status and execution history',
             epilog='You can specify one or two of --since, --until, '
                    'and --duration, but not all three.\n'
                    'The date/time format is "YYYY-MM-DD HH:MM:SS".')
     parser.add_argument('-s', '--since', type=_parse_time,
                         metavar='DATE/TIME',
                         help='starting time for history display')
     parser.add_argument('-u', '--until', type=_parse_time,
                         metavar='DATE/TIME',
                         help='ending time for history display'
                              ' (default: now)')
     parser.add_argument('-d', '--duration', type=int,
                         metavar='HOURS',
                         help='number of hours of history to display'
                              ' (default: %d)' % _DEFAULT_DURATION)
     parser.add_argument('-f', '--full_history', action='store_true')
     parser.add_argument('hostnames',
                         nargs='*',
                         help='host names of DUTs to display')
     arguments = parser.parse_args(argv[1:])
     _validate_command(arguments)
     return arguments


 def main(argv):
     """Standard main() for command line processing.

     @param argv Command line arguments (normally sys.argv).

     """
     arguments = _parse_command(argv)
     if arguments.full_history:
         _print_host_transitions(arguments)
     else:
         _print_simple_status(arguments)


 if __name__ == '__main__':
     main(sys.argv)
	#!/usr/bin/env python
	# Copyright 2014 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import argparse
	import sys
	import time

	import common

	from autotest_lib.client.common_lib import global_config
	from autotest_lib.client.common_lib import time_utils
	from autotest_lib.client.common_lib.cros.graphite import es_utils


	# Values used to describe the diagnosis of a DUT. These values
	# are used to indicate both DUT status after a single state
	# transition, and also diagnosis of whether the DUT was working
	# at the end of a given time interval.
	#
	# _NO_STATUS: Used when there are no state transitions recorded in
	# a given time interval.
	# _UNKNOWN: For an individual transition, indicates that the DUT
	# status is unchanged from the previous transition. For a time
	# interval, indicates that the DUT's status can't be determined
	# from the transition history.
	# _WORKING: Indicates that the DUT was working normally after the
	# transition, or at the end of the time interval.
	# _BROKEN: Indicates that the DUT needed manual repair after the
	# transition, or at the end of the time interval.
	#
	_NO_STATUS = 0
	_UNKNOWN = 1
	_WORKING = 2
	_BROKEN = 3

	# List of string values to display for the diagnosis values above,
	# indexed by those values.
	_DIAGNOSIS_IDS = ['??', '--', 'OK', 'NO']


	# Default time interval for the --duration option when a value isn't
	# specified on the command line.
	_DEFAULT_DURATION = 12


	def _parse_time(time_string):
	return int(time_utils.date_string_to_epoch_time(time_string))


	class StateTransition(object):
	"""Information about a state transistion in host history.

	This remembers the relevant data from a host history object
	in the elastic search database. This include primary data from
	the ES database such as the hostname, the AFE host state of the
	transition, and the time of the transition. It also includes
	secondary data calculated from the primary data.

	State transitions can be caused by either a regular test job or
	a special task. The specific kind of transition is determined
	by the `job_id` and `task_id` member fields as follows:
	* `self.job_id is not None and self.task_id is None` -
	This indicates a regular job with id `self.job_id`.
	* `self.job_id is None and self.task_id is not None` -
	This indicates a 'Repair' special task with id
	`self.task_id`, triggered on a failed DUT.
	* `self.job_id is not None and self.task_id is not None` -
	This indicates a special task with id `self.task_id`
	associated with an HQE (such as a 'Provision' job). The HQE
	job has the id `self.job_id`.
	The `job_id` and `task_id` fields cannot both be `None`.

	Each state transition implies a diagnosis about whether the DUT
	was working at the time, based on the `status` value:
	* 'Ready' - The device was working normally at the time of the
	transition.
	* 'Repair Failed' - The device probably needed manual
	intervention at the time of the transition.
	* All other status values - The device's state is unchanged
	from the previous transition.

	@property hostname Host name of the DUT.
	@property status DUT state after the transition; valid
	values are the same as for
	`afe_hosts.status`.
	@property timestamp Time when the scheduler recorded the
	transition.
	@property job_id ID in the AFE database for the job that
	triggered the transition. `None` if the
	transition was for a special task without
	an associated HQE.
	@property task_id ID in the AFE database for the special
	task that triggered the transition.
	`None` if the transition was for a regular
	job.
	@property job_url URL to the logs for the job that triggered
	the transition.
	@property diagnosis Working status of the DUT, derived from
	`status`.

	"""

	get_config_value = global_config.global_config.get_config_value
	_AFE_HOSTNAME = get_config_value('SERVER', 'hostname')
	_LOG_URL_PATTERN = get_config_value('CROS', 'log_url_pattern')

	@classmethod
	def get_transitions(cls, hostname, start_time, end_time):
	"""Get a list of StateTransition objects from ES host history.

	The returned list includes all transitions on the given host
	in the given time interval.

	@param hostname Host for the transitions in the host
	history.
	@param start_time Start of the time interval to search.
	@param end_time End of the time interval to search.

	"""
	equality_constraints = [('_type', 'host_history'),
	('hostname', hostname)]
	range_constraints = [('time_recorded', start_time, end_time)]
	query = es_utils.create_range_eq_query_multiple(
	fields_returned=None,
	equality_constraints=equality_constraints,
	range_constraints=range_constraints,
	size=end_time - start_time,
	sort_specs=[{'time_recorded': 'desc'}])
	result = es_utils.execute_query(query)
	return [cls(o['_source']) for o in result['hits']['hits']]


	def __init__(self, transition_data):
	self.hostname = transition_data['hostname']
	self.status = transition_data['status']
	self.timestamp = transition_data['time_recorded']
	self.job_id = transition_data.get('job_id')
	self.task_id = transition_data.get('task_id')
	if self.task_id is not None:
	logdir = ('hosts/%s/%s-%s' %
	(self.hostname, self.task_id,
	transition_data['task_name'].lower()))
	else:
	logdir = '%s-%s' % (self.job_id, transition_data['owner'])
	self.job_url = StateTransition._LOG_URL_PATTERN % (
	StateTransition._AFE_HOSTNAME, logdir)
	self.diagnosis = _UNKNOWN
	if self.status == 'Repair Failed':
	self.diagnosis = _BROKEN
	elif self.status == 'Ready':
	self.diagnosis = _WORKING


	class HostStateHistory(object):
	"""Class to query and remember DUT state transition history.

	This class is responsible for querying the elastic search
	database to determine the history of a single DUT in a time
	interval of interest, and for remembering the query results for
	reporting.

	@property hostname Host name of the DUT.
	@property start_time Start of the requested time interval.
	@property end_time End of the requested time interval.
	@property history A list of state transitions on the DUT
	during the given time interval, ordered
	from most to least recent.

	"""

	def __init__(self, hostname, start_time, end_time):
	self.hostname = hostname
	self.start_time = start_time
	self.end_time = end_time
	self.history = self._get_history(start_time, end_time)

	def __iter__(self):
	return self.history.__iter__()

	def _get_history(self, start_time, end_time):
	return StateTransition.get_transitions(
	self.hostname, start_time, end_time)

	def last_diagnosis(self):
	"""Return the most recent diagnosis for the DUT.

	This searches the DUT's state history from most to least
	recent, looking for transitions that indicate whether the
	DUT was working. Return a tuple of `(diagnosis, transition)`.

	The `diagnosis` entry in the tuple is one of these values:
	* _NO_STATUS - The state transition history is empty.
	* _UNKNOWN - No state in the history indicated a
	positive diagnosis.
	* _WORKING - At last check, the DUT was working.
	* _BROKEN - At last check, the DUT likely required manual
	intervention.

	The `transition` entry in the tuple is the entry that led to
	the diagnosis. The transition will be `None` if the value
	is `_NO_STATUS` or `_UNKNOWN`.

	@return A tuple with the DUT's status and the transition that
	determined the diagnosis.

	"""
	if not self.history:
	return _NO_STATUS, None
	for transition in self:
	diagnosis = transition.diagnosis
	if diagnosis == _BROKEN or diagnosis == _WORKING:
	return diagnosis, transition
	return _UNKNOWN, None


	def _print_simple_status(arguments):
	fmt = '%-28s %-2s %-19s %s'
	print fmt % ('hostname', 'S', 'last checked', 'URL')
	for hostname in arguments.hostnames:
	history = HostStateHistory(hostname,
	arguments.since, arguments.until)
	status, transition = history.last_diagnosis()
	if transition is not None:
	url = transition.job_url
	datestr = time_utils.epoch_time_to_date_string(
	transition.timestamp)
	else:
	url = '---'
	datestr = '---'
	print fmt % (history.hostname,
	_DIAGNOSIS_IDS[status],
	datestr,
	url)


	def _print_host_transitions(arguments):
	for hostname in arguments.hostnames:
	print hostname
	history = HostStateHistory(hostname,
	arguments.since, arguments.until)
	for transition in history:
	start_time = time_utils.epoch_time_to_date_string(
	transition.timestamp)
	print ' %s %s %s' % (
	start_time,
	_DIAGNOSIS_IDS[transition.diagnosis],
	transition.job_url)


	def _validate_command(arguments):
	if (arguments.duration is not None and
	arguments.since is not None and arguments.until is not None):
	print >>sys.stderr, ('Can specify at most two of '
	'--since, --until, and --duration')
	sys.exit(1)
	if (arguments.until is None and (arguments.since is None or
	arguments.duration is None)):
	arguments.until = int(time.time())
	if arguments.since is None:
	if arguments.duration is None:
	arguments.duration = _DEFAULT_DURATION
	arguments.since = (arguments.until -
	arguments.duration * 60 * 60)
	elif arguments.until is None:
	arguments.until = (arguments.since +
	arguments.duration * 60 * 60)


	def _parse_command(argv):
	parser = argparse.ArgumentParser(
	prog=argv[0],
	description='Display DUT status and execution history',
	epilog='You can specify one or two of --since, --until, '
	'and --duration, but not all three.\n'
	'The date/time format is "YYYY-MM-DD HH:MM:SS".')
	parser.add_argument('-s', '--since', type=_parse_time,
	metavar='DATE/TIME',
	help='starting time for history display')
	parser.add_argument('-u', '--until', type=_parse_time,
	metavar='DATE/TIME',
	help='ending time for history display'
	' (default: now)')
	parser.add_argument('-d', '--duration', type=int,
	metavar='HOURS',
	help='number of hours of history to display'
	' (default: %d)' % _DEFAULT_DURATION)
	parser.add_argument('-f', '--full_history', action='store_true')
	parser.add_argument('hostnames',
	nargs='*',
	help='host names of DUTs to display')
	arguments = parser.parse_args(argv[1:])
	_validate_command(arguments)
	return arguments


	def main(argv):
	"""Standard main() for command line processing.

	@param argv Command line arguments (normally sys.argv).

	"""
	arguments = _parse_command(argv)
	if arguments.full_history:
	_print_host_transitions(arguments)
	else:
	_print_simple_status(arguments)


	if __name__ == '__main__':
	main(sys.argv)