scheduler/site_monitor_db.py - mirrors/cros/chromiumos/third_party/autotest - Git at Google

 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 #pylint: disable-msg=C0111

 import os
 import logging
 import time

 from autotest_lib.client.common_lib import global_config
 from autotest_lib.client.common_lib.cros.graphite import autotest_stats
 from autotest_lib.frontend.afe import models
 from autotest_lib.scheduler import email_manager
 from autotest_lib.scheduler import scheduler_config, scheduler_models


 # Override default parser with our site parser.
 def parser_path(install_dir):
     """Build the path of the site-specific results parser.

     @param install_dir: installation directory.

     @return: install_dir joined with the 'tko' and 'site_parse' components.
     """
     site_parser_parts = ('tko', 'site_parse')
     return os.path.join(install_dir, *site_parser_parts)


 class SiteAgentTask(object):
     """
     SiteAgentTask subclasses BaseAgentTask in monitor_db.

     The methods below call self._final_status(), which is not defined in
     this class and must be supplied by the class it is combined with.
     """


     def _archive_results(self, queue_entries):
         """
         Set the status of queue_entries to ARCHIVING.

         This method sets the status of the queue_entries to ARCHIVING
         if the enable_archiving flag is true in global_config.ini.
         Otherwise, it bypasses the archiving step and sets the queue entries
         to the final status of current step (self._final_status()).

         @param queue_entries: host queue entries whose status is updated.
         """
         enable_archiving = global_config.global_config.get_config_value(
             scheduler_config.CONFIG_SECTION, 'enable_archiving', type=bool)
         # Set the status of the queue entries to archiving or self final status
         if enable_archiving:
             status = models.HostQueueEntry.Status.ARCHIVING
         else:
             status = self._final_status()

         # Update the entries handed in by the caller; previously the
         # argument was ignored in favour of self.queue_entries.
         for queue_entry in queue_entries:
             queue_entry.set_status(status)


     def _check_queue_entry_statuses(self, queue_entries, allowed_hqe_statuses,
                                     allowed_host_statuses=None):
         """
         Abort the job of any queue entry that is in an unexpected state.

         Forked from monitor_db.py. Instead of raising, each offending entry
         is logged, reported by email and its job is asked to abort. An entry
         that fails both checks is reported and aborted once per check.

         @param queue_entries: host queue entries to validate.
         @param allowed_hqe_statuses: statuses an entry is allowed to be in.
         @param allowed_host_statuses: statuses an entry's host is allowed to
                 be in; None (the default) skips the host check.
         """
         class_name = self.__class__.__name__
         for entry in queue_entries:
             if entry.status not in allowed_hqe_statuses:
                 # In the original code, here we raise an exception. In an
                 # effort to prevent downtime we will instead abort the job and
                 # send out an email notifying us this has occurred.
                 error_message = ('%s attempting to start entry with invalid '
                                  'status %s: %s. Aborting Job: %s.'
                                  % (class_name, entry.status, entry,
                                     entry.job))
                 logging.error(error_message)
                 email_manager.manager.enqueue_notify_email(
                     'Job Aborted - Invalid Host Queue Entry Status',
                     error_message)
                 entry.job.request_abort()
             # entry.host is only touched when a host whitelist was given.
             invalid_host_status = (
                     allowed_host_statuses is not None
                     and entry.host.status not in allowed_host_statuses)
             if invalid_host_status:
                 # In the original code, here we raise an exception. In an
                 # effort to prevent downtime we will instead abort the job and
                 # send out an email notifying us this has occurred.
                 error_message = ('%s attempting to start on queue entry with '
                                  'invalid host status %s: %s. Aborting Job: %s'
                                  % (class_name, entry.host.status, entry,
                                     entry.job))
                 logging.error(error_message)
                 email_manager.manager.enqueue_notify_email(
                     'Job Aborted - Invalid Host Status', error_message)
                 entry.job.request_abort()


 class SiteDispatcher(object):
     """
     SiteDispatcher subclasses BaseDispatcher in monitor_db.

     Each overridden step calls the parent implementation, is decorated with
     the 'scheduler' stats timer, and reports to the 'scheduler_rel' gauge
     the time elapsed since the start of the current tick.
     """
     # Fallback user id for Reset tasks when 'autotest_system' does not exist.
     DEFAULT_REQUESTED_BY_USER_ID = 1


     _timer = autotest_stats.Timer('scheduler')
     _gauge = autotest_stats.Gauge('scheduler_rel')
     # Timestamp (time.time()) taken at the start of the latest tick.
     _tick_start = None


     def _send_time_since_tick_start(self, name):
         """
         Send the seconds elapsed since the current tick began to the gauge.

         Nothing is sent when no tick start has been recorded yet.

         @param name: stat name the elapsed time is reported under.
         """
         if self._tick_start:
             self._gauge.send(name, time.time() - self._tick_start)


     @_timer.decorate
     def tick(self):
         """Run one tick and report its total duration to the gauge."""
         self._tick_start = time.time()
         super(SiteDispatcher, self).tick()
         self._gauge.send('tick', time.time() - self._tick_start)


     @_timer.decorate
     def _garbage_collection(self):
         super(SiteDispatcher, self)._garbage_collection()
         self._send_time_since_tick_start('_garbage_collection')


     @_timer.decorate
     def _run_cleanup(self):
         super(SiteDispatcher, self)._run_cleanup()
         self._send_time_since_tick_start('_run_cleanup')


     @_timer.decorate
     def _find_aborting(self):
         super(SiteDispatcher, self)._find_aborting()
         self._send_time_since_tick_start('_find_aborting')


     @_timer.decorate
     def _process_recurring_runs(self):
         super(SiteDispatcher, self)._process_recurring_runs()
         self._send_time_since_tick_start('_process_recurring_runs')


     @_timer.decorate
     def _schedule_delay_tasks(self):
         super(SiteDispatcher, self)._schedule_delay_tasks()
         self._send_time_since_tick_start('_schedule_delay_tasks')


     @_timer.decorate
     def _schedule_running_host_queue_entries(self):
         super(SiteDispatcher, self)._schedule_running_host_queue_entries()
         self._send_time_since_tick_start(
                 '_schedule_running_host_queue_entries')


     @_timer.decorate
     def _schedule_special_tasks(self):
         super(SiteDispatcher, self)._schedule_special_tasks()
         self._send_time_since_tick_start('_schedule_special_tasks')


     @_timer.decorate
     def _schedule_new_jobs(self):
         super(SiteDispatcher, self)._schedule_new_jobs()
         self._send_time_since_tick_start('_schedule_new_jobs')


     @_timer.decorate
     def _handle_agents(self):
         super(SiteDispatcher, self)._handle_agents()
         self._send_time_since_tick_start('_handle_agents')


     def _reverify_hosts_where(self, where,
                               print_message='Reverifying host %s'):
         """
         Create a Reset special task for every matching host that needs one.

         This is an altered version of _reverify_hosts_where: the call to
         models.SpecialTask.objects.create passes in an argument for
         requested_by, in order to allow the Reset task to be created
         properly.

         Hosts that are locked or invalid, that already have an agent, or
         that already have a special task scheduled are skipped.

         @param where: SQL condition appended to the host query.
         @param print_message: message logged for each host that gets a Reset
                 task, with the hostname as its format argument. Pass a
                 false value to log nothing.
         """
         full_where = 'locked = 0 AND invalid = 0 AND ' + where
         # Looked up lazily and at most once per call: the requesting user
         # is the same for every host, so there is no need to query per host.
         requested_by = None
         for host in scheduler_models.Host.fetch(where=full_where):
             if self.host_has_agent(host):
                 # host has already been recovered in some way
                 continue
             if self._host_has_scheduled_special_task(host):
                 # host will have a special task scheduled on the next cycle
                 continue
             if print_message:
                 logging.error(print_message, host.hostname)
             if requested_by is None:
                 try:
                     requested_by = models.User.objects.get(
                             login='autotest_system')
                 except models.User.DoesNotExist:
                     requested_by = models.User.objects.get(
                             id=SiteDispatcher.DEFAULT_REQUESTED_BY_USER_ID)
             models.SpecialTask.objects.create(
                     task=models.SpecialTask.Task.RESET,
                     host=models.Host.objects.get(id=host.id),
                     requested_by=requested_by)


     def _check_for_unrecovered_verifying_entries(self):
         """
         Requeue Resetting entries that have no pending special task.

         Every host queue entry in the RESETTING status that has no
         incomplete Cleanup, Verify or Reset special task attached is logged
         and set back to 'Queued'.
         """
         # Verify is replaced by Reset.
         queue_entries = scheduler_models.HostQueueEntry.fetch(
                 where='status = "%s"' % models.HostQueueEntry.Status.RESETTING)
         for queue_entry in queue_entries:
             special_tasks = models.SpecialTask.objects.filter(
                     task__in=(models.SpecialTask.Task.CLEANUP,
                               models.SpecialTask.Task.VERIFY,
                               models.SpecialTask.Task.RESET),
                     queue_entry__id=queue_entry.id,
                     is_complete=False)
             if special_tasks.count() == 0:
                 logging.error('Unrecovered Resetting host queue entry: %s. '
                               'Setting status to Queued.', str(queue_entry))
                 # Essentially this host queue entry was set to be Resetting
                 # however no special task exists for entry. This occurs if the
                 # scheduler dies between changing the status and creating the
                 # special task. By setting it to queued, the job can restart
                 # from the beginning and proceed correctly. This is much more
                 # preferable than having monitor_db not launching.
                 queue_entry.set_status('Queued')
	# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	#pylint: disable-msg=C0111

	import os
	import logging
	import time

	from autotest_lib.client.common_lib import global_config
	from autotest_lib.client.common_lib.cros.graphite import autotest_stats
	from autotest_lib.frontend.afe import models
	from autotest_lib.scheduler import email_manager
	from autotest_lib.scheduler import scheduler_config, scheduler_models


	# Override default parser with our site parser.
	def parser_path(install_dir):
	    """Build the path of the site-specific results parser.

	    @param install_dir: installation directory.

	    @return: install_dir joined with the 'tko' and 'site_parse' components.
	    """
	    site_parser_parts = ('tko', 'site_parse')
	    return os.path.join(install_dir, *site_parser_parts)


	class SiteAgentTask(object):
	    """
	    SiteAgentTask subclasses BaseAgentTask in monitor_db.

	    The methods below call self._final_status(), which is not defined in
	    this class and must be supplied by the class it is combined with.
	    """


	    def _archive_results(self, queue_entries):
	        """
	        Set the status of queue_entries to ARCHIVING.

	        This method sets the status of the queue_entries to ARCHIVING
	        if the enable_archiving flag is true in global_config.ini.
	        Otherwise, it bypasses the archiving step and sets the queue entries
	        to the final status of current step (self._final_status()).

	        @param queue_entries: host queue entries whose status is updated.
	        """
	        enable_archiving = global_config.global_config.get_config_value(
	            scheduler_config.CONFIG_SECTION, 'enable_archiving', type=bool)
	        # Set the status of the queue entries to archiving or self final status
	        if enable_archiving:
	            status = models.HostQueueEntry.Status.ARCHIVING
	        else:
	            status = self._final_status()

	        # Update the entries handed in by the caller; previously the
	        # argument was ignored in favour of self.queue_entries.
	        for queue_entry in queue_entries:
	            queue_entry.set_status(status)


	    def _check_queue_entry_statuses(self, queue_entries, allowed_hqe_statuses,
	                                    allowed_host_statuses=None):
	        """
	        Abort the job of any queue entry that is in an unexpected state.

	        Forked from monitor_db.py. Instead of raising, each offending entry
	        is logged, reported by email and its job is asked to abort. An entry
	        that fails both checks is reported and aborted once per check.

	        @param queue_entries: host queue entries to validate.
	        @param allowed_hqe_statuses: statuses an entry is allowed to be in.
	        @param allowed_host_statuses: statuses an entry's host is allowed to
	                be in; None (the default) skips the host check.
	        """
	        class_name = self.__class__.__name__
	        for entry in queue_entries:
	            if entry.status not in allowed_hqe_statuses:
	                # In the original code, here we raise an exception. In an
	                # effort to prevent downtime we will instead abort the job and
	                # send out an email notifying us this has occurred.
	                error_message = ('%s attempting to start entry with invalid '
	                                 'status %s: %s. Aborting Job: %s.'
	                                 % (class_name, entry.status, entry,
	                                    entry.job))
	                logging.error(error_message)
	                email_manager.manager.enqueue_notify_email(
	                    'Job Aborted - Invalid Host Queue Entry Status',
	                    error_message)
	                entry.job.request_abort()
	            # entry.host is only touched when a host whitelist was given.
	            invalid_host_status = (
	                    allowed_host_statuses is not None
	                    and entry.host.status not in allowed_host_statuses)
	            if invalid_host_status:
	                # In the original code, here we raise an exception. In an
	                # effort to prevent downtime we will instead abort the job and
	                # send out an email notifying us this has occurred.
	                error_message = ('%s attempting to start on queue entry with '
	                                 'invalid host status %s: %s. Aborting Job: %s'
	                                 % (class_name, entry.host.status, entry,
	                                    entry.job))
	                logging.error(error_message)
	                email_manager.manager.enqueue_notify_email(
	                    'Job Aborted - Invalid Host Status', error_message)
	                entry.job.request_abort()


	class SiteDispatcher(object):
	    """
	    SiteDispatcher subclasses BaseDispatcher in monitor_db.

	    Each overridden step calls the parent implementation, is decorated with
	    the 'scheduler' stats timer, and reports to the 'scheduler_rel' gauge
	    the time elapsed since the start of the current tick.
	    """
	    # Fallback user id for Reset tasks when 'autotest_system' does not exist.
	    DEFAULT_REQUESTED_BY_USER_ID = 1


	    _timer = autotest_stats.Timer('scheduler')
	    _gauge = autotest_stats.Gauge('scheduler_rel')
	    # Timestamp (time.time()) taken at the start of the latest tick.
	    _tick_start = None


	    def _send_time_since_tick_start(self, name):
	        """
	        Send the seconds elapsed since the current tick began to the gauge.

	        Nothing is sent when no tick start has been recorded yet.

	        @param name: stat name the elapsed time is reported under.
	        """
	        if self._tick_start:
	            self._gauge.send(name, time.time() - self._tick_start)


	    @_timer.decorate
	    def tick(self):
	        """Run one tick and report its total duration to the gauge."""
	        self._tick_start = time.time()
	        super(SiteDispatcher, self).tick()
	        self._gauge.send('tick', time.time() - self._tick_start)


	    @_timer.decorate
	    def _garbage_collection(self):
	        super(SiteDispatcher, self)._garbage_collection()
	        self._send_time_since_tick_start('_garbage_collection')


	    @_timer.decorate
	    def _run_cleanup(self):
	        super(SiteDispatcher, self)._run_cleanup()
	        self._send_time_since_tick_start('_run_cleanup')


	    @_timer.decorate
	    def _find_aborting(self):
	        super(SiteDispatcher, self)._find_aborting()
	        self._send_time_since_tick_start('_find_aborting')


	    @_timer.decorate
	    def _process_recurring_runs(self):
	        super(SiteDispatcher, self)._process_recurring_runs()
	        self._send_time_since_tick_start('_process_recurring_runs')


	    @_timer.decorate
	    def _schedule_delay_tasks(self):
	        super(SiteDispatcher, self)._schedule_delay_tasks()
	        self._send_time_since_tick_start('_schedule_delay_tasks')


	    @_timer.decorate
	    def _schedule_running_host_queue_entries(self):
	        super(SiteDispatcher, self)._schedule_running_host_queue_entries()
	        self._send_time_since_tick_start(
	                '_schedule_running_host_queue_entries')


	    @_timer.decorate
	    def _schedule_special_tasks(self):
	        super(SiteDispatcher, self)._schedule_special_tasks()
	        self._send_time_since_tick_start('_schedule_special_tasks')


	    @_timer.decorate
	    def _schedule_new_jobs(self):
	        super(SiteDispatcher, self)._schedule_new_jobs()
	        self._send_time_since_tick_start('_schedule_new_jobs')


	    @_timer.decorate
	    def _handle_agents(self):
	        super(SiteDispatcher, self)._handle_agents()
	        self._send_time_since_tick_start('_handle_agents')


	    def _reverify_hosts_where(self, where,
	                              print_message='Reverifying host %s'):
	        """
	        Create a Reset special task for every matching host that needs one.

	        This is an altered version of _reverify_hosts_where: the call to
	        models.SpecialTask.objects.create passes in an argument for
	        requested_by, in order to allow the Reset task to be created
	        properly.

	        Hosts that are locked or invalid, that already have an agent, or
	        that already have a special task scheduled are skipped.

	        @param where: SQL condition appended to the host query.
	        @param print_message: message logged for each host that gets a Reset
	                task, with the hostname as its format argument. Pass a
	                false value to log nothing.
	        """
	        full_where = 'locked = 0 AND invalid = 0 AND ' + where
	        # Looked up lazily and at most once per call: the requesting user
	        # is the same for every host, so there is no need to query per host.
	        requested_by = None
	        for host in scheduler_models.Host.fetch(where=full_where):
	            if self.host_has_agent(host):
	                # host has already been recovered in some way
	                continue
	            if self._host_has_scheduled_special_task(host):
	                # host will have a special task scheduled on the next cycle
	                continue
	            if print_message:
	                logging.error(print_message, host.hostname)
	            if requested_by is None:
	                try:
	                    requested_by = models.User.objects.get(
	                            login='autotest_system')
	                except models.User.DoesNotExist:
	                    requested_by = models.User.objects.get(
	                            id=SiteDispatcher.DEFAULT_REQUESTED_BY_USER_ID)
	            models.SpecialTask.objects.create(
	                    task=models.SpecialTask.Task.RESET,
	                    host=models.Host.objects.get(id=host.id),
	                    requested_by=requested_by)


	    def _check_for_unrecovered_verifying_entries(self):
	        """
	        Requeue Resetting entries that have no pending special task.

	        Every host queue entry in the RESETTING status that has no
	        incomplete Cleanup, Verify or Reset special task attached is logged
	        and set back to 'Queued'.
	        """
	        # Verify is replaced by Reset.
	        queue_entries = scheduler_models.HostQueueEntry.fetch(
	                where='status = "%s"' % models.HostQueueEntry.Status.RESETTING)
	        for queue_entry in queue_entries:
	            special_tasks = models.SpecialTask.objects.filter(
	                    task__in=(models.SpecialTask.Task.CLEANUP,
	                              models.SpecialTask.Task.VERIFY,
	                              models.SpecialTask.Task.RESET),
	                    queue_entry__id=queue_entry.id,
	                    is_complete=False)
	            if special_tasks.count() == 0:
	                logging.error('Unrecovered Resetting host queue entry: %s. '
	                              'Setting status to Queued.', str(queue_entry))
	                # Essentially this host queue entry was set to be Resetting
	                # however no special task exists for entry. This occurs if the
	                # scheduler dies between changing the status and creating the
	                # special task. By setting it to queued, the job can restart
	                # from the beginning and proceed correctly. This is much more
	                # preferable than having monitor_db not launching.
	                queue_entry.set_status('Queued')