| # pylint: disable=missing-docstring |
| |
| """ This is the module for everything related to the AgentTask. |
| |
| The AgentTask imposes an interface through which the scheduler can monitor |
| a process. Examples of such processes include Verify, Cleanup and the Queue |
| Tasks that run the tests. The scheduler itself only understands Agents. |
| Agents: |
| The Agent is the bridge between the scheduler and the AgentTask. The |
| scheduler's tick has a method called handle_agents, which calls the |
| tick of each agent in the Dispatcher's queue. This leads to the Agent |
| polling its AgentTask. The scheduler will keep polling a task through |
| the associated Agent till the Agent is removed from the dispatcher. |
| |
| At a high level: |
| agents finished = tasks done |
| agent polls till finished |
| task polls till done |
| task sets done |
| agent is removed from dispatcher |
| AgentTasks: |
| Basic AgentTasks are created when an hqe changes state. Examples of these |
| are the QueueTask, which is created when a hqe goes into the Starting state |
| and the FinalReparseTask, which is created when the hqe goes into parsing. |
| SpecialAgentTasks: |
| Unlike AgentTasks, SpecialAgentTasks are only created when a row is inserted |
| in the afe_special_tasks table. All PrejobTasks are SpecialAgentTasks. |
| |
| Monitor_db.get_agent_task_for_special_task/get_agent_task_for_queue_entry maps |
| an AgentTask to an Agent, which the scheduler understands. From this point |
| onward, the scheduler manages the task through the Agent's interface, as follows: |
| At a high level: |
| task poll |
| start |
| prolog |
| tick till we get an exit code |
| finished(exit==0) |
| done=True |
| epilog |
| cleanup |
| set is_active, is_complete, success (checked in scheduler) |
| |
| The first special task for an HQE is usually Reset. |
| - poll: The first poll will start the task; polls thereafter will call the |
| task's tick method. A started task will have the started bit set. |
| - start: Call prolog, run the process and set the start bit. |
| - prolog: Usually where one puts any model state changes that happen before |
| the actual task. Different per Task. Examples of things that might |
| happen in a prolog: |
| - state of Host, HQE (to something like Resetting) |
| - delete any unwanted queued special tasks |
| - register a pidfile |
| - set the is_active bit on the special task |
| - run: |
| - create a PidfileRunMonitor |
| - pass the autoserv command, working directory etc to drone manager. |
| This will start the actual autoserv process. |
| - set the start bit: so subsequent polls do not 'start' again |
| |
| - tick: For as long as a started task's done bit is not set, a poll will lead |
| to a tick. The tick monitors the pid file of the autoserv process |
| running on the drone through the PidfileRunMonitor created in run. |
| If the autoserv process has finished we call finished with true/false |
| depending on the autoserv exit code. |
| |
| - finished: sets the done and success values, then calls epilog. The |
| done bit is important because the Agent polls this bit to |
| measure the success or failure of its task. |
| |
| - epilog: Is generally where we set status of the Host/HQE again, |
| requeue any other task that needs to run after this one |
| and perform cleanup. Just like the prolog, this step is |
| different per task. |
| |
| - cleanup: Sets the is_active and is_complete and success |
| states on the tasks model. Also uses the |
| drone_manager to: |
| unregister the pidfile |
| copy results of the task |
| (Note this is not to be confused with the |
| special task called cleanup). |
| |
| The actions we take in the epilog are based on the |
| success/failure of the autoserv process set in cleanup, |
| e.g. if reset failed we will enqueue a repair, but if all |
| is well the epilog will just return. Prejob task epilogs |
| also have an on_pending method that changes the status of |
| the HQE to pending/starting, which gets picked up in the |
| scheduler. |
| By this point the is_done flag is set, which results in the Agent noticing that |
| the task has finished and unregistering it from the dispatcher. |
| |
| Class hierarchy: |
| AgentTask |
| |--->SpecialAgentTask (prejob_task.py) |
| |--->RepairTask |
| |--->PreJobTask |
| |--->Verify, Cleanup, Reset, Provision |
| |
| |--->AbstractQueueTask (monitor_db.py) |
| |--->QueueTask |
| |--->HostlessQueueTask |
| |
| |--->PostJobTask (postjob_task.py) |
| |--->GatherLogsTask |
| |--->SelfThrottledPostJobTask |
| |--->FinalReparseTask |
| |
| """ |
| |
| import logging |
| import os |
| import time |
| import urllib |
| |
| import common |
| |
| from autotest_lib.client.common_lib import global_config |
| from autotest_lib.client.common_lib import utils |
| from autotest_lib.frontend.afe import models |
| from autotest_lib.scheduler import drone_manager |
| from autotest_lib.scheduler import email_manager |
| from autotest_lib.scheduler import pidfile_monitor |
| from autotest_lib.scheduler import rdb_lib |
| from autotest_lib.scheduler import scheduler_lib |
| from autotest_lib.scheduler import scheduler_models |
| from autotest_lib.server import autoserv_utils |
| from autotest_lib.server import system_utils |
| |
| # chromite's metrics library is optional: when it cannot be imported, fall |
| # back to the metrics_mock object exposed by common_lib.utils. |
| try: |
| from chromite.lib import metrics |
| except ImportError: |
| metrics = utils.metrics_mock |
| |
| |
| # Handle to the global configuration object. |
| CONFIG = global_config.global_config |
| # Passed as nice_level when AgentTask.run() starts the task's process. |
| AUTOSERV_NICE_LEVEL = 10 |
| |
| # Boolean read from the CROS config section (False when unset). It is the |
| # default value of enable_drone_in_subnet in |
| # AgentTask.get_drone_hostnames_allowed(). |
| ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value( |
| 'CROS', 'enable_drone_in_restricted_subnet', type=bool, |
| default=False) |
| |
| |
class AgentTask(object):
    """Base class for work the scheduler runs and monitors through an Agent.

    A task is driven by repeated calls to poll(): poll() starts the task if it
    has not been started (prolog() followed by run()) and then calls tick()
    for as long as the task is not done. tick() asks the monitor for an exit
    code; once one is available finished() records the outcome and runs
    epilog().

    Subclasses must override _command_line(), _working_directory() and
    owner_username.
    """

    class _NullMonitor(object):
        """Placeholder returned by the default _paired_with_monitor().

        It carries no pidfile id and always reports that it has a process.
        """
        pidfile_id = None

        def has_process(self):
            return True


    def __init__(self, log_file_name=None):
        """
        @param log_file_name: (optional) name of file to log command output to
        """
        self._drone_manager = drone_manager.instance()
        self.done = False
        self.started = False
        self.success = None
        self.aborted = False
        self.monitor = None
        self.queue_entry_ids = []
        self.host_ids = []
        # A map between host id and hostname.
        self.hostnames = {}
        self._log_file_name = log_file_name


    def _set_ids(self, host=None, queue_entries=None):
        """Record the host / queue entry ids this task operates on.

        When queue_entries is given (and is not [None]) the ids and hostnames
        are collected from the entries; otherwise they come from host, which
        is then required.

        @param host: host object with id and hostname attributes.
        @param queue_entries: list of queue entries, each with id and host.
        @raises scheduler_lib.NoHostIdError: if a queue entry has no host.
        """
        if queue_entries and queue_entries != [None]:
            self.host_ids = []
            self.queue_entry_ids = []
            self.hostnames = {}
            for entry in queue_entries:
                if entry.host is not None:
                    self.host_ids.append(entry.host.id)
                    self.queue_entry_ids.append(entry.id)
                    self.hostnames[entry.host.id] = entry.host.hostname
                else:
                    logging.debug(
                            'No host is found for host_queue_entry_id: %r',
                            entry.id)
                    raise scheduler_lib.NoHostIdError(
                            'Failed to schedule a job whose '
                            'host_queue_entry_id=%r due to no host_id.'
                            % entry.id)
        else:
            assert host
            self.host_ids = [host.id]
            self.hostnames = {host.id: host.hostname}


    def poll(self):
        """Start the task if necessary, then tick it unless it is done."""
        if not self.started:
            self.start()
        if not self.done:
            self.tick()


    def tick(self):
        """Finish the task once the monitor reports an exit code.

        Does nothing while the exit code is still None; otherwise calls
        finished() with True exactly when the exit code is 0.
        """
        assert self.monitor
        exit_code = self.monitor.exit_code()
        if exit_code is None:
            return

        success = (exit_code == 0)
        self.finished(success)


    def is_done(self):
        """Return the done flag."""
        return self.done


    def finished(self, success):
        """Mark the task started and done, store success and run epilog().

        Calling this on a task that is already done is a no-op.

        @param success: value stored in self.success.
        """
        if self.done:
            assert self.started
            return
        self.started = True
        self.done = True
        self.success = success
        self.epilog()


    def prolog(self):
        """
        To be overridden.

        The base implementation asserts that no monitor exists yet and
        registers the pidfiles the task needs.
        """
        assert not self.monitor
        self.register_necessary_pidfiles()


    def _log_file(self):
        """Return the log file path under the working directory, or None
        when no log file name was given."""
        if not self._log_file_name:
            return None
        return os.path.join(self._working_directory(), self._log_file_name)


    def cleanup(self):
        """Ask the monitor to copy the log file to the results repository.

        Nothing happens when there is no monitor or no log file.
        """
        log_file = self._log_file()
        if self.monitor and log_file:
            self.monitor.try_copy_to_results_repository(log_file)


    def epilog(self):
        """
        To be overridden.

        The base implementation runs cleanup() and logs the outcome.
        """
        self.cleanup()
        logging.info("%s finished with success=%s", type(self).__name__,
                     self.success)


    def start(self):
        """Run prolog() and run() unless already started, then set the
        started flag."""
        if not self.started:
            self.prolog()
            self.run()

        self.started = True


    def abort(self):
        """Kill the monitored process (if any), mark the task done and
        aborted, and run cleanup()."""
        if self.monitor:
            self.monitor.kill()
        self.done = True
        self.aborted = True
        self.cleanup()


    def _get_consistent_execution_path(self, execution_entries):
        """Return the execution path shared by all the given entries.

        @param execution_entries: non-empty list of objects with an
                execution_path() method; all must return the same path
                (checked with an assert).
        """
        first_execution_path = execution_entries[0].execution_path()
        for execution_entry in execution_entries[1:]:
            assert execution_entry.execution_path() == first_execution_path, (
                    '%s (%s) != %s (%s)' % (execution_entry.execution_path(),
                                            execution_entry,
                                            first_execution_path,
                                            execution_entries[0]))
        return first_execution_path


    def _copy_results(self, execution_entries, use_monitor=None):
        """
        Ask a monitor to copy the entries' execution directory to the
        results repository.

        @param execution_entries: list of objects with execution_path() method
        @param use_monitor: monitor to copy with; defaults to self.monitor.
                If a monitor is passed explicitly and it has no process,
                nothing is copied.
        """
        if use_monitor is not None and not use_monitor.has_process():
            return

        assert len(execution_entries) > 0
        if use_monitor is None:
            assert self.monitor
            use_monitor = self.monitor
        assert use_monitor.has_process()
        execution_path = self._get_consistent_execution_path(execution_entries)
        results_path = execution_path + '/'
        use_monitor.try_copy_to_results_repository(results_path)


    def _parse_results(self, queue_entries):
        """Set every given queue entry's status to PARSING."""
        for queue_entry in queue_entries:
            queue_entry.set_status(models.HostQueueEntry.Status.PARSING)


    def _command_line(self):
        """
        Return the command line to run. Must be overridden.
        """
        raise NotImplementedError


    @property
    def num_processes(self):
        """
        Return the number of processes forked by this AgentTask's process.
        It may only be approximate. To be overridden if necessary.
        """
        return 1


    def _paired_with_monitor(self):
        """
        If this AgentTask's process must run on the same machine as some
        previous process, this method should be overridden to return a
        PidfileRunMonitor for that process.
        """
        return self._NullMonitor()


    @property
    def owner_username(self):
        """
        Return login of user responsible for this task. May be None. Must be
        overridden.
        """
        raise NotImplementedError


    def _working_directory(self):
        """
        Return the directory where this AgentTask's process executes.
        Must be overridden.
        """
        raise NotImplementedError


    def _pidfile_name(self):
        """
        Return the name of the pidfile this AgentTask's process uses. To be
        overridden if necessary.
        """
        return drone_manager.AUTOSERV_PID_FILE


    def _check_paired_results_exist(self):
        """Return whether the paired monitor has a process.

        When it does not, a no_paired_results error counter is incremented and
        the task is finished as a failure before False is returned.
        """
        if not self._paired_with_monitor().has_process():
            metrics.Counter(
                    'chromeos/autotest/errors/scheduler/no_paired_results'
            ).increment()
            self.finished(False)
            return False
        return True


    def _create_monitor(self):
        """Create the PidfileRunMonitor for this task; none may exist yet."""
        assert not self.monitor
        self.monitor = pidfile_monitor.PidfileRunMonitor()


    def run(self):
        """Create the monitor and use it to launch the task's command.

        Returns without launching anything when the paired results do not
        exist (the task has then already been finished as failed).
        """
        if not self._check_paired_results_exist():
            return

        self._create_monitor()
        self.monitor.run(
                self._command_line(), self._working_directory(),
                num_processes=self.num_processes,
                nice_level=AUTOSERV_NICE_LEVEL, log_file=self._log_file(),
                pidfile_name=self._pidfile_name(),
                paired_with_pidfile=self._paired_with_monitor().pidfile_id,
                username=self.owner_username,
                drone_hostnames_allowed=self.get_drone_hostnames_allowed())


    def get_drone_hostnames_allowed(
            self, restricted_subnets=utils.RESTRICTED_SUBNETS,
            enable_drone_in_subnet=ENABLE_DRONE_IN_RESTRICTED_SUBNET):
        """Work out which drones may run this task.

        Two filters are combined: the restricted-subnet filter (applied only
        when restricted_subnets is non-empty and enable_drone_in_subnet is
        true) and, when drone sets are enabled, the drone set of the job or
        the user/global default drone set.

        @param restricted_subnets: restricted subnets to test the task's
                hostnames against; each matched subnet is indexed as
                subnet[0], subnet[1] (presumably address and mask bits -
                TODO confirm against utils.get_restricted_subnet).
        @param enable_drone_in_subnet: whether the restricted-subnet filter
                is applied at all.

        @returns: None when every drone is allowed, otherwise a collection of
                allowed drone hostnames. An empty set means no drone may run
                the task.
        """
        filtered_drones = None
        has_unrestricted_host = False
        if (self.hostnames and restricted_subnets and enable_drone_in_subnet):
            for hostname in self.hostnames.values():
                subnet = utils.get_restricted_subnet(hostname,
                                                     restricted_subnets)

                # Return an empty set if the list of hosts exists both in
                # restricted and unrestricted subnet. No drone can work in such
                # case.
                if ((not subnet and filtered_drones is not None) or
                    (subnet and has_unrestricted_host)):
                    logging.error('The test has some DUT in restricted subnet, '
                                  'but some in unrestricted subnet. Therefore, '
                                  'no drone is available to run the test.')
                    return set()

                if not subnet:
                    has_unrestricted_host = True
                    continue

                server_ip_map = system_utils.DroneCache.get_drone_ip_map()
                filtered_drones_for_host = set(
                        utils.get_servers_in_same_subnet(
                                subnet[0], subnet[1],
                                server_ip_map=server_ip_map))
                logging.info('DUT %s is in restricted subnet, drone can only '
                             'be chosen from %s', hostname,
                             filtered_drones_for_host)
                if filtered_drones is None:
                    filtered_drones = filtered_drones_for_host
                else:
                    filtered_drones = set.intersection(
                            filtered_drones, filtered_drones_for_host)

            # If filtered_drones is an empty set, that means no drone is
            # allowed to run the task. This is different from None, which
            # means all drones are allowed.
            if filtered_drones == set():
                logging.error('DUT(s) is in restricted subnet, but no '
                              'drone is available to run the test.')
                return filtered_drones

        # If host is not in restricted subnet, use the unrestricted drones only.
        if (filtered_drones is None and restricted_subnets and
            enable_drone_in_subnet):
            filtered_drones = set(
                    system_utils.DroneCache.get_unrestricted_drones(
                            restricted_subnets=restricted_subnets))

        if not models.DroneSet.drone_sets_enabled():
            return filtered_drones

        hqes = models.HostQueueEntry.objects.filter(id__in=self.queue_entry_ids)
        if not hqes:
            # Only special tasks could be missing host queue entries
            assert isinstance(self, SpecialAgentTask)
            return self._user_or_global_default_drone_set(
                    self.task, self.task.requested_by)

        job_ids = hqes.values_list('job', flat=True).distinct()
        assert job_ids.count() == 1, ("AgentTask's queue entries "
                                      "span multiple jobs")

        job = models.Job.objects.get(id=job_ids[0])
        drone_set = job.drone_set
        if not drone_set:
            return self._user_or_global_default_drone_set(job, job.user())

        # Compare against None rather than relying on truthiness: an empty
        # filtered_drones means "no drone allowed" and must not be widened to
        # the job's whole drone set. This matches the value returned above
        # when drone sets are disabled.
        if filtered_drones is not None:
            return set.intersection(filtered_drones,
                                    drone_set.get_drone_hostnames())
        else:
            return drone_set.get_drone_hostnames()


    def _user_or_global_default_drone_set(self, obj_with_owner, user):
        """
        Returns the user's default drone set, if present.

        Otherwise, returns the global default drone set.

        @param obj_with_owner: object named in the warning logged when there
                is no user.
        @param user: user object with drone_set and login, or a false value.
        """
        default_hostnames = models.DroneSet.get_default().get_drone_hostnames()
        if not user:
            logging.warning('%s had no owner; using default drone set',
                            obj_with_owner)
            return default_hostnames
        if not user.drone_set:
            logging.warning('User %s has no default drone set, using global '
                            'default', user.login)
            return default_hostnames
        return user.drone_set.get_drone_hostnames()


    def register_necessary_pidfiles(self):
        """Register this task's pidfile, and the paired monitor's pidfile
        when it has one, with the drone manager."""
        pidfile_id = self._drone_manager.get_pidfile_id_from(
                self._working_directory(), self._pidfile_name())
        self._drone_manager.register_pidfile(pidfile_id)

        paired_pidfile_id = self._paired_with_monitor().pidfile_id
        if paired_pidfile_id:
            self._drone_manager.register_pidfile(paired_pidfile_id)


    def recover(self):
        """Try to attach to a process already running for this task.

        When a process is found the task is marked as started; otherwise the
        monitor is discarded so that the task can be started normally.
        """
        if not self._check_paired_results_exist():
            return

        self._create_monitor()
        self.monitor.attach_to_existing_process(
                self._working_directory(), pidfile_name=self._pidfile_name(),
                num_processes=self.num_processes)
        if not self.monitor.has_process():
            # no process to recover; wait to be started normally
            self.monitor = None
            return

        self.started = True
        logging.info('Recovering process %s for %s at %s',
                     self.monitor.get_process(), type(self).__name__,
                     self._working_directory())


    def _abort_job_and_notify(self, entry, subject, error_message):
        """Log error_message, queue a notification email with the given
        subject and request an abort of the entry's job."""
        logging.error(error_message)
        email_manager.manager.enqueue_notify_email(subject, error_message)
        entry.job.request_abort()


    def _check_queue_entry_statuses(self, queue_entries, allowed_hqe_statuses,
                                    allowed_host_statuses=None):
        """Abort the job of every queue entry found in an unexpected state.

        In the original code an exception was raised here. In an effort to
        prevent downtime the job is instead aborted and an email is sent out
        to notify that this has occurred.

        @param queue_entries: queue entries to check.
        @param allowed_hqe_statuses: statuses an entry may be in.
        @param allowed_host_statuses: statuses the entry's host may be in;
                None (the default) skips the host check.
        """
        class_name = self.__class__.__name__
        for entry in queue_entries:
            if entry.status not in allowed_hqe_statuses:
                error_message = ('%s attempting to start entry with invalid '
                                 'status %s: %s. Aborting Job: %s.'
                                 % (class_name, entry.status, entry,
                                    entry.job))
                self._abort_job_and_notify(
                        entry,
                        'Job Aborted - Invalid Host Queue Entry Status',
                        error_message)
            invalid_host_status = (
                    allowed_host_statuses is not None
                    and entry.host.status not in allowed_host_statuses)
            if invalid_host_status:
                error_message = ('%s attempting to start on queue entry with '
                                 'invalid host status %s: %s. Aborting Job: %s'
                                 % (class_name, entry.host.status, entry,
                                    entry.job))
                self._abort_job_and_notify(
                        entry, 'Job Aborted - Invalid Host Status',
                        error_message)
| |
| |
class TaskWithJobKeyvals(object):
    """AgentTask mixin providing functionality to help with job keyval files.

    The class it is mixed into must provide self._drone_manager,
    self.monitor and _working_directory().
    """
    _KEYVAL_FILE = 'keyval'


    def _format_keyval(self, key, value):
        """Return the 'key=value' line for one keyval."""
        return '%s=%s' % (key, value)


    def _keyval_path(self):
        """Subclasses must override this"""
        raise NotImplementedError


    def _write_keyval_after_job(self, field, value):
        """Write one keyval line to the keyval file, paired with the
        monitored process.

        Nothing is written when the monitor has no process.
        """
        assert self.monitor
        if not self.monitor.has_process():
            return
        self._drone_manager.write_lines_to_file(
            self._keyval_path(), [self._format_keyval(field, value)],
            paired_with_process=self.monitor.get_process())


    def _job_queued_keyval(self, job):
        """Return ('job_queued', <job.created_on as integer seconds>)."""
        return 'job_queued', int(time.mktime(job.created_on.timetuple()))


    def _write_job_finished(self):
        """Write a job_finished keyval holding the current time."""
        self._write_keyval_after_job("job_finished", int(time.time()))


    def _write_keyvals_before_job_helper(self, keyval_dict, keyval_path):
        """Attach a keyval file built from keyval_dict to the execution.

        @param keyval_dict: mapping of keys to values; one line is written
                per item.
        @param keyval_path: path passed to the drone manager as file_path.
        """
        # items() rather than the Python-2-only iteritems(), so this also
        # runs on Python 3.
        keyval_contents = '\n'.join(self._format_keyval(key, value)
                                    for key, value in keyval_dict.items())
        # always end with a newline to allow additional keyvals to be written
        keyval_contents += '\n'
        self._drone_manager.attach_file_to_execution(self._working_directory(),
                                                keyval_contents,
                                                file_path=keyval_path)


    def _write_keyvals_before_job(self, keyval_dict):
        """Write keyval_dict to the file returned by _keyval_path()."""
        self._write_keyvals_before_job_helper(keyval_dict, self._keyval_path())


    def _write_host_keyvals(self, host):
        """Write the host's platform and URL-quoted labels to
        host_keyvals/<hostname> under the working directory.

        @param host: object with hostname and platform_and_labels().
        """
        # quote() lives in urllib on Python 2 and urllib.parse on Python 3.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote

        keyval_path = os.path.join(self._working_directory(), 'host_keyvals',
                                   host.hostname)
        platform, all_labels = host.platform_and_labels()
        all_labels = [quote(label) for label in all_labels]
        keyval_dict = dict(platform=platform, labels=','.join(all_labels))
        self._write_keyvals_before_job_helper(keyval_dict, keyval_path)
| |
| |
class SpecialAgentTask(AgentTask, TaskWithJobKeyvals):
    """
    AgentTask backed by a SpecialTask row in the database.

    Concrete subclasses must set TASK_TYPE.
    """

    _COUNT_METRIC = 'chromeos/autotest/scheduler/special_task_count'
    _DUT_METRIC = 'chromeos/autotest/scheduler/special_task_by_dut'
    _DURATION_METRIC = 'chromeos/autotest/scheduler/special_task_durations'

    TASK_TYPE = None
    host = None
    queue_entry = None


    def __init__(self, task, extra_command_args):
        """
        @param task: the SpecialTask this agent task runs (as defined in
                frontend/afe/models.py).
        @param extra_command_args: extra arguments handed to the autoserv
                command line.
        """
        super(SpecialAgentTask, self).__init__()

        assert self.TASK_TYPE is not None, 'self.TASK_TYPE must be overridden'

        looked_up_hosts = rdb_lib.get_hosts([task.host.id])
        self.host = looked_up_hosts[0]
        self.host.dbg_str = 'Task: %s' % str(task)
        if task.queue_entry:
            self.queue_entry = scheduler_models.HostQueueEntry(
                    id=task.queue_entry.id)
            self.host.dbg_str += self.queue_entry.get_dbg_str()
        else:
            self.queue_entry = None

        # self.task has to be in place before get_metadata() reads it.
        self.task = task
        self._extra_command_args = extra_command_args
        self.host.metadata = self.get_metadata()
        self._milestone = ''


    def get_metadata(self):
        """Collect descriptive information about the task into a dictionary.

        The dictionary holds the task id, task name and hostname and, when the
        task has a queue entry, the metadata of the related job as well.

        @return: The metadata dictionary. If an attribute needed to build it
                 cannot be accessed, the error is logged and an empty
                 dictionary is returned instead.
        """
        try:
            special_task = self.task
            info = {
                'task_id': special_task.id,
                'task_name': special_task.task,
                'hostname': special_task.host.hostname,
            }
            if special_task.queue_entry:
                info.update(scheduler_models.get_job_metadata(
                        special_task.queue_entry.job))
            return info
        except AttributeError as e:
            logging.error('Task has missing attribute: %s', e)
            return {}


    def _keyval_path(self):
        """Return the keyval file path inside the working directory."""
        working_dir = self._working_directory()
        return os.path.join(working_dir, self._KEYVAL_FILE)


    def _command_line(self):
        """Return the autoserv command line for this special task."""
        return autoserv_utils.autoserv_run_job_command(
                autoserv_utils.autoserv_directory, self.host.hostname,
                results_directory=drone_manager.WORKING_DIRECTORY,
                extra_args=self._extra_command_args,
                queue_entry=self.queue_entry, in_lab=True)


    def _working_directory(self):
        """Return the execution path of the underlying special task."""
        return self.task.execution_path()


    @property
    def owner_username(self):
        """Login of the user who requested the task, or None if there is no
        requester."""
        if not self.task.requested_by:
            return None
        return self.task.requested_by.login


    def prolog(self):
        """Run the base prolog, activate the task and write host keyvals."""
        super(SpecialAgentTask, self).prolog()
        self.task.activate()
        self._write_host_keyvals(self.host)


    def _fail_queue_entry(self):
        """Fail the queue entry, unless it is a metahost entry or is no
        longer in the QUEUED state after a refresh from the database."""
        assert self.queue_entry

        if not self.queue_entry.meta_host:
            self.queue_entry.update_from_database()
            still_queued = (self.queue_entry.status ==
                            models.HostQueueEntry.Status.QUEUED)
            if still_queued:
                self._actually_fail_queue_entry()
            # otherwise the entry has been aborted
        # metahost entries are not failed, they'll be reassigned


    def epilog(self):
        """Run the base epilog, then emit the task status metrics."""
        super(SpecialAgentTask, self).epilog()
        self._emit_special_task_status_metric()


    def _emit_special_task_status_metric(self):
        """Emit the count, duration and per-DUT metrics for this task."""
        fields = {
            'type': self.TASK_TYPE,
            'success': bool(self.success),
            'board': str(self.host.board),
            'milestone': self._milestone,
        }
        count_metric = metrics.Counter(self._COUNT_METRIC)
        count_metric.increment(fields=fields)

        # The duration is only reported when both timestamps are set.
        if self.task.time_finished and self.task.time_started:
            elapsed = self.task.time_finished - self.task.time_started
            duration = elapsed.total_seconds()
            metrics.SecondsDistribution(self._DURATION_METRIC).add(
                    duration, fields=fields)

        dut_fields = {
            'type': self.TASK_TYPE,
            'success': bool(self.success),
            'board': str(self.host.board),
            'dut_host_name': self.host.hostname,
        }
        dut_metric = metrics.Counter(self._DUT_METRIC)
        dut_metric.increment(fields=dut_fields)


    # TODO(milleral): http://crbug.com/268607
    # All this used to be a part of _fail_queue_entry.  The
    # exact semantics of when one should and should not be failing a queue
    # entry need to be worked out, because provisioning has placed us in a
    # case where we want to fail a queue entry that could be requeued,
    # which makes us fail the two checks in _fail_queue_entry, and thus
    # _fail_queue_entry() would exit early and have no effect.
    # What's left here with _actually_fail_queue_entry is a hack to be able to
    # bypass the checks and unconditionally execute the code.
    def _actually_fail_queue_entry(self):
        """Unconditionally fail the queue entry and hand it to parsing."""
        hqe = self.queue_entry
        hqe.set_execution_subdir()
        queued_key, queued_time = self._job_queued_keyval(hqe.job)
        self._write_keyval_after_job(queued_key, queued_time)
        self._write_job_finished()

        # copy results logs into the normal place for job results
        source = self._working_directory() + '/'
        destination = self.queue_entry.execution_path() + '/'
        self.monitor.try_copy_results_on_drone(
                source_path=source, destination_path=destination)

        pidfile_id = self._drone_manager.get_pidfile_id_from(
                self.queue_entry.execution_path(),
                pidfile_name=drone_manager.AUTOSERV_PID_FILE)
        self._drone_manager.register_pidfile(pidfile_id)

        # TODO(ayatane): This should obey self.queue_entry.job.parse_failed_repair
        # But nothing sets self.queue_entry.job.parse_failed_repair?
        # Check Git blame
        self._parse_results([self.queue_entry])

        # Also fail all other special tasks that have not yet run for this HQE
        unfinished = models.SpecialTask.objects.filter(
                queue_entry__id=self.queue_entry.id,
                is_complete=0)
        for unfinished_task in unfinished:
            unfinished_task.finish(False)


    def cleanup(self):
        """Run the base cleanup, mark the special task finished, copy its
        results and unregister the monitor's pidfile."""
        super(SpecialAgentTask, self).cleanup()

        # We will consider an aborted task to be "Failed"
        self.task.finish(bool(self.success))

        if not self.monitor:
            return
        if self.monitor.has_process():
            self._copy_results([self.task])
        if self.monitor.pidfile_id is not None:
            self._drone_manager.unregister_pidfile(self.monitor.pidfile_id)


    def remove_special_tasks(self, special_task_to_remove, keep_last_one=False):
        """Delete this host's queued special tasks of the given type.

        Only tasks that are neither active nor complete and that have no
        queue entry are deleted.

        @param special_task_to_remove: type of special task to be removed, e.g.,
            models.SpecialTask.Task.VERIFY.
        @param keep_last_one: True to exclude this agent task's own special
            task (self.task) from the deletion.

        """
        doomed_tasks = models.SpecialTask.objects.filter(
                host__id=self.host.id,
                task=special_task_to_remove,
                is_active=False, is_complete=False, queue_entry=None)
        if keep_last_one:
            doomed_tasks = doomed_tasks.exclude(id=self.task.id)
        doomed_tasks.delete()


    def _generate_autoserv_label_args(self, task):
        """
        @param task: An instance of afe model's SpecialTask.
        @returns: The list of arguments to pass to autoserv to tell it what the
                  labels of a job are.

        """
        label_names = set(label.name for label in task.queue_entry.job.labels)
        joined_labels = ','.join(label_names)
        return ['--job-labels', joined_labels]