| # Copyright (c) 2014 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| # This file contains utility functions for host_history. |
| |
| import collections |
| import copy |
| |
| import common |
| from autotest_lib.client.common_lib import time_utils |
| from autotest_lib.client.common_lib.cros.graphite import es_utils |
| from autotest_lib.frontend import setup_django_environment |
| from autotest_lib.frontend.afe import models |
| from autotest_lib.site_utils import job_history |
| |
def prepopulate_dict(keys, value, extras=None):
    """Creates an ordered dictionary with val=value for each key.

    Keys are inserted in the order given: everything from `keys` first,
    followed by everything from `extras`. Every key is bound to the very
    same `value` object.

    @param keys: iterable of keys (list, tuple, ...)
    @param value: the value of each entry in the dict.
    @param extras: optional iterable of additional keys
    @returns: collections.OrderedDict mapping each key to value
    """
    # Normalize both inputs to lists before concatenating. Adding a tuple of
    # extras directly to `keys` only worked when `keys` itself was a tuple
    # and raised TypeError for a list of keys.
    all_keys = list(keys) + list(extras or [])
    return collections.OrderedDict((key, value) for key in all_keys)
| |
| |
def lock_history_to_intervals(initial_lock_val, t_start, t_end, lock_history):
    """Turns a sequence of lock state changes into locked time intervals.

    @param initial_lock_val: Lock state in effect at t_start (truthy means
            locked).
    @param t_start: beginning of the time period we are interested in.
    @param t_end: end of the time period we are interested in.
    @param lock_history: Result of querying es for locks (dict). Each hit
            under ['hits']['hits'] must carry ['fields']['time_recorded'][0]
            and ['fields']['locked'][0].
    @returns: list of (start, end) tuples, one per stretch of time during
            which the host was locked, in the order the hits were given.
    """
    intervals = []
    span_start = t_start
    is_locked = initial_lock_val
    for hit in lock_history['hits']['hits']:
        fields = hit['fields']
        change_time = fields['time_recorded'][0]
        # The span that just ended counts only if it was a locked one.
        if is_locked:
            intervals.append((span_start, change_time))
        span_start, is_locked = change_time, fields['locked'][0]
    # A lock still held after the last recorded change lasts until t_end.
    if is_locked:
        intervals.append((span_start, t_end))
    return intervals
| |
| |
def find_most_recent_entry_before(t, type_str, hostname, fields, index):
    """Fetches the newest es record of a host with time_recorded up to t.

    @param t: upper bound for time_recorded.
    @param type_str: _type in esdb, such as 'host_history' (string)
    @param hostname: hostname of DUT (string)
    @param fields: list of fields we are interested in; a falsy value makes
            the whole record (read from '_source') be used instead.
    @param index: index in elasticsearch to query data for.
    @returns: the result of es_utils.convert_hit on the newest matching
            hit, or an empty dict when the query matched nothing.
    """
    equality = [('_type', type_str),
                ('hostname', hostname)]
    newest_first = [{'time_recorded': 'desc'}]
    query = es_utils.create_range_eq_query_multiple(
            fields_returned=fields,
            equality_constraints=equality,
            range_constraints=[('time_recorded', None, t)],
            size=1,
            sort_specs=newest_first)
    response = es_utils.execute_query(
            query, index,
            es_utils.METADATA_ES_SERVER, es_utils.ES_PORT)
    hits = response['hits']
    if hits['total'] <= 0:
        return {}
    # With explicit fields the data lives under "fields"; otherwise the
    # full record is returned under "_source".
    if fields:
        payload_key = 'fields'
    else:
        payload_key = '_source'
    return es_utils.convert_hit(hits['hits'][0][payload_key])
| |
| |
def host_history_intervals(t_start, t_end, hostname, size, index):
    """Builds the status timeline of a host between t_start and t_end.

    @param t_start: beginning of time period we are interested in.
    @param t_end: end of time period we are interested in.
    @param hostname: hostname for the host we are interested in (string)
    @param size: maximum number of entries returned per query
    @param index: index in elasticsearch to query data for.
    @returns: tuple (intervals_of_statuses, num_entries_found)
            intervals_of_statuses: OrderedDict keyed by (ti, tf) whose
                    values are the status dictionaries produced by
                    calculate_status_times.
            num_entries_found: the 'total' hit count of the host_history
                    query over [t_start, t_end].
    """
    # State in effect when the period starts: the newest lock record and the
    # newest host_history record up to t_start. Either lookup yields an
    # empty dict when nothing was found, hence the .get calls.
    prior_lock = find_most_recent_entry_before(
            t=t_start, type_str='lock_history', hostname=hostname,
            fields=['time_recorded', 'locked'], index=index)
    prior_lock_time = prior_lock.get('time_recorded', None)
    prior_lock_state = prior_lock.get('locked', None)

    prior_host = find_most_recent_entry_before(
            t=t_start, type_str='host_history', hostname=hostname,
            fields=None, index=index)
    prior_host_time = prior_host.get('time_recorded', None)
    prior_host_status = prior_host.get('status', None)
    prior_metadata = es_utils.get_metadata(prior_host,
                                           ['time_recorded', 'status'])

    # Without an earlier host_history record the host starts as 'Ready'.
    if prior_host_time:
        first_status = prior_host_status
    else:
        first_status = 'Ready'

    # Lock history is read starting from the earliest known timestamp.
    known_times = [prior_lock_time, prior_host_time, t_start]
    lock_query_start = min([known for known in known_times if known])

    lock_query = es_utils.create_range_eq_query_multiple(
            fields_returned=['locked', 'time_recorded'],
            equality_constraints=[('_type', 'lock_history'),
                                  ('hostname', hostname)],
            range_constraints=[('time_recorded', lock_query_start, t_end)],
            size=size,
            sort_specs=[{'time_recorded': 'asc'}])
    lock_entries = es_utils.execute_query(
            lock_query, index,
            es_utils.METADATA_ES_SERVER, es_utils.ES_PORT)
    locked_intervals = lock_history_to_intervals(
            prior_lock_state, lock_query_start, t_end, lock_entries)

    host_query = es_utils.create_range_eq_query_multiple(
            fields_returned=None,
            equality_constraints=[("_type", "host_history"),
                                  ("hostname", hostname)],
            range_constraints=[("time_recorded", t_start, t_end)],
            size=size,
            sort_specs=[{"time_recorded": "asc"}])
    host_entries = es_utils.execute_query(
            host_query, index,
            es_utils.METADATA_ES_SERVER, es_utils.ES_PORT)
    num_entries_found = host_entries['hits']['total']

    intervals_of_statuses = collections.OrderedDict()
    span_start = t_start
    span_status = first_status
    span_metadata = prior_metadata
    # Each record closes the span that was open and opens the next one.
    for hit in host_entries['hits']['hits']:
        source = hit['_source']
        next_start = source['time_recorded']
        next_status = source['status']
        next_metadata = es_utils.get_metadata(source,
                                              ['time_recorded', 'status'])
        intervals_of_statuses.update(calculate_status_times(
                span_start, next_start, span_status, span_metadata,
                locked_intervals))
        span_start = next_start
        span_status = next_status
        span_metadata = next_metadata

    # The span still open after the last record runs until t_end.
    intervals_of_statuses.update(calculate_status_times(
            span_start, t_end, span_status, span_metadata,
            locked_intervals))
    return intervals_of_statuses, num_entries_found
| |
| |
def calculate_total_times(intervals_of_statuses):
    """Sums up the time spent in each status.

    @param intervals_of_statuses: ordereddict where key=(ti, tf) and the
            value is a dictionary holding at least the key 'status'.
    @returns: ordered dictionary where key=status and value=time spent in
            that status. Every name in models.Host.Status.names plus
            'Locked' is present, starting from 0.0.
    """
    totals = prepopulate_dict(models.Host.Status.names, 0.0,
                              extras=['Locked'])
    for (begin, end), info in intervals_of_statuses.iteritems():
        totals[info['status']] += end - begin
    return totals
| |
| |
def aggregate_hosts(intervals_of_statuses_list):
    """Adds up the per-status times of several hosts.

    @param intervals_of_statuses_list: A list of dictionaries, one per host,
            where keys are tuple (ti, tf), and value is the status along
            with other metadata.
    @returns: tuple (stats_all, num_hosts)
            stats_all: ordered dictionary where key=status and value is the
                    total time spent in that status among all hosts.
            num_hosts: number of entries in intervals_of_statuses_list.
    """
    host_count = len(intervals_of_statuses_list)
    combined = prepopulate_dict(models.Host.Status.names, 0.0,
                                extras=['Locked'])
    for host_intervals in intervals_of_statuses_list:
        per_host = calculate_total_times(host_intervals)
        for status, seconds in per_host.iteritems():
            combined[status] += seconds
    return combined, host_count
| |
| |
def get_stats_string_aggregate(labels, t_start, t_end, aggregated_stats,
                               num_hosts):
    """Returns string reporting overall host history for a group of hosts.

    @param labels: A list of labels useful for describing the group
            of hosts these overall stats represent.
    @param t_start: beginning of time period we are interested in.
    @param t_end: end of time period we are interested in.
    @param aggregated_stats: A dictionary where keys are string, e.g. 'status'
            value is total time spent in that status among all hosts.
    @param num_hosts: number of hosts the aggregated stats were built from.
    @returns: string representing the aggregate stats report.
    """
    lines = [
        'Overall stats for hosts: %s \n' % (', '.join(labels)),
        ' %s - %s \n' % (time_utils.epoch_time_to_date_string(t_start),
                         time_utils.epoch_time_to_date_string(t_end)),
        ' Number of total hosts: %s \n' % (num_hosts),
    ]
    # Total host-time in the period; time_spent is normalized against it to
    # obtain a percentage among ALL hosts.
    total_host_time = (t_end - t_start) * num_hosts
    if total_host_time:
        multiplication_factor = 100.0 / total_host_time
    else:
        # No hosts or an empty period: report 0% rather than dividing by 0.
        multiplication_factor = 0.0
    for status, time_spent in aggregated_stats.items():
        spaces = ' ' * (15 - len(status))
        percent_spent = multiplication_factor * time_spent
        lines.append(' %s: %s %.2f %%\n' % (status, spaces, percent_spent))
    lines.append('- -- --- ---- ----- ---- --- -- -\n')
    return ''.join(lines)
| |
| |
def get_overall_report(label, t_start, t_end, intervals_of_statuses_list):
    """Returns string reporting overall host history for a group of hosts.

    @param label: Description of the group of hosts these overall stats
            represent. It is handed to get_stats_string_aggregate as its
            `labels` argument, which joins it with ', '.
            NOTE(review): that suggests a list of strings is expected
            here rather than a single string - confirm against callers.
    @param t_start: beginning of time period we are interested in.
    @param t_end: end of time period we are interested in.
    @param intervals_of_statuses_list: A list of dictionaries where keys
            are tuple (ti, tf), and value is the status along with other
            metadata, e.g., task_id, task_name, job_id etc.
    @returns: the report string built by get_stats_string_aggregate.
    """
    aggregated = aggregate_hosts(intervals_of_statuses_list)
    stats_all, num_hosts = aggregated
    report = get_stats_string_aggregate(
            label, t_start, t_end, stats_all, num_hosts)
    return report
| |
| |
def get_report_for_host(t_start, t_end, hostname, size,
                        print_each_interval, index):
    """Gets stats report for a host

    @param t_start: beginning of time period we are interested in.
    @param t_end: end of time period we are interested in.
    @param hostname: hostname for the host we are interested in (string)
    @param size: maximum number of entries returned per query
    @param print_each_interval: True or False, whether we want to
            display all intervals
    @param index: index in elasticsearch to query data for.
    @returns: tuple (report, intervals_of_statuses)
            report: stats report for this particular host (string)
            intervals_of_statuses: the interval dictionary the report was
                    computed from.
    """
    intervals_of_statuses, num_entries_found = host_history_intervals(
            t_start, t_end, hostname, size, index)
    total_times = calculate_total_times(intervals_of_statuses)
    report = get_stats_string(
            t_start, t_end, total_times, intervals_of_statuses,
            hostname, num_entries_found, print_each_interval)
    return report, intervals_of_statuses
| |
| |
def get_stats_string(t_start, t_end, total_times, intervals_of_statuses,
                     hostname, num_entries_found, print_each_interval):
    """Returns string reporting host_history for this host.

    @param t_start: beginning of time period we are interested in.
    @param t_end: end of time period we are interested in.
    @param total_times: dictionary where key=status,
            value=(time spent in that status)
    @param intervals_of_statuses: dictionary where keys is tuple (ti, tf),
            and value is the status along with other metadata.
    @param hostname: hostname for the host we are interested in (string)
    @param num_entries_found: Number of entries found for the host in es
    @param print_each_interval: boolean, whether to print each interval
    @returns: the report as a single string.
    """
    period = t_end - t_start
    lines = [
        'usage stats for host: %s \n' % (hostname),
        ' %s - %s \n' % (time_utils.epoch_time_to_date_string(t_start),
                         time_utils.epoch_time_to_date_string(t_end)),
        ' Num entries found in this interval: %s\n' % (num_entries_found),
    ]
    for status, value in total_times.items():
        spaces = (15 - len(status)) * ' '
        if period:
            # 100.0 forces float division even when both operands are ints.
            percent = 100.0 * value / period
        else:
            # Empty period: report 0% instead of dividing by zero.
            percent = 0.0
        lines.append(' %s: %s %.2f %%\n' % (status, spaces, percent))
    lines.append('- -- --- ---- ----- ---- --- -- -\n')
    if print_each_interval:
        for interval, status_info in intervals_of_statuses.items():
            t0, t1 = interval
            t0_string = time_utils.epoch_time_to_date_string(t0)
            t1_string = time_utils.epoch_time_to_date_string(t1)
            status = status_info['status']
            spaces = (15 - len(status)) * ' '
            # Kept separate from `period` so the overall period length is
            # not overwritten while listing the intervals.
            duration = int(t1 - t0)
            lines.append(' %s : %s %s %s %ss\n' % (t0_string, t1_string,
                                                   status, spaces,
                                                   duration))
    return ''.join(lines)
| |
| |
def calculate_status_times(t_start, t_end, int_status, metadata,
                           locked_intervals):
    """Splits [t_start, t_end] into locked and non-locked sub-intervals.

    Every locked interval is clipped to [t_start, t_end]. The clipped part
    is reported as 'Locked' and whatever remains is reported with
    int_status. Sub-intervals never reach outside [t_start, t_end], so a
    lock spanning several consecutive status intervals is not counted more
    than once when the results are merged.

    @param t_start: start time
    @param t_end: end time
    @param int_status: status of [t_start, t_end] if not locked
    @param metadata: metadata of the status change, e.g., task_id, task_name.
    @param locked_intervals: list of tuples denoting intervals of locked
            states, ordered by start time.
    @returns: ordered dictionary where
            key = (t_interval_start, t_interval_end),
            val = {'status': status, 'metadata': metadata}
            t_interval_start: beginning of interval for that status
            t_interval_end: end of the interval for that status
            status: string such as 'Repair Failed', 'Locked', etc.
            metadata: A dictionary of metadata, e.g.,
                      {'task_id':123, 'task_name':'Reset'}
    """
    statuses = collections.OrderedDict()

    # TODO: Put allow more information here in info/locked status
    status_info = {'status': int_status,
                   'metadata': metadata}
    locked_info = {'status': 'Locked',
                   'metadata': {}}
    if not locked_intervals:
        statuses[(t_start, t_end)] = status_info
        return statuses

    # End of the part of [t_start, t_end] that has been accounted for.
    prev_interval_end = t_start
    for lock_start, lock_end in locked_intervals:
        if lock_start > t_end:
            # Intervals are ordered, so no later lock can overlap either.
            break
        if lock_end < t_start:
            # Lock was released before this status interval began.
            continue
        # Keep only the part of the lock that lies inside the status
        # interval and has not been accounted for yet.
        covered_start = max(lock_start, prev_interval_end)
        covered_end = min(lock_end, t_end)
        if covered_end <= covered_start:
            # The lock merely touches a boundary; nothing to record.
            continue
        if covered_start > prev_interval_end:
            # Unlocked gap before this lock.
            statuses[(prev_interval_end, covered_start)] = status_info
        statuses[(covered_start, covered_end)] = locked_info
        prev_interval_end = covered_end
    if t_end > prev_interval_end:
        # Remaining unlocked tail; skipped when empty to avoid logging a
        # zero-length interval.
        statuses[(prev_interval_end, t_end)] = status_info
    return statuses
| |
| |
def get_log_url(hostname, metadata):
    """Builds the url of the log that belongs to a status change.

    @param hostname: Hostname of the dut.
    @param metadata: A dictionary of other metadata, e.g.,
                     {'task_id':123, 'task_name':'Reset'}
    @return: job_history.TASK_URL filled in when metadata has both
             'task_id' and 'task_name'; otherwise job_history.JOB_URL
             filled in when it has both 'job_id' and 'owner'; otherwise
             None.
    """
    # Special task information takes precedence over job information.
    if 'task_id' in metadata and 'task_name' in metadata:
        task_values = {'hostname': hostname,
                       'task_id': metadata['task_id'],
                       'task_name': metadata['task_name']}
        return job_history.TASK_URL % task_values
    if 'job_id' in metadata and 'owner' in metadata:
        job_values = {'hostname': hostname,
                      'job_id': metadata['job_id'],
                      'owner': metadata['owner']}
        return job_history.JOB_URL % job_values
    return None
| |
| |
def build_history(hostname, status_intervals):
    """Converts status intervals into a list of host history records.

    @param hostname: Hostname of the dut.
    @param status_intervals: A ordered dictionary with
                    key as (t_start, t_end) and value as a dictionary with
                    the keys 'status' and 'metadata'
                    status = status of the host. e.g. 'Repair Failed'
                    t_start is the beginning of the interval where the DUT
                    has that status
                    t_end is the end of the interval where the DUT has that
                    status
                    metadata: A dictionary of other metadata, e.g.,
                                        {'task_id':123, 'task_name':'Reset'}
    @return: A list of host history, e.g.,
             [{'status': 'Resetting'
               'start_time': '2014-08-07 10:02:16',
               'end_time': '2014-08-07 10:03:16',
               'log_url': 'http://autotest/reset-546546/debug',
               'task_id': 546546},
              {'status': 'Running'
               'start_time': '2014-08-07 10:03:18',
               'end_time': '2014-08-07 10:13:00',
               'log_url': 'http://autotest/afe/#tab_id=view_job&object_id=1683',
               'job_id': 1683}
             ]
    """
    history = []
    for span, info in status_intervals.items():
        extra = info['metadata']
        record = {
            'status': info['status'],
            'start_time': time_utils.epoch_time_to_date_string(span[0]),
            'end_time': time_utils.epoch_time_to_date_string(span[1]),
            'log_url': get_log_url(hostname, extra),
        }
        # Metadata goes in last, so its entries win on a key collision.
        record.update(extra)
        history.append(record)
    return history
| |
| |
def get_status_intervals(history_details):
    """Get a list of status interval from history details.

    This is a reverse method of above build_history. Caller gets the history
    details from RPC get_host_history, and use this method to get the list of
    status interval, which can be used to calculate stats from
    host_history_utils.aggregate_hosts.

    @param history_details: A dictionary of host history for each host, e.g.,
            {'172.22.33.51': [{'status': 'Resetting'
                               'start_time': '2014-08-07 10:02:16',
                               'end_time': '2014-08-07 10:03:16',
                               'log_url': 'http://autotest/reset-546546/debug',
                               'task_id': 546546},]
            }
    @return: A list with one ordered dictionary per host, where keys are
             tuple (start_time, end_time) in epoch time, and value is a
             dictionary with the keys 'status' and 'metadata'. The metadata
             is a deep copy of the history record with 'hostname' added.
    """
    status_intervals = []
    # .items() (as in build_history) works for any mapping, not only for
    # dictionaries that offer the Python 2 specific iteritems().
    for host, history in history_details.items():
        intervals = collections.OrderedDict()
        for interval in history:
            start_time = time_utils.to_epoch_time(interval['start_time'])
            end_time = time_utils.to_epoch_time(interval['end_time'])
            # Copy so that adding the hostname leaves the input untouched.
            metadata = copy.deepcopy(interval)
            metadata['hostname'] = host
            intervals[(start_time, end_time)] = {'status': interval['status'],
                                                 'metadata': metadata}
        status_intervals.append(intervals)
    return status_intervals
| |
| |
def get_machine_utilization_rate(stats):
    """Get machine utilization rate from given stats.

    Time spent in 'Locked' is left out of the calculation entirely.

    @param stats: A dictionary with a status as key and value is the total
            number of seconds spent on the status.
    @return: The share of the non-locked time during which the dut was not
             in one of the statuses 'Repairing', 'Repair Failed', 'Ready'
             or 'Verifying', as a fraction between 0 and 1 (not multiplied
             by 100). 0 when there is no non-locked time at all.
    """
    not_utilized_status = frozenset(
            ['Repairing', 'Repair Failed', 'Ready', 'Verifying'])
    excluded_status = frozenset(['Locked'])
    total_time = 0
    # Float accumulator keeps the division below a true division.
    total_time_not_utilized = 0.0
    # .items() (as in build_history) works for any mapping, not only for
    # dictionaries that offer the Python 2 specific iteritems().
    for status, interval in stats.items():
        if status in excluded_status:
            continue
        total_time += interval
        if status in not_utilized_status:
            total_time_not_utilized += interval
    if total_time == 0:
        # All duts are locked, assume MUR is 0%
        return 0
    return 1 - total_time_not_utilized / total_time
| |
| |
def get_machine_availability_rate(stats):
    """Get machine availability rate from given stats.

    Time spent in 'Locked' is left out of the calculation entirely.

    @param stats: A dictionary with a status as key and value is the total
            number of seconds spent on the status.
    @return: The share of the non-locked time during which the dut was not
             in one of the statuses 'Repairing', 'Repair Failed' or
             'Verifying', as a fraction between 0 and 1 (not multiplied by
             100). 0 when there is no non-locked time at all.
    """
    not_available_status = frozenset(
            ['Repairing', 'Repair Failed', 'Verifying'])
    excluded_status = frozenset(['Locked'])
    total_time = 0
    # Float accumulator keeps the division below a true division.
    total_time_not_available = 0.0
    # .items() (as in build_history) works for any mapping, not only for
    # dictionaries that offer the Python 2 specific iteritems().
    for status, interval in stats.items():
        if status in excluded_status:
            continue
        total_time += interval
        if status in not_available_status:
            total_time_not_available += interval
    if total_time == 0:
        # All duts are locked, assume MAR is 0%
        return 0
    return 1 - total_time_not_available / total_time