| #!/usr/bin/python2 |
| |
| # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Selects the optimal set of benchmarks. |
| |
| For each benchmark, there is a file with the common functions, as extracted by |
| the process_hot_functions module. |
| |
The script receives as input the CSV file with the CWP inclusive count values,
the file with the Chrome OS function groups and the directory containing a
common functions file for every benchmark.
| |
For every benchmark and for the CWP data, it extracts all the functions that
match the given Chrome OS groups.
| |
It generates all possible benchmark sets of a given size, computes a metric
for every set and outputs the optimal sets, based on which ones have the best
metric value.
| |
Three different metrics are supported: function count, distance variation and
score.
| |
For the function count metric, we count the unique functions covered by a
set of benchmarks. Besides the number of unique functions, we also compute
the fraction of unique functions out of the total number of CWP functions
from the given groups. The benchmark set that covers the highest number of
unique functions belonging to the given groups is considered better.
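
For instance (with made-up numbers), if the CWP data contains 100 functions
from the given groups and a set of benchmarks covers 40 unique ones of them,
the function count is 40 and the function count fraction is 0.4.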
| |
For the distance variation metric, we compute the sum of the distance
variations of the functions covered by a set of benchmarks. We define the
distance variation as the difference between the distance value of a function
and the ideal distance value (1.0). If a function appears in multiple common
functions files, we consider only the minimum value. We also compute the
distance variation per function. The set with the smallest distance variation
per function is considered better.
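
For instance (with made-up numbers), if a function appears with the distance
values 1.2 and 1.4 in two common functions files, only the minimum value
(1.2) is considered and the distance variation of the function is
1.2 - 1.0 = 0.2.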
| |
For the score metric, we compute the sum of the scores of the functions from a
set of benchmarks. If a function appears in multiple common functions files,
we consider only the maximum value. We also compute the fraction of this sum
out of the sum of all the scores of the functions from the CWP data covering
the given groups in the ideal case (the ideal score of a function is 1.0).
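
For instance (with made-up numbers), if a function appears with the scores
0.6 and 0.8 in two common functions files, only the maximum value (0.8) is
added to the sum. If the CWP data contains 50 functions from the given
groups, the ideal sum is 50.0 and the score fraction is the actual sum
divided by 50.0.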
| |
| We compute the metrics in the same manner for individual Chrome OS groups. |
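
Example invocation, with the flags shown one per line for readability (the
script name and the argument values are illustrative; the flag names match
the ones defined in ParseArguments):

  python select_optimal_benchmark_set.py
    --benchmark_set_common_functions_path=common_functions/
    --cwp_inclusive_count_file=cwp_inclusive_count.csv
    --cwp_function_groups_file=cwp_function_groups.txt
    --benchmark_set_size=3
    --benchmark_set_output_file=optimal_benchmark_sets.json
    --metric=score_fraction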
| """ |
| |
| from collections import defaultdict |
| |
| import argparse |
| import csv |
| import itertools |
| import json |
| import operator |
| import os |
| import sys |
| |
| import benchmark_metrics |
| import utils |
| |
| |
| class BenchmarkSet(object): |
| """Selects the optimal set of benchmarks of given size.""" |
| |
| # Constants that specify the metric type. |
| FUNCTION_COUNT_METRIC = 'function_count' |
| DISTANCE_METRIC = 'distance_variation' |
| SCORE_METRIC = 'score_fraction' |
| |
| def __init__(self, benchmark_set_size, benchmark_set_output_file, |
| benchmark_set_common_functions_path, cwp_inclusive_count_file, |
| cwp_function_groups_file, metric): |
| """Initializes the BenchmarkSet. |
| |
| Args: |
| benchmark_set_size: Constant representing the size of a benchmark set. |
| benchmark_set_output_file: The output file that will contain the set of |
| optimal benchmarks with the metric values. |
| benchmark_set_common_functions_path: The directory containing the files |
| with the common functions for the list of benchmarks. |
| cwp_inclusive_count_file: The CSV file containing the CWP functions with |
| their inclusive count values. |
| cwp_function_groups_file: The file that contains the CWP function groups. |
| metric: The type of metric used for the analysis. |
| """ |
| self._benchmark_set_size = int(benchmark_set_size) |
| self._benchmark_set_output_file = benchmark_set_output_file |
| self._benchmark_set_common_functions_path = \ |
| benchmark_set_common_functions_path |
| self._cwp_inclusive_count_file = cwp_inclusive_count_file |
| self._cwp_function_groups_file = cwp_function_groups_file |
| self._metric = metric |
| |
| @staticmethod |
| def OrganizeCWPFunctionsInGroups(cwp_inclusive_count_statistics, |
| cwp_function_groups): |
| """Selects the CWP functions that match the given Chrome OS groups. |
| |
| Args: |
      cwp_inclusive_count_statistics: A dict with the CWP functions, keyed by
        'function_name,file_name'.
| cwp_function_groups: A list with the CWP function groups. |
| |
| Returns: |
      A dict having as a key the name of a group and as a value the list of
      CWP functions that match that group.
| """ |
| cwp_functions_grouped = defaultdict(list) |
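    # A function key has the form 'function_name,file_name'. A function is
    # assigned to the first group whose file path is a substring of the file
    # where the function is declared.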
| for function_key in cwp_inclusive_count_statistics: |
| _, file_name = function_key.split(',') |
| for group_name, file_path in cwp_function_groups: |
| if file_path not in file_name: |
| continue |
| cwp_functions_grouped[group_name].append(function_key) |
| break |
| return cwp_functions_grouped |
| |
| @staticmethod |
| def OrganizeBenchmarkSetFunctionsInGroups(benchmark_set_files, |
| benchmark_set_common_functions_path, |
| cwp_function_groups): |
| """Selects the benchmark functions that match the given Chrome OS groups. |
| |
| Args: |
      benchmark_set_files: The list of common functions files, one for every
        benchmark in the set.
| benchmark_set_common_functions_path: The directory containing the files |
| with the common functions for the list of benchmarks. |
| cwp_function_groups: A list with the CWP function groups. |
| |
| Returns: |
      A dict having as a key the name of a common functions file. The value is
      a dict having as a key the name of a group and as a value a dict that
      maps each matching function key to its (distance, score) pair.
| """ |
| |
| benchmark_set_functions_grouped = {} |
| for benchmark_file_name in benchmark_set_files: |
| benchmark_full_file_path = \ |
| os.path.join(benchmark_set_common_functions_path, |
| benchmark_file_name) |
| with open(benchmark_full_file_path) as input_file: |
| statistics_reader = \ |
| csv.DictReader(input_file, delimiter=',') |
| benchmark_functions_grouped = defaultdict(dict) |
| for statistic in statistics_reader: |
| function_name = statistic['function'] |
| file_name = statistic['file'] |
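          # Keep the (distance, score) pair of the function under the first
          # group whose file path is a substring of the file where the
          # function is declared.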
| for group_name, file_path in cwp_function_groups: |
| if file_path not in file_name: |
| continue |
| function_key = ','.join([function_name, file_name]) |
| distance = float(statistic['distance']) |
| score = float(statistic['score']) |
| benchmark_functions_grouped[group_name][function_key] = \ |
| (distance, score) |
| break |
| benchmark_set_functions_grouped[benchmark_file_name] = \ |
| benchmark_functions_grouped |
| return benchmark_set_functions_grouped |
| |
| @staticmethod |
| def SelectOptimalBenchmarkSetBasedOnMetric(all_benchmark_combinations_sets, |
| benchmark_set_functions_grouped, |
| cwp_functions_grouped, |
| metric_function_for_set, |
| metric_comparison_operator, |
| metric_default_value, |
| metric_string): |
| """Generic method that selects the optimal benchmark set based on a metric. |
| |
    The reason for implementing a generic method is to avoid duplicating the
    selection logic for each of the three different metrics.
| |
| Args: |
| all_benchmark_combinations_sets: The list with all the sets of benchmark |
| combinations. |
| benchmark_set_functions_grouped: A dict with benchmark functions as |
| returned by OrganizeBenchmarkSetFunctionsInGroups. |
| cwp_functions_grouped: A dict with the CWP functions as returned by |
| OrganizeCWPFunctionsInGroups. |
| metric_function_for_set: The method used to compute the metric for a given |
| benchmark set. |
      metric_comparison_operator: A comparison operator used to compare two
        values of the same metric (e.g. operator.lt or operator.gt).
| metric_default_value: The default value for the metric. |
| metric_string: A tuple of strings used in the JSON output for the pair of |
| the values of the metric. |
| |
| Returns: |
      A list of tuples, one for each optimal benchmark set. A tuple contains
      the list of benchmarks from the set, the pair of metric values and a
      dictionary with the metrics for each group.
| """ |
| optimal_sets = [([], metric_default_value, {})] |
| |
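    # A strictly better metric value replaces the current list of optimal
    # sets, while a benchmark set that ties the current optimal value is
    # appended to the list.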
| for benchmark_combination_set in all_benchmark_combinations_sets: |
| function_metrics = [benchmark_set_functions_grouped[benchmark] |
| for benchmark in benchmark_combination_set] |
| set_metrics, set_groups_metrics = \ |
| metric_function_for_set(function_metrics, cwp_functions_grouped, |
| metric_string) |
| optimal_value = optimal_sets[0][1][0] |
| if metric_comparison_operator(set_metrics[0], optimal_value): |
| optimal_sets = \ |
| [(benchmark_combination_set, set_metrics, set_groups_metrics)] |
      elif set_metrics[0] == optimal_value:
| optimal_sets.append( |
| (benchmark_combination_set, set_metrics, set_groups_metrics)) |
| |
| return optimal_sets |
| |
| def SelectOptimalBenchmarkSet(self): |
| """Selects the optimal benchmark sets and writes them in JSON format. |
| |
| Parses the CWP inclusive count statistics and benchmark common functions |
| files. Organizes the functions into groups. For every optimal benchmark |
| set, the method writes in the self._benchmark_set_output_file the list of |
| benchmarks, the pair of metrics and a dictionary with the pair of |
| metrics for each group covered by the benchmark set. |
| """ |
| |
| benchmark_set_files = os.listdir(self._benchmark_set_common_functions_path) |
| all_benchmark_combinations_sets = \ |
| itertools.combinations(benchmark_set_files, self._benchmark_set_size) |
| |
| with open(self._cwp_function_groups_file) as input_file: |
| cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines()) |
| |
| cwp_inclusive_count_statistics = \ |
| utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_count_file) |
| cwp_functions_grouped = self.OrganizeCWPFunctionsInGroups( |
| cwp_inclusive_count_statistics, cwp_function_groups) |
| benchmark_set_functions_grouped = \ |
| self.OrganizeBenchmarkSetFunctionsInGroups( |
| benchmark_set_files, self._benchmark_set_common_functions_path, |
| cwp_function_groups) |
| |
| if self._metric == self.FUNCTION_COUNT_METRIC: |
| metric_function_for_benchmark_set = \ |
| benchmark_metrics.ComputeFunctionCountForBenchmarkSet |
| metric_comparison_operator = operator.gt |
| metric_default_value = (0, 0.0) |
| metric_string = ('function_count', 'function_count_fraction') |
| elif self._metric == self.DISTANCE_METRIC: |
| metric_function_for_benchmark_set = \ |
| benchmark_metrics.ComputeDistanceForBenchmarkSet |
| metric_comparison_operator = operator.lt |
| metric_default_value = (float('inf'), float('inf')) |
| metric_string = \ |
| ('distance_variation_per_function', 'total_distance_variation') |
| elif self._metric == self.SCORE_METRIC: |
| metric_function_for_benchmark_set = \ |
| benchmark_metrics.ComputeScoreForBenchmarkSet |
| metric_comparison_operator = operator.gt |
| metric_default_value = (0.0, 0.0) |
| metric_string = ('score_fraction', 'total_score') |
| else: |
      raise ValueError('Unknown metric type: %s' % self._metric)
| |
| optimal_benchmark_sets = \ |
| self.SelectOptimalBenchmarkSetBasedOnMetric( |
| all_benchmark_combinations_sets, benchmark_set_functions_grouped, |
| cwp_functions_grouped, metric_function_for_benchmark_set, |
| metric_comparison_operator, metric_default_value, metric_string) |
| |
| json_output = [] |
| |
| for benchmark_set in optimal_benchmark_sets: |
| json_entry = { |
| 'benchmark_set': |
| list(benchmark_set[0]), |
| 'metrics': { |
| metric_string[0]: benchmark_set[1][0], |
| metric_string[1]: benchmark_set[1][1] |
| }, |
| 'groups': |
| dict(benchmark_set[2]) |
| } |
| json_output.append(json_entry) |
| |
| with open(self._benchmark_set_output_file, 'w') as output_file: |
| json.dump(json_output, output_file) |
| |
| |
| def ParseArguments(arguments): |
| parser = argparse.ArgumentParser() |
| |
| parser.add_argument( |
| '--benchmark_set_common_functions_path', |
| required=True, |
    help='The directory containing the CSV files with the common functions '
    'of the benchmark profiles and CWP data. A file contains all the hot '
    'functions from a pprof top output file that are also included in the '
    'file containing the CWP inclusive count values. The CSV fields are: the '
    'function name, the file and the object where the function is declared, '
    'the CWP inclusive count and inclusive count fraction values, the '
    'cumulative and average distance, and the cumulative and average score. '
    'The files with the common functions have the same names as the '
    'corresponding pprof output files.')
| parser.add_argument( |
| '--cwp_inclusive_count_file', |
| required=True, |
| help='The CSV file containing the CWP hot functions with their ' |
| 'inclusive_count values. The CSV fields include the name of the ' |
| 'function, the file and the object with the definition, the inclusive ' |
| 'count value and the inclusive count fraction out of the total amount of ' |
| 'inclusive count values.') |
| parser.add_argument( |
| '--benchmark_set_size', |
| required=True, |
| help='The size of the benchmark sets.') |
| parser.add_argument( |
| '--benchmark_set_output_file', |
| required=True, |
| help='The JSON output file containing optimal benchmark sets with their ' |
| 'metrics. For every optimal benchmark set, the file contains the list of ' |
| 'benchmarks, the pair of metrics and a dictionary with the pair of ' |
| 'metrics for each group covered by the benchmark set.') |
| parser.add_argument( |
| '--metric', |
| required=True, |
| help='The metric used to select the optimal benchmark set. The possible ' |
| 'values are: distance_variation, function_count and score_fraction.') |
| parser.add_argument( |
| '--cwp_function_groups_file', |
| required=True, |
    help='The file that contains the CWP function groups. A line consists of '
    'the group name and a file path describing the group. A group must '
    'represent a Chrome OS component.')
| |
| options = parser.parse_args(arguments) |
| |
| return options |
| |
| |
| def Main(argv): |
| options = ParseArguments(argv) |
| benchmark_set = BenchmarkSet(options.benchmark_set_size, |
| options.benchmark_set_output_file, |
| options.benchmark_set_common_functions_path, |
| options.cwp_inclusive_count_file, |
| options.cwp_function_groups_file, options.metric) |
| benchmark_set.SelectOptimalBenchmarkSet() |
| |
| |
| if __name__ == '__main__': |
| Main(sys.argv[1:]) |