# Copyright 2023 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Analyze the execution log files generated by Bazel."""
| |
| import logging |
| import os |
| from pathlib import Path |
| import re |
| import shutil |
| from typing import List, Optional |
| |
| from chromite.lib import commandline |
| |
| |
# Single-pass substitution table for characters that are awkward in file
# names. None of the replacement strings contains a character that is itself
# a key, so one pass gives the same result as replacing them one at a time.
_FILE_NAME_TRANSLATION = str.maketrans(
    {
        "@": "at-",
        "/": "_",
        "~": "-",
        ".": "_",
        "(": "_",
        ")": "_",
        " ": "_",
        ":": "___",
    }
)


def translate_target_label_to_file_name(target_label: str) -> str:
    """Convert a target label into a string usable as a single file name.

    The characters "@", "/", "~", ".", "(", ")", " " and ":" are replaced
    with "at-", "_", "-", "_", "_", "_", "_" and "___" respectively; every
    other character is kept as is.

    Args:
        target_label: The label (or other path-like string) to convert.

    Returns:
        The converted string.
    """
    return target_label.translate(_FILE_NAME_TRANSLATION)
| |
| |
def _write_action_file(
    output_dir_path: Path,
    target_label: Optional[str],
    qualifier: Optional[str],
    lines: List[str],
) -> None:
    """Write the lines of one action to <output_dir>/<target>/<qualifier>.

    Args:
        output_dir_path: The Path to the dir holding the per-target subdirs.
        target_label: The action's target label, or None if none was seen.
        qualifier: The action's first listed output, or None if none was seen.
        lines: The log lines that make up the action.

    Raises:
        ValueError: The action has no target label or no listed output, so
            no file name can be derived for it.
        FileExistsError: Another action already produced the same file name.
    """
    if target_label is None or not qualifier:
        raise ValueError(
            "Cannot derive a file name for action: "
            f"target_label={target_label!r}, listed_outputs={qualifier!r}"
        )

    action_subdir_path = (
        output_dir_path / translate_target_label_to_file_name(target_label)
    )
    action_file_path = (
        action_subdir_path / translate_target_label_to_file_name(qualifier)
    )
    logging.info("action_file_path=%s", action_file_path)

    if os.path.isfile(action_file_path):
        raise FileExistsError(f"File {action_file_path} already exists.")
    action_subdir_path.mkdir(exist_ok=True)

    with open(action_file_path, "w", encoding="utf-8") as target_file:
        target_file.writelines(lines)


def split_exec_log(input_file_path: Path, output_dir_path: Path) -> None:
    """Split an execution log into separate files per action.

    This function takes a text execution log from a Bazel invocation and
    breaks it apart into separate files under the provided output directory.
    This facilitates future comparisons between two execution logs, which is
    very painful when the content is in a single 100+ GB file.

    Actions are delimited by lines starting with "-------". Each action is
    written to <output_dir>/<target label>/<first listed output>, with both
    path components passed through translate_target_label_to_file_name().
    Lines containing "seconds: " or "nanos: " are dropped from the output.

    Any existing output directory is deleted first.

    Args:
        input_file_path: The Path to the input execution log file.
        output_dir_path: The Path to the dir to write output files.

    Raises:
        ValueError: An action terminated by a separator line has no target
            label or no listed output.
        FileExistsError: Two actions map to the same output file.
    """
    if output_dir_path.exists():
        shutil.rmtree(output_dir_path)
    output_dir_path.mkdir()
    target_label_pattern = re.compile(r'target_label: "(.*)"')
    listed_outputs_pattern = re.compile(r'listed_outputs: "(.*)"')

    lines_for_target_file: List[str] = []
    target_label = None
    qualifier = None

    logging.info("output_dir_path=%s", output_dir_path)

    with open(input_file_path, "r", encoding="utf-8") as input_file:
        for line in input_file:
            if line.startswith("-------"):
                # Blank lines around separators (or back-to-back separators)
                # do not constitute an action; there is nothing to write.
                if any(pending.strip() for pending in lines_for_target_file):
                    # The file name is derived only now, once the whole
                    # action has been read, so it does not depend on whether
                    # listed_outputs appears before or after target_label.
                    _write_action_file(
                        output_dir_path,
                        target_label,
                        qualifier,
                        lines_for_target_file,
                    )

                lines_for_target_file = []
                target_label = None
                qualifier = None
                continue

            # Timing information is going to be inherently non-reproducible, so
            # exclude it.
            if "seconds: " in line or "nanos: " in line:
                continue

            lines_for_target_file.append(line)
            if line.startswith("target_label: "):
                match = target_label_pattern.match(line)
                if match:
                    target_label = match.group(1)
            elif line.startswith("listed_outputs: "):
                # Only use the first listed_output for a file as its qualifier
                if not qualifier:
                    match = listed_outputs_pattern.match(line)
                    if match:
                        qualifier = match.group(1)

    # A final action that is not followed by a separator line would otherwise
    # be lost. Write it when it can be named; a partial action (e.g. from a
    # truncated log) is reported instead of aborting after all the work.
    if any(pending.strip() for pending in lines_for_target_file):
        if target_label is not None and qualifier:
            _write_action_file(
                output_dir_path, target_label, qualifier, lines_for_target_file
            )
        else:
            logging.warning(
                "Dropping incomplete trailing action in %s", input_file_path
            )
| |
| |
def _actions_dir_for_exec_log(exec_log_path: Path) -> Path:
    """Return the "<name>_actions" directory that sits next to an exec log.

    <name> is the log's file name up to (not including) its first ".".
    """
    file_name = exec_log_path.name
    # Only the file name is split: dots in parent directories ("./logs",
    # "../x", "user.name/") must not truncate the path. A name that starts
    # with "." would leave nothing before the dot, so keep it whole.
    base_name = file_name.split(".", maxsplit=1)[0] or file_name
    return exec_log_path.with_name(base_name + "_actions")


def analyze_exec_logs(
    exec_log1_path: Path, exec_log2_path: Optional[Path] = None
) -> None:
    """Split up to two execution logs into separate files per action.

    This function takes up to two text execution logs from Bazel invocations and
    breaks them apart into separate files, under output directories with names
    derived from the input filenames: each log is split into a directory next
    to it named after the log's file name up to its first ".", plus "_actions"
    (e.g. "logs/exec.log" -> "logs/exec_actions").

    This facilitates future comparisons between two execution logs, which is
    very painful when the content is in a single 100+ GB file.

    Args:
        exec_log1_path: The Path to the first input execution log file.
        exec_log2_path: The Path to the second input execution log file, or
            None to process only the first.
    """
    split_exec_log(exec_log1_path, _actions_dir_for_exec_log(exec_log1_path))

    if exec_log2_path:
        split_exec_log(
            exec_log2_path, _actions_dir_for_exec_log(exec_log2_path)
        )
| |
| |
def _get_parser() -> commandline.ArgumentParser:
    """Build the argument parser.

    Returns:
        A parser that accepts a required --exec_log1 and an optional
        --exec_log2, stored as exec_log1_filename and exec_log2_filename.
    """

    parser = commandline.ArgumentParser()

    parser.add_argument(
        "--exec_log1",
        dest="exec_log1_filename",
        # main() always builds a Path from this value, so reject a missing
        # flag here with a usage error rather than a TypeError traceback.
        required=True,
        help="The path to the first exec log to analyze.",
    )

    parser.add_argument(
        "--exec_log2",
        dest="exec_log2_filename",
        help="The path to the second exec log to analyze.",
    )

    return parser
| |
| |
def main(argv: Optional[List[str]]) -> Optional[int]:
    """Parse the command line and split the requested execution log(s)."""
    opts = _get_parser().parse_args(argv)

    first_log = Path(opts.exec_log1_filename)
    # The second log is optional; pass None through when it was not given.
    second_log = (
        Path(opts.exec_log2_filename) if opts.exec_log2_filename else None
    )
    analyze_exec_logs(first_log, second_log)