blob: 012ddc68066fd1c9731843269a5d623e32cb9332 [file] [log] [blame]
# Copyright 2023 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Analyze the execution log files generated by Bazel."""
import logging
import os
from pathlib import Path
import re
import shutil
from typing import List, Optional
from chromite.lib import commandline
def translate_target_label_to_file_name(target_label: str) -> str:
    """Convert a Bazel target label into a string usable as a file name.

    Characters that are awkward in file names are substituted as follows:
    "@" becomes "at-", "~" becomes "-", ":" becomes "___", and each of
    "/", ".", "(", ")" and " " becomes "_".

    Args:
        target_label: The label (or other path-like text) to translate.

    Returns:
        The translated string.
    """
    # None of the replacement strings contains a character that is itself
    # replaced, so one translate() pass matches sequential substitution.
    substitutions = {
        "@": "at-",
        "~": "-",
        ":": "___",
    }
    for underscored in "/.() ":
        substitutions[underscored] = "_"
    return target_label.translate(str.maketrans(substitutions))
def _write_action_file(
    output_dir_path: Path,
    target_label: Optional[str],
    qualifier: Optional[str],
    lines: List[str],
) -> None:
    """Write one buffered action record to its own file.

    Args:
        output_dir_path: The root output directory.
        target_label: The record's target label, or None if it had none.
        qualifier: The record's first listed output, or None if it had none.
        lines: The (already filtered) lines of the record.

    Raises:
        FileExistsError: If another record already produced the same file.
    """
    if not lines:
        # Nothing was buffered (e.g. two separators in a row).
        return
    if target_label is None or not qualifier:
        # Both pieces are needed to build the output path; without them the
        # record cannot be named, so report it instead of crashing.
        logging.warning(
            "Skipping action that cannot be named "
            "(target_label=%s, first listed_output=%s)",
            target_label,
            qualifier,
        )
        return

    action_subdir_path = output_dir_path / translate_target_label_to_file_name(
        target_label
    )
    action_file_path = action_subdir_path / translate_target_label_to_file_name(
        qualifier
    )
    logging.info("action_file_path=%s", action_file_path)
    if action_file_path.is_file():
        raise FileExistsError(f"File {action_file_path} already exists.")
    action_subdir_path.mkdir(exist_ok=True)
    with open(action_file_path, "w", encoding="utf-8") as target_file:
        target_file.writelines(lines)


def split_exec_log(input_file_path: Path, output_dir_path: Path) -> None:
    """Split an execution log into separate files per action.

    This function takes a text execution log from a Bazel invocation and
    breaks it apart into separate files under the provided output directory.
    This facilitates future comparisons between two execution logs, which is
    very painful when the content is in a single 100+ GB file.

    A record ends at a line starting with "-------". Each record is written
    to <output_dir>/<translated target_label>/<translated first
    listed_outputs value>. Lines containing "seconds: " or "nanos: " are
    dropped. Records lacking a target_label or a listed_outputs line are
    skipped with a warning. A trailing record that is not followed by a
    separator line is still written.

    Args:
        input_file_path: The Path to the input execution log file.
        output_dir_path: The Path to the dir to write output files. If it
            already exists it is removed and recreated.

    Raises:
        FileExistsError: If two records map to the same output file.
    """
    if output_dir_path.exists():
        shutil.rmtree(output_dir_path)
    output_dir_path.mkdir()

    target_label_pattern = re.compile(r'target_label: "(.*)"')
    listed_outputs_pattern = re.compile(r'listed_outputs: "(.*)"')

    lines_for_target_file = []
    target_label = None
    qualifier = None
    logging.info("output_dir_path=%s", output_dir_path)
    with open(input_file_path, "r", encoding="utf-8") as input_file:
        for line in input_file:
            if line.startswith("-------"):
                # The output path is resolved only once the record is
                # complete, so the relative order of the target_label and
                # listed_outputs lines within a record does not matter.
                _write_action_file(
                    output_dir_path,
                    target_label,
                    qualifier,
                    lines_for_target_file,
                )
                lines_for_target_file = []
                target_label = None
                qualifier = None
                continue

            # Timing information is going to be inherently non-reproducible,
            # so exclude it.
            if "seconds: " in line or "nanos: " in line:
                continue

            lines_for_target_file.append(line)
            if line.startswith("target_label: "):
                match = target_label_pattern.match(line)
                if match:
                    target_label = match.group(1)
            elif not qualifier and line.startswith("listed_outputs: "):
                # Only use the first listed_output for a file as its
                # qualifier.
                match = listed_outputs_pattern.match(line)
                if match:
                    qualifier = match.group(1)

    # Flush a final record that was not terminated by a separator line.
    _write_action_file(
        output_dir_path, target_label, qualifier, lines_for_target_file
    )
def _actions_dir_for(exec_log_path: Path) -> Path:
    """Return the "<base name>_actions" directory next to an exec log."""
    log_path = Path(exec_log_path)
    # Split only the file name at its first dot. Splitting the whole path
    # would truncate it at a dot in a parent component, e.g. "../exec.log"
    # or "/home/a.b/exec.log".
    base_name = log_path.name.split(".", maxsplit=1)[0]
    return log_path.parent / (base_name + "_actions")


def analyze_exec_logs(
    exec_log1_path: Path, exec_log2_path: Optional[Path] = None
) -> None:
    """Split up to two execution logs into separate files per action.

    This function takes up to two text execution logs from Bazel invocations
    and breaks them apart into separate files, under output directories with
    names derived from the input filenames: the file name up to its first
    "." with "_actions" appended, located in the same directory as the log.

    This facilitates future comparisons between two execution logs, which is
    very painful when the content is in a single 100+ GB file.

    Args:
        exec_log1_path: The Path to the first input execution log file.
        exec_log2_path: The Path to the second input execution log file, or
            None (the default) to process only the first log.
    """
    split_exec_log(exec_log1_path, _actions_dir_for(exec_log1_path))
    if exec_log2_path:
        split_exec_log(exec_log2_path, _actions_dir_for(exec_log2_path))
def _get_parser() -> commandline.ArgumentParser:
    """Build the argument parser.

    Returns:
        A parser accepting --exec_log1 (required) and --exec_log2 (optional),
        stored as exec_log1_filename and exec_log2_filename respectively.
    """
    parser = commandline.ArgumentParser()
    parser.add_argument(
        "--exec_log1",
        dest="exec_log1_filename",
        # main() always builds a Path from this value, so a missing flag
        # must be reported as a usage error rather than a TypeError.
        required=True,
        help="The path to the first exec log to analyze.",
    )
    parser.add_argument(
        "--exec_log2",
        dest="exec_log2_filename",
        help="The path to the second exec log to analyze.",
    )
    return parser
def main(argv: Optional[List[str]]) -> Optional[int]:
    """Parse the command line and split the requested exec log(s).

    Args:
        argv: The command line arguments to parse.
    """
    opts = _get_parser().parse_args(argv)
    second_log_filename = opts.exec_log2_filename
    analyze_exec_logs(
        Path(opts.exec_log1_filename),
        # The second log is optional; pass None when it was not given.
        Path(second_log_filename) if second_log_filename else None,
    )