scripts/analyze_bazel_exec_logs.py - third_party/chromite - Git at Google

 # Copyright 2023 The ChromiumOS Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Analyze the execution log files generated by Bazel."""

 import logging
 import os
 from pathlib import Path
 import re
 import shutil
 from typing import List, Optional

 from chromite.lib import commandline


 def translate_target_label_to_file_name(target_label: str) -> str:
     """Convert a Bazel target label into a string usable as a file name.

     The following substitutions are applied to every occurrence:
     "@" becomes "at-", "~" becomes "-", ":" becomes "___", and each of
     "/", ".", "(", ")" and " " becomes "_".

     Args:
         target_label: The label (or any other string) to convert.

     Returns:
         The converted string.
     """
     # None of the replacement strings contains a character that is itself
     # replaced, so one translation pass gives the same result as applying
     # the substitutions one after another.
     substitutions = {"@": "at-", "~": "-", ":": "___"}
     substitutions.update(dict.fromkeys("/.() ", "_"))
     return target_label.translate(str.maketrans(substitutions))


 def split_exec_log(input_file_path: Path, output_dir_path: Path) -> None:
     """Split an execution log into separate files per action.

     This function takes a text execution log from a Bazel invocation and
     breaks it apart into separate files under the provided output directory.
     This facilitates future comparisons between two execution logs, which is
     very painful when the content is in a single 100+ GB file.

     Actions are delimited by lines starting with "-------".  Each action is
     written to <output dir>/<target label>/<first listed output>, where both
     path components are passed through
     translate_target_label_to_file_name().  Lines containing "seconds: " or
     "nanos: " are dropped.  Any existing output directory is deleted first.

     Args:
         input_file_path: The Path to the input execution log file.
         output_dir_path: The Path to the dir to write output files.

     Raises:
         FileExistsError: If two actions map to the same output file.
     """
     if output_dir_path.exists():
         shutil.rmtree(output_dir_path)
     output_dir_path.mkdir()
     target_label_pattern = re.compile(r'target_label: "(.*)"')
     listed_outputs_pattern = re.compile(r'listed_outputs: "(.*)"')

     logging.info("output_dir_path=%s", output_dir_path)

     lines_for_target_file = []
     target_label = None
     qualifier = None

     with open(input_file_path, "r", encoding="utf-8") as input_file:
         for line in input_file:
             if line.startswith("-------"):
                 # End of the current action: write it out and start afresh.
                 _write_action_file(
                     output_dir_path,
                     target_label,
                     qualifier,
                     lines_for_target_file,
                 )
                 lines_for_target_file = []
                 target_label = None
                 qualifier = None
                 continue

             # Timing information is going to be inherently non-reproducible,
             # so exclude it.
             if "seconds: " in line or "nanos: " in line:
                 continue

             lines_for_target_file.append(line)
             if line.startswith("target_label: "):
                 match = target_label_pattern.match(line)
                 if match:
                     target_label = match.group(1)
             elif line.startswith("listed_outputs: ") and not qualifier:
                 # Only use the first listed_output for a file as its
                 # qualifier.
                 match = listed_outputs_pattern.match(line)
                 if match:
                     qualifier = match.group(1)

     # The last action may not be followed by a separator line; make sure it
     # is not silently dropped.
     _write_action_file(
         output_dir_path, target_label, qualifier, lines_for_target_file
     )


 def _write_action_file(output_dir_path, target_label, qualifier, lines):
     """Write the buffered lines of one action to its own file.

     The output path is resolved here, once the whole action has been read,
     so it does not depend on the order of the target_label and
     listed_outputs lines.

     Args:
         output_dir_path: The Path to the dir holding all per-target dirs.
         target_label: The action's target label, or None if it had none.
         qualifier: The action's first listed output, or None/"" if it had
             none.
         lines: The lines that make up the action.

     Raises:
         FileExistsError: If the output file for this action already exists.
     """
     # Nothing but blank lines (e.g. back-to-back separators): nothing to do.
     if not any(line.strip() for line in lines):
         return

     # Fall back to fixed names rather than crashing when the action carries
     # no target label or no listed output.
     if target_label is None:
         target_label = "no_target_label"
     action_subdir_path = output_dir_path / translate_target_label_to_file_name(
         target_label
     )
     action_file_path = action_subdir_path / translate_target_label_to_file_name(
         qualifier or "no_listed_outputs"
     )
     logging.info("action_file_path=%s", action_file_path)

     if action_file_path.exists():
         raise FileExistsError(f"File {action_file_path} already exists.")
     action_subdir_path.mkdir(exist_ok=True)

     with open(action_file_path, "w", encoding="utf-8") as target_file:
         target_file.writelines(lines)


 def analyze_exec_logs(
     exec_log1_path: Path, exec_log2_path: Optional[Path] = None
 ) -> None:
     """Split up to two execution logs into separate files per action.

     This function takes up to two text execution logs from Bazel invocations
     and breaks them apart into separate files, under output directories with
     names derived from the input filenames: the output directory sits next to
     the log and is named after the log's file name up to its first ".", with
     "_actions" appended (e.g. "logs/exec.log" -> "logs/exec_actions").

     This facilitates future comparisons between two execution logs, which is
     very painful when the content is in a single 100+ GB file.

     Args:
         exec_log1_path: The Path to the first input execution log file.
         exec_log2_path: The Path to the second input execution log file, or
             None if there is only one log.
     """
     split_exec_log(exec_log1_path, _actions_dir_for(exec_log1_path))

     if exec_log2_path:
         split_exec_log(exec_log2_path, _actions_dir_for(exec_log2_path))


 def _actions_dir_for(exec_log_path: Path) -> Path:
     """Return the output directory path derived from an exec log path."""
     exec_log_path = Path(exec_log_path)
     # Only strip extensions from the file name itself, so that dots in parent
     # directories (including a leading "./" or "../") are left alone.
     file_name = exec_log_path.name
     base_name = file_name.split(".", maxsplit=1)[0] or file_name
     return exec_log_path.parent / (base_name + "_actions")


 def _get_parser() -> commandline.ArgumentParser:
     """Build the argument parser.

     Returns:
         A parser accepting --exec_log1 (required) and --exec_log2 (optional),
         stored as exec_log1_filename and exec_log2_filename respectively.
     """

     parser = commandline.ArgumentParser(description=__doc__)

     # The first log is always processed, so insist on it here rather than
     # failing later with an obscure error when it is missing.
     parser.add_argument(
         "--exec_log1",
         dest="exec_log1_filename",
         required=True,
         help="The path to the first exec log to analyze.",
     )

     parser.add_argument(
         "--exec_log2",
         dest="exec_log2_filename",
         help="The path to the second exec log to analyze.",
     )

     return parser


 def main(argv: Optional[List[str]]) -> Optional[int]:
     """Parse the command line and split the requested execution logs.

     Args:
         argv: The command line arguments, handed to the parser's
             parse_args().

     Returns:
         None.
     """
     parser = _get_parser()
     opts = parser.parse_args(argv)

     # The first log is mandatory; report a usage error instead of letting
     # Path(None) raise a TypeError.
     if not opts.exec_log1_filename:
         parser.error("--exec_log1 is required.")

     path1 = Path(opts.exec_log1_filename)
     path2 = None
     if opts.exec_log2_filename:
         path2 = Path(opts.exec_log2_filename)
     analyze_exec_logs(path1, path2)
	# Copyright 2023 The ChromiumOS Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Analyze the execution log files generated by Bazel."""

	import logging
	import os
	from pathlib import Path
	import re
	import shutil
	from typing import List, Optional

	from chromite.lib import commandline


	def translate_target_label_to_file_name(target_label: str) -> str:
	    """Convert a Bazel target label into a string usable as a file name.

	    The following substitutions are applied to every occurrence:
	    "@" becomes "at-", "~" becomes "-", ":" becomes "___", and each of
	    "/", ".", "(", ")" and " " becomes "_".

	    Args:
	        target_label: The label (or any other string) to convert.

	    Returns:
	        The converted string.
	    """
	    # None of the replacement strings contains a character that is itself
	    # replaced, so one translation pass gives the same result as applying
	    # the substitutions one after another.
	    substitutions = {"@": "at-", "~": "-", ":": "___"}
	    substitutions.update(dict.fromkeys("/.() ", "_"))
	    return target_label.translate(str.maketrans(substitutions))


	def split_exec_log(input_file_path: Path, output_dir_path: Path) -> None:
	    """Split an execution log into separate files per action.

	    This function takes a text execution log from a Bazel invocation and
	    breaks it apart into separate files under the provided output directory.
	    This facilitates future comparisons between two execution logs, which is
	    very painful when the content is in a single 100+ GB file.

	    Actions are delimited by lines starting with "-------".  Each action is
	    written to <output dir>/<target label>/<first listed output>, where both
	    path components are passed through
	    translate_target_label_to_file_name().  Lines containing "seconds: " or
	    "nanos: " are dropped.  Any existing output directory is deleted first.

	    Args:
	        input_file_path: The Path to the input execution log file.
	        output_dir_path: The Path to the dir to write output files.

	    Raises:
	        FileExistsError: If two actions map to the same output file.
	    """
	    if output_dir_path.exists():
	        shutil.rmtree(output_dir_path)
	    output_dir_path.mkdir()
	    target_label_pattern = re.compile(r'target_label: "(.*)"')
	    listed_outputs_pattern = re.compile(r'listed_outputs: "(.*)"')

	    logging.info("output_dir_path=%s", output_dir_path)

	    lines_for_target_file = []
	    target_label = None
	    qualifier = None

	    with open(input_file_path, "r", encoding="utf-8") as input_file:
	        for line in input_file:
	            if line.startswith("-------"):
	                # End of the current action: write it out and start afresh.
	                _write_action_file(
	                    output_dir_path,
	                    target_label,
	                    qualifier,
	                    lines_for_target_file,
	                )
	                lines_for_target_file = []
	                target_label = None
	                qualifier = None
	                continue

	            # Timing information is going to be inherently non-reproducible,
	            # so exclude it.
	            if "seconds: " in line or "nanos: " in line:
	                continue

	            lines_for_target_file.append(line)
	            if line.startswith("target_label: "):
	                match = target_label_pattern.match(line)
	                if match:
	                    target_label = match.group(1)
	            elif line.startswith("listed_outputs: ") and not qualifier:
	                # Only use the first listed_output for a file as its
	                # qualifier.
	                match = listed_outputs_pattern.match(line)
	                if match:
	                    qualifier = match.group(1)

	    # The last action may not be followed by a separator line; make sure it
	    # is not silently dropped.
	    _write_action_file(
	        output_dir_path, target_label, qualifier, lines_for_target_file
	    )


	def _write_action_file(output_dir_path, target_label, qualifier, lines):
	    """Write the buffered lines of one action to its own file.

	    The output path is resolved here, once the whole action has been read,
	    so it does not depend on the order of the target_label and
	    listed_outputs lines.

	    Args:
	        output_dir_path: The Path to the dir holding all per-target dirs.
	        target_label: The action's target label, or None if it had none.
	        qualifier: The action's first listed output, or None/"" if it had
	            none.
	        lines: The lines that make up the action.

	    Raises:
	        FileExistsError: If the output file for this action already exists.
	    """
	    # Nothing but blank lines (e.g. back-to-back separators): nothing to do.
	    if not any(line.strip() for line in lines):
	        return

	    # Fall back to fixed names rather than crashing when the action carries
	    # no target label or no listed output.
	    if target_label is None:
	        target_label = "no_target_label"
	    action_subdir_path = output_dir_path / translate_target_label_to_file_name(
	        target_label
	    )
	    action_file_path = action_subdir_path / translate_target_label_to_file_name(
	        qualifier or "no_listed_outputs"
	    )
	    logging.info("action_file_path=%s", action_file_path)

	    if action_file_path.exists():
	        raise FileExistsError(f"File {action_file_path} already exists.")
	    action_subdir_path.mkdir(exist_ok=True)

	    with open(action_file_path, "w", encoding="utf-8") as target_file:
	        target_file.writelines(lines)


	def analyze_exec_logs(
	    exec_log1_path: Path, exec_log2_path: Optional[Path] = None
	) -> None:
	    """Split up to two execution logs into separate files per action.

	    This function takes up to two text execution logs from Bazel invocations
	    and breaks them apart into separate files, under output directories with
	    names derived from the input filenames: the output directory sits next to
	    the log and is named after the log's file name up to its first ".", with
	    "_actions" appended (e.g. "logs/exec.log" -> "logs/exec_actions").

	    This facilitates future comparisons between two execution logs, which is
	    very painful when the content is in a single 100+ GB file.

	    Args:
	        exec_log1_path: The Path to the first input execution log file.
	        exec_log2_path: The Path to the second input execution log file, or
	            None if there is only one log.
	    """
	    split_exec_log(exec_log1_path, _actions_dir_for(exec_log1_path))

	    if exec_log2_path:
	        split_exec_log(exec_log2_path, _actions_dir_for(exec_log2_path))


	def _actions_dir_for(exec_log_path: Path) -> Path:
	    """Return the output directory path derived from an exec log path."""
	    exec_log_path = Path(exec_log_path)
	    # Only strip extensions from the file name itself, so that dots in parent
	    # directories (including a leading "./" or "../") are left alone.
	    file_name = exec_log_path.name
	    base_name = file_name.split(".", maxsplit=1)[0] or file_name
	    return exec_log_path.parent / (base_name + "_actions")


	def _get_parser() -> commandline.ArgumentParser:
	    """Build the argument parser.

	    Returns:
	        A parser accepting --exec_log1 (required) and --exec_log2 (optional),
	        stored as exec_log1_filename and exec_log2_filename respectively.
	    """

	    parser = commandline.ArgumentParser(description=__doc__)

	    # The first log is always processed, so insist on it here rather than
	    # failing later with an obscure error when it is missing.
	    parser.add_argument(
	        "--exec_log1",
	        dest="exec_log1_filename",
	        required=True,
	        help="The path to the first exec log to analyze.",
	    )

	    parser.add_argument(
	        "--exec_log2",
	        dest="exec_log2_filename",
	        help="The path to the second exec log to analyze.",
	    )

	    return parser


	def main(argv: Optional[List[str]]) -> Optional[int]:
	    """Parse the command line and split the requested execution logs.

	    Args:
	        argv: The command line arguments, handed to the parser's
	            parse_args().

	    Returns:
	        None.
	    """
	    parser = _get_parser()
	    opts = parser.parse_args(argv)

	    # The first log is mandatory; report a usage error instead of letting
	    # Path(None) raise a TypeError.
	    if not opts.exec_log1_filename:
	        parser.error("--exec_log1 is required.")

	    path1 = Path(opts.exec_log1_filename)
	    path2 = None
	    if opts.exec_log2_filename:
	        path2 = Path(opts.exec_log2_filename)
	    analyze_exec_logs(path1, path2)