| # Copyright 2020 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Runs clang-tidy across the given files, dumping diagnostics to a JSON file. |
| |
| This script is intended specifically for use with Tricium (go/tricium). |
| """ |
| |
| # From an implementation perspective, it's good to note that this script |
| # cooperates with the toolchain's compiler wrapper. In particular, |
| # ${cros}/src/third_party/toolchain-utils/compiler_wrapper/clang_tidy_flag.go. |
| # |
| # When |WITH_TIDY=tricium| is set and the wrapper (which is already $CC/$CXX) |
| # is invoked, $CC will invoke clang-tidy _as well_ as the regular compiler. |
| # This clang-tidy invocation will result in a few files being dumped to |
| # |LINT_BASE| (below): |
| # - "${LINT_BASE}/some-prefix.yaml" -- a YAML file that represents |
| # clang-tidy's diagnostics for the file the compiler was asked to build |
| # - "${LINT_BASE}/some-prefix.json" -- metadata about how the above YAML file |
| # was generated, including clang-tidy's exit code, stdout, etc. See |
| # |InvocationMetadata| below. |
| # |
| # As one might expect, the compiler wrapper writes the JSON file only after |
| # clang-tidy is done executing. |
| # |
| # This directory might contain other files, as well; these are ignored by this |
| # script. |
| |
| import bisect |
| import json |
| import multiprocessing |
| import os |
| from pathlib import Path |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| import traceback |
| from typing import (Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, |
| Union) |
| |
| import yaml # pylint: disable=import-error |
| from chromite.lib import commandline |
| from chromite.lib import cros_build_lib |
| from chromite.lib import cros_logging as logging |
| from chromite.lib import osutils |
| from chromite.lib import portage_util |
| from chromite.lib import workon_helper |
| |
# This file uses f-strings and class-style NamedTuple field annotations, both
# of which need Python 3.6 or newer.
assert sys.version_info >= (3, 6), 'This module requires Python 3.6+'

# Where the compiler wrapper leaves its clang-tidy reports: YAML diagnostics
# plus the JSON metadata describing each invocation.
LINT_BASE = Path('/tmp/linting_output/clang-tidy')
| |
| |
class TidyReplacement(NamedTuple):
  """A single textual fix suggested by clang-tidy.

  No file path is stored here: a replacement is meant to be attached to a
  TidyDiagnostic whose path is identical to the replacement's.
  """
  # Text to put in place of the replaced range.
  new_text: str
  # Line numbers on which the replaced range starts and ends.
  start_line: int
  end_line: int
  # Offsets, within their respective lines, of the range's start and end.
  start_char: int
  end_char: int
| |
| |
class TidyExpandedFrom(NamedTuple):
  """One level of the macro expansion a diagnostic was found inside.

  clang-tidy reports where a macro was expanded from whenever a diagnostic
  lands inside a macro expansion. A |TidyDiagnostic| carries one
  |TidyExpandedFrom| per level of that expansion.
  """
  file_path: Path
  line_number: int

  def to_dict(self) -> Dict[str, Any]:
    """Returns a JSON-serializable dict describing this |TidyExpandedFrom|."""
    # Path objects aren't JSON-serializable, so emit the POSIX string form.
    posix_path = self.file_path.as_posix()
    return dict(file_path=posix_path, line_number=self.line_number)
| |
| |
class Error(Exception):
  """Common ancestor of the exceptions defined by tricium-clang-tidy."""
| |
| |
class ClangTidyParseError(Error):
  """Signals that one or more clang-tidy parsing jobs failed."""

  def __init__(self, failed_jobs: int, total_jobs: int):
    """Records how many of the |total_jobs| parse jobs failed."""
    summary = f'{failed_jobs}/{total_jobs} parse jobs failed'
    super().__init__(summary)
    self.total_jobs = total_jobs
    self.failed_jobs = failed_jobs
| |
| |
class TidyDiagnostic(NamedTuple):
  """A diagnostic emitted by clang-tidy.

  Note that we shove these in a set for cheap deduplication, and we sort based
  on the natural element order here. Sorting is mostly just for
  deterministic/pretty output.
  """
  # None when clang-tidy reported the diagnostic without a file name.
  file_path: Optional[Path]
  line_number: int
  diag_name: str
  message: str
  # Both of these are variable-length tuples (possibly empty), hence the `...`.
  replacements: Tuple[TidyReplacement, ...]
  expansion_locs: Tuple[TidyExpandedFrom, ...]

  def normalize_paths_to(self, where: Union[str, Path]) -> 'TidyDiagnostic':
    """Creates a new TidyDiagnostic with all paths relative to |where|.

    Args:
      where: the directory that |file_path| and every expansion location's
        path should be expressed relative to.

    Returns:
      A copy of this diagnostic with rewritten paths; |self| is not modified.
    """

    def relative(path: Path) -> Path:
      # Use relpath because Path.relative_to requires that `path` is rooted
      # at `where`.
      return Path(os.path.relpath(path, where))

    return self._replace(
        file_path=relative(self.file_path),
        expansion_locs=tuple(
            x._replace(file_path=relative(x.file_path))
            for x in self.expansion_locs))

  def to_dict(self) -> Dict[str, Any]:
    """Converts this |TidyDiagnostic| to a dict serializable as JSON."""
    return {
        'file_path': self.file_path.as_posix(),
        'line_number': self.line_number,
        'diag_name': self.diag_name,
        'message': self.message,
        'replacements': [x._asdict() for x in self.replacements],
        'expansion_locs': [x.to_dict() for x in self.expansion_locs],
    }
| |
| |
class ClangTidySchemaError(Error):
  """Signals that clang-tidy's YAML did not have the expected structure."""

  def __init__(self, err_msg: str):
    """Stores |err_msg| both as the exception text and as an attribute."""
    self.err_msg = err_msg
    super().__init__(err_msg)
| |
| |
class LineOffsetMap:
  """Translates offsets within a file into line numbers and columns."""

  def __init__(self, newline_locations: Iterable[int]):
    """Builds a map from the offsets at which newline characters occur."""
    # Every line begins one position past the newline ending its predecessor.
    # Bracketing those starts with 0 (the first line) and sys.maxsize (a
    # sentinel beyond any accepted offset) spares the |bisect| lookups in
    # |get_line_number|/|get_line_offset| from special-casing the first and
    # last lines of a file.
    starts = sorted([0, sys.maxsize, *(nl + 1 for nl in newline_locations)])

    assert starts[0] == 0, starts[0]
    assert starts[1] != 0, starts[1]
    assert starts[-2] < sys.maxsize, starts[-2]
    assert starts[-1] == sys.maxsize, starts[-1]

    self._line_starts = starts

  def get_line_number(self, char_number: int) -> int:
    """Returns the line number (counting from 1) that holds |char_number|."""
    assert 0 <= char_number < sys.maxsize, char_number
    # The count of line starts at or before the offset is its line number.
    return bisect.bisect_right(self._line_starts, char_number)

  def get_line_offset(self, char_number: int) -> int:
    """Returns the distance of |char_number| from the start of its line."""
    assert 0 <= char_number < sys.maxsize, char_number
    containing_line = bisect.bisect_right(self._line_starts, char_number) - 1
    return char_number - self._line_starts[containing_line]

  @staticmethod
  def for_text(data: str) -> 'LineOffsetMap':
    """Builds a LineOffsetMap from the newline positions found in |data|."""
    return LineOffsetMap(i for i, ch in enumerate(data) if ch == '\n')
| |
| |
def parse_tidy_fixes_file(tidy_invocation_dir: Path,
                          yaml_data: Any) -> Iterable[TidyDiagnostic]:
  """Parses a clang-tidy YAML file.

  This is a generator, so parsing (and any error it raises) only happens as
  the result is iterated.

  Args:
    tidy_invocation_dir: The directory clang-tidy was run in. Must be an
      absolute path; relative paths in the YAML are resolved against it.
    yaml_data: The parsed YAML data from clang-tidy's fixits file. If None,
      no diagnostics are produced.

  Yields:
    One |TidyDiagnostic| per entry in |yaml_data|'s 'Diagnostics' list.

  Raises:
    ClangTidySchemaError: if a key this parser requires is missing.
  """
  assert tidy_invocation_dir.is_absolute(), tidy_invocation_dir

  if yaml_data is None:
    return

  # A cache of file_path => LineOffsetMap so we only need to load offsets once
  # per file per |parse_tidy_fixes_file| invocation.
  cached_line_offsets = {}

  def get_line_offsets(file_path: Optional[Path]) -> LineOffsetMap:
    """Gets a LineOffsetMap for the given |file_path|."""
    assert not file_path or file_path.is_absolute(), file_path

    if file_path in cached_line_offsets:
      return cached_line_offsets[file_path]

    # Sometimes tidy will give us empty file names; they don't map to any file,
    # and are generally issues it has with CFLAGS, etc. File offsets don't
    # matter in those, so use an empty map.
    # NOTE(review): the map is built from decoded text, so it counts
    # characters; presumably clang-tidy's offsets are byte offsets, which would
    # only disagree for files containing multi-byte characters -- confirm.
    if file_path:
      offsets = LineOffsetMap.for_text(file_path.read_text(encoding='utf-8'))
    else:
      offsets = LineOffsetMap(())
    cached_line_offsets[file_path] = offsets
    return offsets

  # Rarely (e.g., in the case of missing |#include|s), clang will emit relative
  # file paths for diagnostics. This fixes those.
  def makeabs(file_path: str) -> Optional[Path]:
    """Resolves a |file_path| emitted by clang-tidy to an absolute path.

    Returns None if |file_path| is empty.
    """
    if not file_path:
      return None
    path = Path(file_path)
    if not path.is_absolute():
      path = tidy_invocation_dir / path
    return path.resolve()

  try:
    for diag in yaml_data['Diagnostics']:
      message = diag['DiagnosticMessage']
      file_path = message['FilePath']

      absolute_file_path = makeabs(file_path)
      line_offsets = get_line_offsets(absolute_file_path)

      replacements = []
      for replacement in message.get('Replacements', ()):
        replacement_file_path = makeabs(replacement['FilePath'])

        # FIXME(gbiv): This happens in practice with things like
        # hicpp-member-init. Supporting it should be simple, but I'd like to
        # get the basics running first.
        if replacement_file_path != absolute_file_path:
          logging.warning(
              "Replacement %r wasn't in original file %r (diag: %r)",
              replacement_file_path, file_path, diag)
          continue

        start_offset = replacement['Offset']
        end_offset = start_offset + replacement['Length']
        replacements.append(
            TidyReplacement(
                new_text=replacement['ReplacementText'],
                start_line=line_offsets.get_line_number(start_offset),
                end_line=line_offsets.get_line_number(end_offset),
                start_char=line_offsets.get_line_offset(start_offset),
                end_char=line_offsets.get_line_offset(end_offset),
            ))

      expansion_locs = []
      for note in diag.get('Notes', ()):
        # Only macro-expansion notes are kept; all other notes are dropped.
        if not note['Message'].startswith('expanded from macro '):
          continue

        absolute_note_path = makeabs(note['FilePath'])
        note_offsets = get_line_offsets(absolute_note_path)
        expansion_locs.append(
            TidyExpandedFrom(
                file_path=absolute_note_path,
                line_number=note_offsets.get_line_number(note['FileOffset']),
            ))

      yield TidyDiagnostic(
          diag_name=diag['DiagnosticName'],
          message=message['Message'],
          file_path=absolute_file_path,
          line_number=line_offsets.get_line_number(message['FileOffset']),
          replacements=tuple(replacements),
          expansion_locs=tuple(expansion_locs),
      )
  except KeyError as k:
    key_name = k.args[0]
    # Chain explicitly so the original KeyError is recorded as the cause.
    raise ClangTidySchemaError(
        f'Broken yaml: missing key {key_name!r}') from k
| |
| |
# Mirrors what the compiler wrapper records about each clang-tidy run.
class InvocationMetadata(NamedTuple):
  """Describes one invocation of clang-tidy."""
  # Exit code reported for the clang-tidy run.
  exit_code: int
  # The executable followed by the arguments it was given.
  invocation: List[str]
  # What clang-tidy was asked to lint.
  lint_target: str
  # Captured output of the run.
  stdstreams: str
  # Directory the run was made from.
  wd: str
| |
| |
class ExceptionData:
  """Holds a formatted traceback so a failure can be sent across processes."""

  def __init__(self):
    """Captures the exception currently being handled.

    Only intended to be called from inside an `except` block.
    """
    self._str = traceback.format_exc()

  def __str__(self):
    """Returns the traceback text captured at construction time."""
    return self._str
| |
| |
def parse_tidy_invocation(
    json_file: Path,
) -> Union[ExceptionData, Tuple[InvocationMetadata, List[TidyDiagnostic]]]:
  """Parses a clang-tidy invocation result based on a JSON file.

  This is intended to be run in a separate process, which Exceptions and
  locking and such work notoriously poorly over, so it's never intended to
  |raise| (except under a KeyboardInterrupt or similar).

  Args:
    json_file: The JSON invocation metadata file to parse. The matching YAML
      diagnostics file is expected next to it, with a '.yaml' suffix.

  Returns:
    An |ExceptionData| instance on failure. On success, it returns a
    (InvocationMetadata, [TidyDiagnostic]).
  """
  try:
    assert json_file.suffix == '.json', json_file

    with json_file.open(encoding='utf-8') as f:
      raw_meta = json.load(f)

    meta = InvocationMetadata(
        exit_code=raw_meta['exit_code'],
        invocation=[raw_meta['executable']] + raw_meta['args'],
        lint_target=raw_meta['lint_target'],
        stdstreams=raw_meta['stdstreams'],
        wd=raw_meta['wd'],
    )

    raw_crash_output = raw_meta.get('crash_output')
    if raw_crash_output:
      crash_reproducer_path = raw_crash_output['crash_reproducer_path']
      output = raw_crash_output['stdstreams']
      raise RuntimeError(f"""\
Clang-tidy apparently crashed; dumping lots of invocation info:
## Tidy JSON file target: {json_file}
## Invocation: {meta.invocation}
## Target: {meta.lint_target}
## Crash reproducer is at: {crash_reproducer_path}
## Output producing reproducer:
{output}
## Output from the crashing invocation:
{meta.stdstreams}
""")

    yaml_file = json_file.with_suffix('.yaml')
    # If this happened, clang-tidy was probably killed. Dump output as part of
    # the exception so it's easier to reason about what happened.
    if not yaml_file.exists():
      raise RuntimeError("clang-tidy didn't produce an output file for "
                         f'{json_file}. Output:\n{meta.stdstreams}')

    with yaml_file.open('rb') as f:
      # |safe_load| only builds plain Python data, which is all this parser
      # reads. A bare |yaml.load(f)| without a Loader is deprecated and is
      # rejected outright by newer PyYAML releases.
      yaml_data = yaml.safe_load(f)
    return meta, list(parse_tidy_fixes_file(Path(meta.wd), yaml_data))
  except Exception:
    # Deliberately broad: every failure is handed back to the parent process
    # as data rather than raised across the process boundary.
    return ExceptionData()
| |
| |
def generate_lints(board: str, ebuild_path: str) -> Path:
  """Builds one package with clang-tidy enabled and gathers its reports.

  Args:
    board: the board to collect lints for.
    ebuild_path: the path to the ebuild to collect lints for.

  Returns:
    The path to a freshly created tmpdir holding a copy of everything found in
    |LINT_BASE| after the build: the lint YAML files (if any) and the JSON
    files containing InvocationMetadata. Those files are produced by our
    compiler wrapper.
  """
  logging.info('Running lints for %r on board %r', ebuild_path, board)

  # Recreate the report directory so nothing from an earlier run is present.
  osutils.RmDir(LINT_BASE, ignore_missing=True, sudo=True)
  osutils.SafeMakedirs(LINT_BASE, 0o777, sudo=True)

  # FIXME(gbiv): |test| might be better here?
  ebuild_cmd = [f'ebuild-{board}', ebuild_path, 'clean', 'compile']
  result = cros_build_lib.run(
      ebuild_cmd,
      check=False,
      print_cmd=True,
      extra_env={'WITH_TIDY': 'tricium'},
      capture_output=True,
      encoding='utf-8',
      errors='replace',
  )

  # A failed build is only logged; whatever reports exist are still collected.
  if not result.returncode:
    log_fn = logging.info
    status = 'succeeded'
  else:
    log_fn = logging.warning
    status = f'failed with code {result.returncode}; output:\n{result.stdout}'
  log_fn('Running |ebuild| on %s %s', ebuild_path, status)

  lint_tmpdir = tempfile.mkdtemp(prefix='tricium_tidy')
  osutils.CopyDirContents(LINT_BASE, lint_tmpdir)
  return Path(lint_tmpdir)
| |
| |
def collect_lints(lint_tmpdir: Path,
                  yaml_pool: multiprocessing.Pool) -> Set[TidyDiagnostic]:
  """Parses every JSON/YAML report pair found directly in |lint_tmpdir|.

  Args:
    lint_tmpdir: directory holding the linting artifacts.
    yaml_pool: pool on which the individual files are parsed.

  Returns:
    The set of all diagnostics found across the parsed files.

  Raises:
    ClangTidyParseError: if parsing failed for any file. This is raised only
      after every file has been attempted.
  """
  json_files = list(lint_tmpdir.glob('*.json'))
  # |imap| hands results back in input order, so each result can be paired
  # with the file it came from.
  parse_results = yaml_pool.imap(parse_tidy_invocation, json_files)

  failure_count = 0
  diagnostics = set()
  for json_path, outcome in zip(json_files, parse_results):
    if not isinstance(outcome, ExceptionData):
      meta, found = outcome
      # A nonzero exit is worth a warning, but its diagnostics are still kept.
      if meta.exit_code:
        logging.warning(
            'Invoking clang-tidy on %r with flags %r exited with code %d; '
            'output:\n%s',
            meta.lint_target,
            meta.invocation,
            meta.exit_code,
            meta.stdstreams,
        )
      diagnostics.update(found)
    else:
      failure_count += 1
      logging.error('Parsing %r failed with an exception\n%s', json_path,
                    outcome)

  if failure_count:
    raise ClangTidyParseError(failure_count, len(json_files))

  return diagnostics
| |
| |
def setup_tidy(board: str, ebuild_list: List[portage_util.EBuild]):
  """Prepares |board| so that the given ebuilds can be linted.

  Stops working on all packages, starts working on just the ones being linted,
  and then emerges their dependencies. A failing emerge is logged but does not
  raise.
  """
  packages = [ebuild.package for ebuild in ebuild_list]
  logging.info('Setting up to lint %r', packages)

  sysroot = cros_build_lib.GetSysroot(board)
  workon = workon_helper.WorkonHelper(sysroot)
  workon.StopWorkingOnPackages(packages=[], use_all=True)
  workon.StartWorkingOnPackages(packages)

  # Since each `emerge` may eat up to `ncpu` cores, limit the maximum
  # concurrency we can get here to (arbitrarily) 8 jobs. Having `configure`s
  # and such run in parallel is nice.
  max_jobs = min(8, multiprocessing.cpu_count())

  # We're going to be hacking with |ebuild| later on, so having all
  # dependencies in place is necessary so one |ebuild| won't stomp on another.
  emerge_cmd = [f'emerge-{board}', '--onlydeps', f'-j{max_jobs}', *packages]
  result = cros_build_lib.run(emerge_cmd, print_cmd=True, check=False)
  if result.returncode:
    logging.error('Setup failed with exit code %d; some lints may fail.',
                  result.returncode)
| |
| |
def run_tidy(board: str, ebuild_list: List[portage_util.EBuild],
             keep_dirs: bool,
             parse_errors_are_nonfatal: bool) -> Set[TidyDiagnostic]:
  """Runs clang-tidy on the given ebuilds for the given board.

  Args:
    board: the board to build for.
    ebuild_list: the ebuilds to build and lint, one after another.
    keep_dirs: if True, each ebuild's directory of lint artifacts is left in
      place (and its location logged) instead of being removed.
    parse_errors_are_nonfatal: if True, a |ClangTidyParseError| for an ebuild
      is logged and that ebuild's results are skipped; otherwise it is
      re-raised.

  Returns:
    The set of |TidyDiagnostic|s produced by doing so.

  Raises:
    ClangTidyParseError: if parsing fails and |parse_errors_are_nonfatal| is
      False.
  """
  # Since we rely on build actions _actually_ running, we can't live with a
  # cache.
  osutils.RmDir(
      Path(cros_build_lib.GetSysroot(board)) / 'var' / 'cache' / 'portage',
      ignore_missing=True,
      sudo=True,
  )

  results = set()
  # If clang-tidy dumps a lot of diags, it can take 1-10secs of CPU while
  # holding the GIL to |yaml.load| on my otherwise-idle dev box. |yaml_pool|
  # lets us do this in parallel.
  #
  # |multiprocessing.Pool| is used rather than |multiprocessing.pool.Pool|
  # because a plain |import multiprocessing| does not load the |pool|
  # submodule; the attribute lookup only works if something else imported it.
  with multiprocessing.Pool() as yaml_pool:
    for ebuild in ebuild_list:
      lint_tmpdir = generate_lints(board, ebuild.ebuild_path)
      try:
        results |= collect_lints(lint_tmpdir, yaml_pool)
      except ClangTidyParseError:
        if not parse_errors_are_nonfatal:
          raise
        logging.exception('Working on %r', ebuild)
      finally:
        # Runs whether or not parsing succeeded, so artifacts are never leaked
        # unless the caller asked to keep them.
        if keep_dirs:
          logging.info('Lints for %r are in %r', ebuild.ebuild_path,
                       lint_tmpdir)
        else:
          osutils.RmDir(lint_tmpdir, ignore_missing=True, sudo=True)
  return results
| |
| |
def resolve_package_ebuilds(board: str,
                            package_names: Iterable[str]) -> List[str]:
  """Figures out ebuild paths for the given package names.

  Args:
    board: the board whose |equery| is used for lookups.
    package_names: package names and/or paths ending in '.ebuild'. The latter
      are passed through unchanged.

  Returns:
    A list of ebuild paths, in the same order as |package_names|.
  """
  # Imported here because a plain |import multiprocessing| does not load the
  # |pool| submodule, so |multiprocessing.pool.ThreadPool| is only reachable
  # if something else happened to import it first.
  from multiprocessing.pool import ThreadPool  # pylint: disable=import-outside-toplevel

  def resolve_package(package_name_or_ebuild):
    """Resolves a single package name to an ebuild path."""
    if package_name_or_ebuild.endswith('.ebuild'):
      return package_name_or_ebuild
    return cros_build_lib.run([f'equery-{board}', 'w', package_name_or_ebuild],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8').stdout.strip()

  # Resolving ebuilds takes time. If we get more than one (like when I'm
  # testing on 50 of them), parallelism speeds things up quite a bit.
  with ThreadPool() as pool:
    return pool.map(resolve_package, package_names)
| |
| |
def filter_tidy_lints(only_files: Optional[Set[Path]],
                      git_repo_base: Optional[Path],
                      diags: Iterable[TidyDiagnostic]) -> List[TidyDiagnostic]:
  """Filters the given TidyDiagnostics and rewrites their paths.

  Diagnostics with no associated file are always dropped.

  Args:
    only_files: a set of file paths, or None. When this is a non-empty set,
      only |TidyDiagnostic|s in these files are kept.
    git_repo_base: if not None, only diagnostics in files under this directory
      are kept, and all paths of the returned diagnostics are made relative to
      it.
    diags: diagnostics to transform/filter.

  Returns:
    A sorted list of |TidyDiagnostic|s.
  """
  kept = []
  seen = 0

  for seen, diag in enumerate(diags, start=1):
    if not diag.file_path:
      # Things like |-DFOO=1 -DFOO=2| can trigger diagnostics ("oh no you're
      # redefining |FOO| with a different value") in 'virtual' files; these
      # receive no name in clang.
      logging.info('Dropping diagnostic %r, since it has no associated file',
                   diag)
      continue

    file_path = Path(diag.file_path)
    not_requested = only_files and file_path not in only_files
    if not_requested:
      continue

    if git_repo_base:
      inside_repo = git_repo_base in file_path.parents
      if not inside_repo:
        continue
      diag = diag.normalize_paths_to(git_repo_base)

    kept.append(diag)

  logging.info('Dropped %d/%d diags', seen - len(kept), seen)

  kept.sort()
  return kept
| |
| |
def get_parser() -> commandline.ArgumentParser:
  """Builds the command-line parser; the module docstring is its description."""
  parser = commandline.ArgumentParser(description=__doc__)
  add = parser.add_argument

  # Arguments are registered in the order they should appear in --help.
  add('--output', required=True, type='path', help='File to write results to.')

  git_repo_base_help = (
      "Base directory of the git repo we're looking at. If specified, "
      'only diagnostics in files in this directory will be emitted. All '
      'diagnostic file paths will be made relative to this directory.')
  add('--git-repo-base', type='path', help=git_repo_base_help)

  add('--board', required=True, help='Board to run under.')
  add('--package',
      action='append',
      required=True,
      help='Package(s) to build and lint. Required.')

  keep_lint_dirs_help = (
      'Keep directories with tidy lints around; meant primarily for '
      'debugging.')
  add('--keep-lint-dirs', action='store_true', help=keep_lint_dirs_help)

  add('--nonfatal-parse-errors',
      action='store_true',
      help="Keep going even if clang-tidy's output is impossible to parse.")

  file_help = (
      'File(s) to output lints for. If none are specified, this tool '
      'outputs all lints that clang-tidy emits after applying filtering '
      'from |--git-repo-base|, if applicable.')
  add('file', nargs='*', type='path', help=file_help)
  return parser
| |
| |
def main(argv: List[str]) -> None:
  """Lints the requested packages and writes the diagnostics out as JSON.

  Args:
    argv: command-line arguments, as accepted by |get_parser|.
  """
  cros_build_lib.AssertInsideChroot()
  parser = get_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  only_files = set(Path(f).resolve() for f in opts.file)

  git_repo_base = opts.git_repo_base
  if git_repo_base:
    git_repo_base = Path(git_repo_base)
    has_git_dir = (git_repo_base / '.git').exists()
    if not has_git_dir:
      # This script doesn't strictly care if there's a .git dir there; more of
      # a smoke check.
      parser.error(f'Given git repo base ({git_repo_base}) has no .git dir')

  ebuild_paths = resolve_package_ebuilds(opts.board, opts.package)
  package_ebuilds = [portage_util.EBuild(path) for path in ebuild_paths]

  setup_tidy(opts.board, package_ebuilds)
  raw_diags = run_tidy(opts.board, package_ebuilds, opts.keep_lint_dirs,
                       opts.nonfatal_parse_errors)
  lints = filter_tidy_lints(only_files, git_repo_base, diags=raw_diags)

  serialized = json.dumps({'tidy_diagnostics': [x.to_dict() for x in lints]})
  osutils.WriteFile(opts.output, serialized, atomic=True)