llvm_tools: add lint_llvm_patches.py

This checks PATCHES.json for simple errors (malformed JSON, patches
that are referenced but missing, and patch files that exist but are
never referenced), and gets run during presubmits.
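
For reference, the presubmit runs the script through the py/bin
wrapper added here, so it should also be runnable by hand from a
toolchain-utils checkout (it takes no flags beyond `--help`):

  ./py/bin/llvm_tools/lint_llvm_patches.py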

BUG=b:406239688
TEST=Ran the script with:
TEST=  - malformed JSON
TEST=  - unused patch files
TEST=  - nonexistent patch files
TEST=Also, checked `repo upload` output to verify the check gets run.

Change-Id: Ic5a545855a11f6f2eed4b3029a50f116f814edad
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/6394156
Reviewed-by: Jordan Abrahams-Whitehead <ajordanr@google.com>
Tested-by: George Burgess <gbiv@chromium.org>
Commit-Queue: George Burgess <gbiv@chromium.org>
diff --git a/llvm_tools/lint_llvm_patches.py b/llvm_tools/lint_llvm_patches.py
new file mode 100644
index 0000000..71e4656
--- /dev/null
+++ b/llvm_tools/lint_llvm_patches.py
@@ -0,0 +1,107 @@
+# Copyright 2025 The ChromiumOS Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""A script used to lint patches in llvm_patches/.
+
+Meant to be called from presubmit hooks in toolchain-utils.
+"""
+
+import argparse
+import json
+import logging
+import os
+from pathlib import Path
+import sys
+from typing import List
+
+from cros_utils import cros_paths
+from llvm_tools import patch_utils
+
+
+def load_patches_json(llvm_patches: Path) -> List[patch_utils.PatchEntry]:
+    patches_json = (
+        llvm_patches / cros_paths.DEFAULT_PATCHES_PATH_IN_TOOLCHAIN_UTILS.name
+    )
+    try:
+        with patches_json.open(encoding="utf-8") as f:
+            return patch_utils.json_to_patch_entries(
+                workdir=llvm_patches, json_fd=f
+            )
+    except FileNotFoundError:
+        sys.exit(f"error: PATCHES.json not found at {patches_json}")
+    except json.JSONDecodeError:
+        sys.exit(f"error: PATCHES.json at {patches_json} is ill-formed")
+
+
+def extract_all_patch_paths_from_patches_json(
+    patches_json: List[patch_utils.PatchEntry],
+) -> List[Path]:
+    return [x.workdir / x.rel_patch_path for x in patches_json]
+
+
+def find_all_patch_files_in(base_dir: Path) -> List[Path]:
+    results = []
+    for root, _, files in os.walk(base_dir):
+        proot = Path(root)
+        for file in files:
+            if file.endswith(".patch"):
+                results.append(proot / file)
+    return results
+
+
+def main(argv: List[str]) -> None:
+    toolchain_utils = cros_paths.script_toolchain_utils_root()
+
+    logging.basicConfig(
+        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
+        "%(message)s",
+        level=logging.INFO,
+    )
+
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    # No args to parse for the moment, but responding to `--help` is nice.
+    _ = parser.parse_args(argv)
+
+    llvm_patches = (
+        toolchain_utils
+        / cros_paths.DEFAULT_PATCHES_PATH_IN_TOOLCHAIN_UTILS.parent
+    )
+    referenced_paths = set(
+        extract_all_patch_paths_from_patches_json(
+            load_patches_json(llvm_patches)
+        )
+    )
+    available_paths = set(find_all_patch_files_in(llvm_patches))
+
+    fail = False
+    refed_but_not_available = referenced_paths - available_paths
+    if refed_but_not_available:
+        fail = True
+        print(
+            "Patches are referenced from PATCHES.json, but not present:",
+            file=sys.stderr,
+        )
+        for p in sorted(refed_but_not_available):
+            print(f"  - {p}", file=sys.stderr)
+
+    available_but_not_refed = available_paths - referenced_paths
+    if available_but_not_refed:
+        fail = True
+        print(
+            "Patches are present, but not referenced from PATCHES.json:",
+            file=sys.stderr,
+        )
+        for p in sorted(available_but_not_refed):
+            print(f"  - {p}", file=sys.stderr)
+
+    if fail:
+        sys.exit(1)
+
+    print(
+        "All looks good! PATCHES.json parses, and the `.patch` files on "
+        "disk all match its entries."
+    )
diff --git a/py/bin/llvm_tools/lint_llvm_patches.py b/py/bin/llvm_tools/lint_llvm_patches.py
new file mode 120000
index 0000000..0f1ca49
--- /dev/null
+++ b/py/bin/llvm_tools/lint_llvm_patches.py
@@ -0,0 +1 @@
+../../../python_wrapper.py
\ No newline at end of file
diff --git a/toolchain_utils_githooks/check-presubmit.py b/toolchain_utils_githooks/check-presubmit.py
index 3919ad8..724ee34 100755
--- a/toolchain_utils_githooks/check-presubmit.py
+++ b/toolchain_utils_githooks/check-presubmit.py
@@ -82,6 +82,9 @@
     "toolchain_utils_githooks/check-presubmit.py",
 )
 
+# Path to the script that lints changes to ${toolchain_utils}/llvm_patches.
+LINT_LLVM_PATCHES_SCRIPT = "llvm_tools/lint_llvm_patches.py"
+
 
 def run_command_unchecked(
     command: Command,
@@ -1068,6 +1071,35 @@
     ]
 
 
+def files_that_modify_patches_checks(
+    toolchain_utils_root: str, files: List[str]
+) -> List[str]:
+    llvm_patches = os.path.join(toolchain_utils_root, "llvm_patches/")
+    return [
+        x
+        for x in files
+        if x.startswith(llvm_patches) or x.endswith(LINT_LLVM_PATCHES_SCRIPT)
+    ]
+
+
+def check_patches_subdir(
+    toolchain_utils_root: str,
+    _thread_pool: multiprocessing.pool.ThreadPool,
+    _files: Iterable[str],
+) -> CheckResult:
+    check_script = (
+        Path(toolchain_utils_root) / "py" / "bin" / LINT_LLVM_PATCHES_SCRIPT
+    )
+    return_code, stdstreams = run_command_unchecked(
+        [check_script], cwd=toolchain_utils_root
+    )
+    return CheckResult(
+        ok=return_code == 0,
+        output=stdstreams,
+        autofix_commands=[],
+    )
+
+
 def main(argv: List[str]) -> int:
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
@@ -1172,6 +1204,9 @@
         ),
     ]
 
+    if x := files_that_modify_patches_checks(toolchain_utils_root, files):
+        checks.append(("check_patches_subdir", check_patches_subdir, x))
+
     # NOTE: As mentioned above, checks can block on threads they spawn in this
     # pool, so we need at least len(checks)+1 threads to avoid deadlock. Use *2
     # so all checks can make progress at a decent rate.