sbom_info_lib: add support for license.json

ChromeOS moved from license.yaml to license.json.

We still support both in the install hook for now, because we may
install binary packages that were built before the license.json switch.

BUG=b/307995090
TEST=presubmit
RELEASE_NOTE=None

Change-Id: I74ff4fb16fdcc4d33b2ec276ba0e6471aea4391d
Reviewed-on: https://cos-review.googlesource.com/c/third_party/platform/crosutils/+/64715
Tested-by: Cusky Presubmit Bot <presubmit@cos-infra-prod.iam.gserviceaccount.com>
Reviewed-by: He Gao <hegao@google.com>
diff --git a/hooks/install/sbom_info_lib/licenses.py b/hooks/install/sbom_info_lib/licenses.py
index d430a0a..f219748 100644
--- a/hooks/install/sbom_info_lib/licenses.py
+++ b/hooks/install/sbom_info_lib/licenses.py
@@ -12,8 +12,9 @@
 
 # This script is used to parse licenses of a package.
 
-import re
+import json
 import os
+import re
 from chromite.lib import cros_build_lib
 from chromite.licensing import licenses_lib
 from sbom_info_lib import license_data
@@ -131,6 +132,35 @@
     return found, saved_scanned_txt, saved_license_files
 
 
+# Parse license.json.
+def parse_license_json(data, pkg_name):
+    parsed_data = json.loads(data)
+    # Try to find scanned license text in license.json.
+    saved_scanned_txt = ", ".join(parsed_data["license_text_scanned"])
+    saved_license_files = []
+    license_file_match = re.findall(
+        REGEX_LICENSE_FILE_NAME, saved_scanned_txt, re.DOTALL
+    )
+    for n in license_file_match:
+        saved_license_files.append(n.strip())
+
+    # Try to find scanned license names in license.json.
+    found = []
+    licenses = parsed_data["license_names"]
+    for license in licenses:
+        # Public domain is not a license; also skip empty/metapackage entries.
+        if not license or license == "public-domain" or license == "metapackage":
+            continue
+        if license in LICENSE_MAP:
+            license = LICENSE_MAP[license]
+        found.append(license)
+    # There are cases where license.json contains no license
+    # but only scanned license text e.g. dev-libs/libpcre.
+    if not found and saved_license_files:
+        found.append(pkg_name)
+    return found, saved_scanned_txt, saved_license_files
+
+
 def extract_other_licenses(licenses, src_path, saved_scanned_txt, saved_license_files):
     # other_license_list format: [
     # {
@@ -262,12 +292,20 @@
 
 
 def get_licenses(build_info_dir, src_path, pkg_name):
-    if not os.path.exists(os.path.join(build_info_dir, "license.yaml")):
+    has_yaml = os.path.exists(os.path.join(build_info_dir, "license.yaml"))
+    has_json = os.path.exists(os.path.join(build_info_dir, "license.json"))
+    if not has_yaml and not has_json:
         return ""
-    with open(os.path.join(build_info_dir, "license.yaml"), "r") as l:
-        licenses, saved_scanned_txt, saved_license_files = parse_license_yaml(
-            l.read(), pkg_name
-        )
+    if has_json:
+        with open(os.path.join(build_info_dir, "license.json"), "r") as l:
+            licenses, saved_scanned_txt, saved_license_files = parse_license_json(
+                l.read(), pkg_name
+            )
+    elif has_yaml:
+        with open(os.path.join(build_info_dir, "license.yaml"), "r") as l:
+            licenses, saved_scanned_txt, saved_license_files = parse_license_yaml(
+                l.read(), pkg_name
+            )
 
     other_license_list = extract_other_licenses(
         licenses, src_path, saved_scanned_txt, saved_license_files