Update install hook for SBOM pkg info

After the ChromeOS rebase, several ebuild files
were updated and the hook can no longer find
their download URLs. This CL adds some allowed
env vars and treats packages with license
"metapackage" as virtual packages.

BUG=b/254334533
TEST=presubmit
RELEASE_NOTE=None

Change-Id: I226fdfc343211a7b2df7852a9332fdc90198702d
Reviewed-on: https://cos-review.googlesource.com/c/third_party/platform/crosutils/+/41414
Tested-by: Cusky Presubmit Bot <presubmit@cos-infra-prod.iam.gserviceaccount.com>
Reviewed-by: Robert Kolchmeyer <rkolchmeyer@google.com>
diff --git a/hooks/install/gen-sbom-package-info.py b/hooks/install/gen-sbom-package-info.py
index 05bce2a..29bdecd 100755
--- a/hooks/install/gen-sbom-package-info.py
+++ b/hooks/install/gen-sbom-package-info.py
@@ -56,7 +56,12 @@
         category = f.read().strip()
     with open(os.path.join(build_info_dir, "PF"), "r") as f:
         pf = f.read().strip()
-    return repository, category, pf
+    license_path = os.path.join(build_info_dir, "LICENSE")
+    license = ""
+    if os.path.exists(license_path):
+        with open(license_path, "r") as f:
+            license = f.read().strip()
+    return repository, category, pf, license
 
 
 def main():
@@ -67,13 +72,13 @@
     try:
         package_name = os.path.basename(package_dir)
         ebuild = os.path.join(build_info_dir, package_name + ".ebuild")
-        repository, category, pf = read_build_info(build_info_dir)
+        repository, category, pf, license = read_build_info(build_info_dir)
         if "private" in repository:
             # Skip private packages.
             private = True
             return
         sbom_pkg_info.download_url = download_url.get_download_url(
-            ebuild, repository, category, pf
+            ebuild, repository, category, pf, license
         )
         sbom_pkg_info.licenses = licenses.get_licenses(build_info_dir)
         sbom_pkg_info.go_dep = go_dep.get_go_dep(
diff --git a/hooks/install/sbom_info_lib/download_url.py b/hooks/install/sbom_info_lib/download_url.py
index 37b3999..947f042 100644
--- a/hooks/install/sbom_info_lib/download_url.py
+++ b/hooks/install/sbom_info_lib/download_url.py
@@ -48,12 +48,15 @@
     "CROS_WORKON_REPO",
     "GNOME_ORG_MODULE",
     "GRUB2_COMMIT",
+    "MY_CODE_VER",
+    "MY_DATA_VER",
 }
 # For packages whose package names are hard to parse or not defined in ebuilds.
 PN_REPLACE_DICT = {
     "Locale-gettext": lambda x: "gettext",
     "systemd": lambda x: "systemd-stable" if "." in x else "systemd",
-    "perf": lambda x: "patch",
+    "perf": lambda x: "linux",
+    "gcc-libs": lambda x: "gcc",
 }
 SRC_URI_VARS = ["SRC_URI", "EGIT_REPO_URI"]
 COMMIT_VARS = ["GIT_COMMIT", "EGIT_COMMIT", "LLVM_HASH"]
@@ -73,10 +76,12 @@
 REGEX_FIND_STRING = '"(.*?)"'
 # REGEX_EBUILD_REPLACE finds ebuild replacement string `(ver_rs 1- some_string)`.
 REGEX_EBUILD_REPLACE = "\$\(ver_rs 1- (.*?)\)"
+REGEX_INLINE_COMMENT = "( #[^\n]*?)\n"
 GNOME_PN = "GNOME_ORG_MODULE"
 GO_SOURCE = "CROS_GO_SOURCE"
 CROS_REPO = "CROS_WORKON_REPO"
 CROS_COMMIT = "CROS_WORKON_COMMIT"
+LLVM_COMMIT = "LLVM_HASH"
 CROS_PROJECT = "CROS_WORKON_PROJECT"
 CROS_SUBTREE = "CROS_WORKON_SUBTREE"
 CROS_GIT_HOST_URL = "https://chromium.googlesource.com"
@@ -221,12 +226,13 @@
     return p_name, {f"{p}.tar.gz", f"{p}.tar.xz", f"{p}.tgz", f"{p}.xz"}
 
 
-def search_pkg_from_gob(repository, category, p_name):
+def search_pkg_from_gob(repository, category, p_name, license):
     pkg = f"{category}/{p_name}"
     if (
         pkg in PACKAGES_FROM_GOB
         or category == "virtual"
         or repository == "eclass-overlay"
+        or license == "metapackage"
     ):
         uri = os.path.join(GOB_REPO_DICT[repository], pkg)
         if is_uri_valid(uri):
@@ -278,6 +284,10 @@
     gcs_names = set()
     with open(ebuild) as eb:
         content = eb.read()
+        # remove inline comments.
+        match = re.findall(REGEX_INLINE_COMMENT, content, re.DOTALL)
+        for m in match:
+            content = content.replace(m, "")
         env_set = parse_vars_in_ebuild(content)
         cros_uri = find_cros_uri()
         for keyword in KEYWORDS_FOR_MODULE_VERSION:
@@ -287,11 +297,8 @@
         gnome_pn = os.getenv(GNOME_PN)
         if gnome_pn:
             gcs_names.add(f'{gnome_pn}-{os.getenv("PV")}.tar.xz')
-        gcs_names_src = get_gcs_name_from_src_uri(REGEX_SRC_URI, content)
-        if gcs_names_src:
-            gcs_names.update(gcs_names_src)
-        else:
-            gcs_names.update(get_gcs_name_from_src_uri(REGEX_SRC_URI_PLUS, content))
+        gcs_names.update(get_gcs_name_from_src_uri(REGEX_SRC_URI, content))
+        gcs_names.update(get_gcs_name_from_src_uri(REGEX_SRC_URI_PLUS, content))
         return env_set, cros_uri, gcs_names
 
 
@@ -345,6 +352,10 @@
         if commit:
             return f"{homepage}@{commit}"
         return homepage
+    # Special case for sys-libs/libcxxabi.
+    if homepage == "http://libcxxabi.llvm.org/":
+        commit = os.getenv(LLVM_COMMIT)
+        return f"https://chromium.googlesource.com/external/github.com/llvm/llvm-project@{commit}"
     return ""
 
 
@@ -371,13 +382,13 @@
         os.environ[var] = ""
 
 
-def get_download_url(ebuild, repository, category, pf):
+def get_download_url(ebuild, repository, category, pf, license):
     if repository == "private-overlays":
         return ""
     os.environ["CROS_GIT_HOST_URL"] = CROS_GIT_HOST_URL
     os.environ["CROS_GIT_AOSP_URL"] = CROS_GIT_AOSP_URL
     p_name, gcs_names = parse_pkg_name(pf)
-    gob_res = search_pkg_from_gob(repository, category, p_name)
+    gob_res = search_pkg_from_gob(repository, category, p_name, license)
     if gob_res:
         return gob_res
     env_set, cros_uri, gcs_names_ebuild = parse_ebuild(ebuild)