Add SBOM info generation to install hook
It writes the info to file `sbom-pkg-info`
for each package merged.
Example output: https://paste.googleplex.com/5945476472897536
chromite/lib/gs.py cannot be used because the class
cannot be initialized due to permission denied when creating
cache dir.
BUG=b/254334533
TEST=presubmit
RELEASE_NOTE=None
Change-Id: Iaafc26f1d9726f41d342376c971955cf2dc7c68d
Reviewed-on: https://cos-review.googlesource.com/c/third_party/platform/crosutils/+/39267
Reviewed-by: Robert Kolchmeyer <rkolchmeyer@google.com>
Tested-by: Cusky Presubmit Bot <presubmit@cos-infra-prod.iam.gserviceaccount.com>
diff --git a/hooks/install/gen-sbom-package-info.py b/hooks/install/gen-sbom-package-info.py
new file mode 100755
index 0000000..59e5ffd
--- /dev/null
+++ b/hooks/install/gen-sbom-package-info.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+#
+# Copyright 2022 Google LLC
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# version 2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+
+# This script is used to automatically generate package
+# information for SBOM of COS image bundled dependencies.
+
+import os
+import sys
+from sbom_info_lib import download_url
+from sbom_info_lib import go_dep
+from sbom_info_lib import licenses
+
+SBOM_INFO_FILE_NAME = "sbom-pkg-info"
+
+
+class SbomPackageInfo:
+    """Download URL, license expression and Go deps recorded for one package."""
+    def __init__(self, url, license, go_dep):
+        self.download_url, self.licenses, self.go_dep = url, license, go_dep
+
+    def write_to_build_info(self, build_info_dir):
+        """Write the three fields as `key:value` lines into `build_info_dir`."""
+        fields = (("download-url", self.download_url),
+                  ("licenses", self.licenses), ("go-dep", self.go_dep))
+        with open(f"{build_info_dir}/{SBOM_INFO_FILE_NAME}", "w") as f:
+            f.writelines(f"{key}:{value}\n" for key, value in fields)
+
+
+class SBOMPkgInfoError(Exception):
+    def __init__(self, msg):  # the message is a required argument
+        Exception.__init__(self, msg)
+
+
+def main():
+    """Generate the sbom-pkg-info file for the package in $PORTAGE_BUILDDIR."""
+    package_dir = os.getenv("PORTAGE_BUILDDIR")
+    if not package_dir:  # os.path.join(None, ...) would raise an opaque TypeError
+        raise SBOMPkgInfoError("PORTAGE_BUILDDIR is not set")
+    build_info_dir = os.path.join(package_dir, "build-info")
+    ebuild = os.path.join(build_info_dir, os.path.basename(package_dir) + ".ebuild")
+    url = download_url.get_download_url(build_info_dir, ebuild)
+    if not url and "private-overlays" not in ebuild:
+        raise SBOMPkgInfoError("download url not found")
+    pkg_licenses = licenses.get_licenses(build_info_dir)
+    if not pkg_licenses:
+        raise SBOMPkgInfoError("license not found")
+    # Validate first, and skip the go-dep download when there is no url to fetch.
+    deps = go_dep.get_go_dep(url, build_info_dir) if url else ""
+    SbomPackageInfo(url, pkg_licenses, deps).write_to_build_info(build_info_dir)
+
+
+if __name__ == "__main__":
+ sys.exit(main())  # main() has no return statement, so a run without exceptions exits with status 0
diff --git a/hooks/install/sbom_info_lib/download_url.py b/hooks/install/sbom_info_lib/download_url.py
new file mode 100644
index 0000000..9df3912
--- /dev/null
+++ b/hooks/install/sbom_info_lib/download_url.py
@@ -0,0 +1,389 @@
+# Copyright 2022 Google LLC
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# version 2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+
+# get_download_url() in this script is used to
+# find download location for a COS package.
+
+import subprocess
+import re
+import os
+import requests
+
+
+CROS_GCS_MIRRORS = [  # buckets probed with `gsutil ls` by search_mirror_gcs()
+ "gs://chromeos-mirror/gentoo/distfiles/",
+ "gs://chromeos-localmirror/distfiles/",
+]
+# Allow-list of ebuild variable names: find_var_and_set_env() skips any other name.
+EBUILD_VARS = {
+ "MY_P",
+ "MY_PV",
+ "MY_PN",
+ "PARCH",
+ "SRC_PV",
+ "code_ver",
+ "RE2_VER",
+ "MODULE_VERSION",
+ "GIT_COMMIT",
+ "SRC_URI",
+ "EGIT_REPO_URI",
+ "EGIT_COMMIT",
+ "CROS_WORKON_COMMIT",
+ "CROS_WORKON_PROJECT",
+ "CROS_WORKON_SUBTREE",
+ "HOMEPAGE",
+ "CROS_GO_SOURCE",
+ "GN_X64_SHA1",
+ "LLVM_HASH",
+ "CROS_WORKON_REPO",
+ "GNOME_ORG_MODULE",
+}
+# For packages whose package names are hard to parse or not defined in ebuilds.
+PN_REPLACE_DICT = {  # parsed name -> callable(version) returning the name to use
+ "Locale-gettext": lambda x: "gettext",
+ "systemd": lambda x: "systemd-stable" if "." in x else "systemd",
+ "perf": lambda x: "patch",
+}
+SRC_URI_VARS = ["SRC_URI", "EGIT_REPO_URI"]  # env vars tried in this order by search_src_uri()
+COMMIT_VARS = ["GIT_COMMIT", "EGIT_COMMIT", "LLVM_HASH"]  # first one set becomes the `@commit` suffix
+# REGEX_STRING_VAR finds `var_name=var_value` and `var_name="var_value"` (no new line) in ebuilds.
+REGEX_STRING_VAR = '([^\n]*?)="?([^\n]*?)"?\n'
+# REGEX_ARRAY_VAR finds `var_name=("var_value1" "var_value2" ...)` (allow new lines) in ebuilds.
+REGEX_ARRAY_VAR = r"([^\n]*?)=(\(.*?\))"  # raw string: "\(" is an invalid escape in a plain literal
+# REGEX_SRC_URI finds `SRC_URI="uri1 uri2 ..."` (allow new lines) in ebuilds.
+REGEX_SRC_URI = 'SRC_URI="(.*?)"'
+# REGEX_SRC_URI_PLUS finds `SRC_URI+="uri1 uri2 ..."` (allow new lines) in ebuilds.
+REGEX_SRC_URI_PLUS = r'SRC_URI\+="(.*?)"'
+# REGEX_PKG_REVISION finds package revision like `-r12` in package full name.
+REGEX_PKG_REVISION = "-r[0-9]+$"
+# REGEX_PKG_VERSION finds package version like `-1` or `-1.2.3.4` in package full name.
+REGEX_PKG_VERSION = r"-[0-9]+(\.[0-9]*)*"
+# REGEX_FIND_STRING finds string inside double quotes like "string1".
+REGEX_FIND_STRING = '"(.*?)"'
+# REGEX_EBUILD_REPLACE finds ebuild replacement string `(ver_rs 1- some_string)`.
+REGEX_EBUILD_REPLACE = r"\$\(ver_rs 1- (.*?)\)"
+GNOME_PN = "GNOME_ORG_MODULE"
+GO_SOURCE = "CROS_GO_SOURCE"
+CROS_REPO = "CROS_WORKON_REPO"
+CROS_COMMIT = "CROS_WORKON_COMMIT"
+CROS_PROJECT = "CROS_WORKON_PROJECT"
+CROS_SUBTREE = "CROS_WORKON_SUBTREE"
+CROS_GIT_HOST_URL = "https://chromium.googlesource.com"  # default repo host in find_cros_uri()
+CROS_GIT_AOSP_URL = "https://android.googlesource.com"
+CROS_HOMEPAGE = "HOMEPAGE"
+GOB_REPO_DICT = {  # repository name -> base URL joined with `category/name` by search_pkg_from_gob()
+ "project-lakitu": "https://cos.googlesource.com/cos/overlays/board-overlays/+/master/project-lakitu/",
+ "chromiumos-overlay": "https://cos.googlesource.com/third_party/overlays/chromiumos-overlay/+/master/",
+ "portage-stable": "https://cos.googlesource.com/third_party/overlays/portage-stable/+/master/",
+ "eclass-overlay": "https://cos.googlesource.com/third_party/overlays/eclass-overlay/+/master/",
+}
+# Packages that use `MODULE_VERSION` as package version.
+KEYWORDS_FOR_MODULE_VERSION = ["dev-perl", "perl-core"]  # matched as substrings of the ebuild path
+PACKAGES_FROM_GOB = {  # `category/name` entries that search_pkg_from_gob() resolves via GOB_REPO_DICT
+ # portage-stable
+ "dev-util/meson-format-array",
+ "sys-devel/autoconf-wrapper",
+ "sys-devel/automake-wrapper",
+ "dev-python/namespace-zope",
+ # project-lakitu
+ "app-admin/cgroup-helper",
+ "app-admin/extensions-manager",
+ "app-admin/kdump-helper",
+ "app-admin/stackdriver",
+ "app-admin/toolbox-config",
+ "app-emulation/cloud-init-config",
+ "chromeos-base/chromeos-auth-config-lakitu",
+ "chromeos-base/chromeos-base",
+ "chromeos-base/chromeos-bsp-lakitu-common",
+ "chromeos-base/chromeos-firewall-init-lakitu",
+ "chromeos-base/chromeos-init-systemd",
+ "chromeos-base/cloud-audit-config",
+ "chromeos-base/cloud-filesystem-init",
+ "chromeos-base/cloud-network-init",
+ "net-misc/chrony-config",
+ "sys-apps/loadpin-trigger",
+ "sys-apps/system-sysdaemons",
+ "sys-libs/lakitu-custom-locales",
+ # chromiumos-overlay
+ "chromeos-base/chromeos-ca-certificates",
+ "chromeos-base/chromeos-sshd-init",
+ "chromeos-base/tty",
+ "chromeos-base/update-policy-embedded",
+ "dev-util/glib-utils",
+ "chromeos-base/openssh-server-init",
+}
+
+
+def is_uri_valid(uri):
+    """Return True only if `uri` starts with "http" and a GET answers 200."""
+    if not uri.strip().startswith("http"):
+        return False
+    # stream=True skips the body; `with` releases the connection that was leaked
+    # before, and the timeout keeps a dead host from hanging the install hook.
+    with requests.get(uri, stream=True, timeout=60) as response: return response.status_code == 200
+
+
+def parse_var(s):
+    """Expand shell variables in `s` via `bash -c "echo ..."`; return "" on any stderr."""
+    # Keep only the part before "->" (avoid downloading packages).
+    s = s.split("->")[0]
+    # Do not evaluate commands: single-quote values that open with "(".
+    if s.startswith("("):
+        s = f"'{s}'"
+    # NOTE(review): `s` is interpolated into the shell command unescaped, so any
+    # command substitution in the ebuild text would run; confirm inputs are trusted.
+    proc = subprocess.run(
+        ["bash", "-c", f"echo {s}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
+    if proc.stderr:
+        return ""
+    return proc.stdout.decode("utf-8").rstrip()
+
+
+def parse_var_from_env(key):
+    """Return the value(s) of environment variable `key` as a list.
+
+    Unset/empty gives []; a plain value gives [value]; a value starting with
+    "(" is an array whose double-quoted items are each run through parse_var().
+    """
+    val = os.getenv(key)
+    if not val:
+        return []
+    if not val.startswith("("):
+        return [val]
+    # in some cases, go src version cannot be parsed in array
+    # e.g. chromiumos-overlay/dev-go/protobuf
+    return [parse_var(m) for m in re.findall(REGEX_FIND_STRING, val, re.DOTALL)]
+
+
+def find_var_and_set_env(regex, content):
+    """Set os.environ from allow-listed `name=value` matches of `regex` in `content`;
+    return the set of names that were set."""
+    exported = set()
+    for m in re.findall(regex, content, re.DOTALL):
+        name = m[0].strip()
+        # Names outside EBUILD_VARS are ignored without being expanded.
+        value = parse_var(m[1]).strip() if name in EBUILD_VARS else ""
+        if value:
+            os.environ[name] = value
+            exported.add(name)
+    return exported
+
+
+def parse_vars_in_ebuild(content):
+    """Export the allow-listed variables of ebuild text `content`; return their names."""
+    # Rewrite the ebuild-only `$(ver_rs 1- X)` grammar as bash `${PV//./X}` before
+    # the values are handed to find_var_and_set_env().
+    for sep in re.findall(REGEX_EBUILD_REPLACE, content, re.DOTALL):
+        content = content.replace(f"$(ver_rs 1- {sep})", f"${{PV//./{sep}}}")
+    string_vars = find_var_and_set_env(REGEX_STRING_VAR, content)
+    array_vars = find_var_and_set_env(REGEX_ARRAY_VAR, content)
+    # String-style assignments are exported before array-style ones.
+    return string_vars | array_vars
+
+
+def parse_pkg_name(pf):
+    """Split the full package name `pf` and export PN, PV and P to os.environ.
+
+    Returns (name as parsed from `pf`, set of candidate archive file names
+    built from P).
+    NOTE(review): a `pf` without a `-DIGITS` part fails on `None.start()`.
+    """
+    # Cut a trailing `-rN` revision, then split at the first `-DIGITS` match.
+    revision = re.search(REGEX_PKG_REVISION, pf)
+    p = pf[: revision.start()] if revision else pf
+    split_at = re.search(REGEX_PKG_VERSION, p).start()
+    p_name, pv = p[:split_at], p[split_at + 1 :]
+    # PN_REPLACE_DICT may substitute the name used for PN/P (not the returned one).
+    pn = PN_REPLACE_DICT[p_name](pv) if p_name in PN_REPLACE_DICT else p_name
+    p = f"{pn}-{pv}"
+    os.environ.update({"PN": pn, "PV": pv, "P": p})
+    # possible package names in CROS GCS mirror buckets.
+    return p_name, {f"{p}{ext}" for ext in (".tar.gz", ".tar.xz", ".tgz", ".xz")}
+
+
+def search_pkg_from_gob(repository, category, p_name):
+    """Return the GoB overlay URL of the package if it is reachable, else "".
+
+    Only packages listed in PACKAGES_FROM_GOB, `virtual` packages and
+    packages of the `eclass-overlay` repository are looked up.
+    """
+    pkg = f"{category}/{p_name}"
+    in_gob = pkg in PACKAGES_FROM_GOB or category == "virtual" or repository == "eclass-overlay"
+    # .get(): a repository missing from GOB_REPO_DICT used to raise KeyError.
+    base = GOB_REPO_DICT.get(repository) if in_gob else None
+    uri = os.path.join(base, pkg) if base else ""
+    return uri if uri and is_uri_valid(uri) else ""
+
+
+def find_cros_uri():
+    """Return `REPO/PROJECT@COMMIT[#SUBTREE]` URIs built from the CROS_WORKON_* env vars."""
+    cros_repo = parse_var_from_env(CROS_REPO)
+    cros_proj = parse_var_from_env(CROS_PROJECT)
+    cros_subtree = parse_var_from_env(CROS_SUBTREE)
+    cros_commit = parse_var_from_env(CROS_COMMIT)
+    if not cros_repo:
+        cros_repo = [CROS_GIT_HOST_URL] * len(cros_proj)
+    res = []
+    if len(cros_proj) != len(cros_commit):
+        return res  # projects and commits cannot be paired up
+    # NOTE(review): cros_repo[i]/cros_subtree[i] raise IndexError if shorter than the project list.
+    for i, (proj, commit) in enumerate(zip(cros_proj, cros_commit)):
+        uri = os.path.join(cros_repo[i], proj)
+        if not is_uri_valid(uri):
+            continue  # unreachable project URLs are skipped
+        if cros_subtree and cros_subtree[i]:
+            res.extend(f"{uri}@{commit}#{tree}" for tree in cros_subtree[i].split(" "))
+        else:
+            res.append(f"{uri}@{commit}")
+    return res
+
+
+def get_gcs_name_from_src_uri(regex, content):
+    """Return the file names (basenames) of the URIs captured by `regex` in `content`.
+
+    Every capture is split on newlines and single spaces; each token except a
+    literal "->" is expanded with parse_var() and reduced to its basename.
+    """
+    return {
+        os.path.basename(parse_var(token))
+        for group in re.findall(regex, content, re.DOTALL)
+        for line in group.split("\n") for token in line.split(" ") if token != "->"
+    }
+
+
+def parse_ebuild(ebuild):
+    """Parse `ebuild`, export its variables, and collect download candidates.
+
+    Returns (names of exported env vars, CROS repo URIs, candidate archive
+    file names to look for in the CROS GCS mirrors).
+    """
+    gcs_names = set()
+    with open(ebuild) as eb:
+        content = eb.read()
+    env_set = parse_vars_in_ebuild(content)
+    cros_uri = find_cros_uri()
+    if any(keyword in ebuild for keyword in KEYWORDS_FOR_MODULE_VERSION):
+        gcs_names.add(f'{os.getenv("PN")}-{os.getenv("MODULE_VERSION")}.tar.gz')
+    gnome_pn = os.getenv(GNOME_PN)
+    if gnome_pn:
+        # Fixed: `os.getenv["PV"]` subscripted the function and raised TypeError.
+        gcs_names.add(f'{gnome_pn}-{os.getenv("PV")}')
+    # Fixed: test the SRC_URI result itself so that `SRC_URI+=` is only a fallback;
+    # the old `if gcs_names:` discarded the SRC_URI names for most packages.
+    gcs_names_src = get_gcs_name_from_src_uri(REGEX_SRC_URI, content)
+    gcs_names.update(gcs_names_src or get_gcs_name_from_src_uri(REGEX_SRC_URI_PLUS, content))
+    return env_set, cros_uri, gcs_names
+
+
+def search_mirror_gcs(gcs_names):
+    """Return the first `gsutil ls` hit for any candidate name in the CROS mirrors, or ""."""
+    for name in gcs_names:
+        escaped = name.replace("?", "%3f")
+        for bucket in CROS_GCS_MIRRORS:
+            listing = subprocess.run(
+                ["gsutil", "ls", os.path.join(bucket, escaped)],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+            if not listing.stderr:  # any stderr output counts as "not found"
+                return listing.stdout.decode("utf-8").rstrip()
+    return ""
+
+
+def search_src_uri():
+    """Return the first reachable URI from SRC_URI_VARS, with `@commit` appended if known, or ""."""
+    for uri_name in SRC_URI_VARS:
+        uri = os.getenv(uri_name)
+        if not uri or not is_uri_valid(uri):
+            continue
+        # Pin the URI to the first COMMIT_VARS variable that is set, if any.
+        commit = next(filter(None, map(os.getenv, COMMIT_VARS)), None)
+        return f"{uri}@{commit}" if commit else uri
+    return ""
+
+
+def search_go_source(category):  # NOTE: `category` is not used in the body
+ res = []
+ go_src = parse_var_from_env(GO_SOURCE)
+ for src in go_src:
+ parts = src.split(" ")  # only entries of exactly two space-separated parts are used
+ if len(parts) == 2:
+ version = parts[1]
+ sources = parts[0].split(":")  # colon-separated candidate locations
+ for uri in sources:
+ uri = "https://" + uri
+ if is_uri_valid(uri):
+ res.append(f"{uri}@{version}")
+ break
+ return ",".join(res)  # comma-joined `uri@version` items; "" when nothing was found
+
+
+def search_homepage():
+    """Return a reachable chromium.googlesource.com HOMEPAGE (plus `@commit` if set), or ""."""
+    # Default to "": an ebuild without HOMEPAGE used to raise TypeError on `in None`.
+    homepage = os.getenv(CROS_HOMEPAGE, "")
+    if "chromium.googlesource.com" not in homepage or not is_uri_valid(homepage):
+        return ""
+    commit = os.getenv(CROS_COMMIT)
+    return f"{homepage}@{commit}" if commit else homepage
+
+
+def search_download_location(gcs_names, category, cros_uri):
+    """Return the first download location found for the package, or "".
+
+    Tried in order: CROS GCS mirrors, SRC_URI-style variables, the CROS_WORKON
+    URIs in `cros_uri`, CROS_GO_SOURCE and finally HOMEPAGE. The result is
+    always a string; several locations are joined with ",".
+    """
+    res = search_mirror_gcs(gcs_names) or search_src_uri()
+    if res:
+        return res
+    if cros_uri:
+        # Fixed: `cros_uri` is a list, but every other branch returns a string and
+        # the result is later split on ","; join it like search_go_source() does.
+        return cros_uri if isinstance(cros_uri, str) else ",".join(cros_uri)
+    res = search_go_source(category) or search_homepage()
+    return res if res else ""
+
+
+def unset_env(env_set):
+    # Blank (not delete) every listed variable so later os.getenv() checks read "".
+    os.environ.update(dict.fromkeys(env_set, ""))
+
+
+def read_build_info(build_info_dir):
+    """Return the stripped contents of the `repository`, `CATEGORY` and `PF` files."""
+    values = []
+    for file_name in ("repository", "CATEGORY", "PF"):
+        with open(os.path.join(build_info_dir, file_name), "r") as f:
+            values.append(f.read().strip())
+    repository, category, pf = values
+    return repository, category, pf
+
+
+def get_download_url(build_info_dir, ebuild):
+    """Return the download location for the package in `build_info_dir`; "" for private overlays."""
+    repository, category, pf = read_build_info(build_info_dir)
+    if repository == "private-overlays":
+        return ""
+    # Exported up front; presumably so ebuild values that reference them can be expanded.
+    os.environ.update(CROS_GIT_HOST_URL=CROS_GIT_HOST_URL, CROS_GIT_AOSP_URL=CROS_GIT_AOSP_URL)
+    p_name, gcs_names = parse_pkg_name(pf)
+    gob_res = search_pkg_from_gob(repository, category, p_name)
+    if gob_res:
+        return gob_res
+    env_set, cros_uri, gcs_names_ebuild = parse_ebuild(ebuild)
+    candidates = (gcs_names | gcs_names_ebuild) - {""}
+    res = search_download_location(candidates, category, cros_uri)
+    unset_env(env_set)
+    return res
diff --git a/hooks/install/sbom_info_lib/go_dep.py b/hooks/install/sbom_info_lib/go_dep.py
new file mode 100644
index 0000000..da05c9c
--- /dev/null
+++ b/hooks/install/sbom_info_lib/go_dep.py
@@ -0,0 +1,70 @@
+# Copyright 2022 Google LLC
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# version 2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+
+# This script is used to find go dependencies
+# of a go package. It reads 'go.mod', 'vendor.mod'
+# or 'vendor.conf' in the source code.
+
+import os
+import re
+import subprocess
+import tarfile
+import requests
+
+# REGEX_GO_MOD_DEP finds
+# `require (
+# go-pkg1 v1.2.3
+# go-pkg2 v4.5.6 ...
+# )` in go.mod or other mod file (raw string: "\(" is an invalid escape otherwise).
+REGEX_GO_MOD_DEP = r"require \((.*?)\)"
+GO_MOD_DEP_FILE = ["go.mod", "vendor.mod", "vendor.conf"]
+
+
+def download_src_code(url, build_info_dir):
+    """Download the source archive for `url` into `build_info_dir`; return its path."""
+    filepath = os.path.join(build_info_dir, os.path.basename(url))
+    if url.startswith("gs://"):
+        subprocess.run(["gsutil", "cp", url, filepath])
+        return filepath
+    github = url.startswith("https://github.com")  # GitHub uses `/archive/`; other hosts get `/+archive/`
+    path = url.replace("@", "/archive/") if github else url.replace("@", "/+archive/").replace("#", "/")
+    response = requests.get(f"{path}.tar.gz")
+    with open(filepath, "wb") as archive:  # fixed: the handle was never closed
+        archive.write(response.content)
+    return filepath
+
+
+def get_go_dep(download_url, build_info_dir):
+    """Return the comma-joined `require (...)` entries found in the package sources.
+
+    Every comma-separated location in `download_url` is downloaded, opened as a
+    `.tar.gz`, scanned for go.mod/vendor.mod/vendor.conf and then deleted.
+    """
+    res = set()
+    for url in download_url.split(","):
+        if not url or url.endswith(".gn"):  # "" (no download url) used to crash the download
+            continue
+        filepath = download_src_code(url, build_info_dir)
+        try:
+            with tarfile.open(filepath, "r:gz") as t:  # closed even when parsing fails
+                for member in t.getmembers():
+                    f = t.extractfile(member) if os.path.basename(member.name) in GO_MOD_DEP_FILE else None
+                    content = f.read().decode("utf-8") if f else ""  # None: not a regular file
+                    for req in re.findall(REGEX_GO_MOD_DEP, content, re.DOTALL):
+                        # One dependency per line, with `// comments` and blanks dropped.
+                        res.update(filter(None, (dep.split("//")[0].strip() for dep in req.split("\n"))))
+        except Exception:  # was a bare `except:` that also swallowed KeyboardInterrupt
+            print(f"{url} is not a .gz file.")
+        finally:
+            if os.path.exists(filepath):  # a failed download leaves no file to remove
+                os.remove(filepath)
+    return ",".join(sorted(res))  # sorted: the written SBOM line is stable across runs
diff --git a/hooks/install/sbom_info_lib/licenses.py b/hooks/install/sbom_info_lib/licenses.py
new file mode 100644
index 0000000..cbf379d
--- /dev/null
+++ b/hooks/install/sbom_info_lib/licenses.py
@@ -0,0 +1,71 @@
+# Copyright 2022 Google LLC
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# version 2 as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+
+# This script is used to parse licenses of a package.
+
+import re
+import os
+
+def parse_gentoo_license(line):
+    """Convert the LICENSE string of an ebuild into an `A AND (B OR C)` expression.
+
+    Returns (expression, set of license names seen). After a `||` token names
+    are joined with OR until the next `)`; everywhere else they are joined with AND.
+    """
+    license_set = set()
+    use_or = False
+    res = ""
+    # split(), not split(" "): repeated blanks or newlines gave empty names (`A AND  AND B`).
+    for e in line.split():
+        if e == "||":
+            use_or = True
+        elif e == "(":
+            res += "AND (" if res else "("
+        elif e == ")":
+            res = f"{res[:-1]}) "  # swap the trailing blank for the closing paren
+            use_or = False
+        else:
+            license_set.add(e)
+            if not res or res.endswith("("):
+                res += f"{e} "
+            else:
+                res += f"OR {e} " if use_or else f"AND {e} "
+    return res.strip(), license_set
+
+
+# If a license is in license.yaml but not LICENSE, add it to the result.
+# `res` is the expression built so far; `license_set` is updated in place.
+def parse_license_yaml(yaml, res, license_set):
+ lines = yaml.strip().split("\n")
+ idx = lines.index(" - license_names") + 1  # raises ValueError if that exact line is absent
+ match = re.findall("\{(.*?)\}", lines[idx], re.DOTALL)  # text between braces on the following line
+ if not match:
+ return res
+ found = []
+ for m in match:
+ for part in m.split(","):
+ found.append(part.split(":")[0])  # keep only the text before ":" of each entry
+ for license in found:
+ license = license.strip()
+ if license and not license in license_set:
+ license_set.add(license)
+ res += f" AND {license}"
+ return res
+
+
+def get_licenses(build_info_dir):
+    """Return the LICENSE expression extended by license.yaml; "" when LICENSE is missing."""
+    license_path = os.path.join(build_info_dir, "LICENSE")
+    if not os.path.exists(license_path):
+        return ""
+    with open(license_path, "r") as f, open(os.path.join(build_info_dir, "license.yaml"), "r") as y:
+        return parse_license_yaml(y.read(), *parse_gentoo_license(f.read()))