LAKITU: Revert "scripts: cros_extract_deps: drop cpe support"

This reverts commit a994fc1a7c1ceb5ad05683d7cc39187f06c165dd.

COS still depends on generated CPE files.

BUG=b/307995090
TEST=presubmit
RELEASE_NOTE=None

Change-Id: I3f82ce5a4be7789d2e2ddbfcd234be7f1c825857
diff --git a/scripts/cros_extract_deps.py b/scripts/cros_extract_deps.py
index 59cfaa4..42690e1 100644
--- a/scripts/cros_extract_deps.py
+++ b/scripts/cros_extract_deps.py
@@ -9,16 +9,19 @@
 
 from __future__ import absolute_import
 
+import logging
 import sys
 
 from chromite.lib import build_target_lib
 from chromite.lib import commandline
+from chromite.lib import cros_build_lib
+from chromite.lib import sysroot_lib
 from chromite.lib.depgraph import DepGraphGenerator
 from chromite.lib.parser import package_info
 from chromite.utils import pformat
 
 
-def FlattenDepTree(deptree, pkgtable=None, parentcpv=None):
+def FlattenDepTree(deptree, pkgtable=None, parentcpv=None, get_cpe=False):
     """Simplify dependency json.
 
     Turn something like this (the parallel_emerge DepsTree format):
@@ -61,6 +64,7 @@
         deptree: The dependency tree.
         pkgtable: The package table to update. If None, create a new one.
         parentcpv: The parent CPV.
+        get_cpe: If set True, include CPE in the flattened dependency tree.
 
     Returns:
         A flattened dependency tree.
@@ -77,8 +81,15 @@
                 "category": pkg_info.category,
                 "version": pkg_info.vr,
                 "full_name": cpv,
+                "cpes": [],
                 "action": record["action"],
             }
+            if get_cpe:
+                pkgtable[cpv]["cpes"].extend(
+                    GetCPEFromCPV(
+                        pkg_info.category, pkg_info.package, pkg_info.version
+                    )
+                )
 
         # If we have a parent, that is a rev_dep for the current package.
         if parentcpv:
@@ -87,13 +98,129 @@
         for childcpv in record["deps"]:
             pkgtable[cpv]["deps"].append(childcpv)
         # Visit the subtree recursively as well.
-        FlattenDepTree(record["deps"], pkgtable=pkgtable, parentcpv=cpv)
+        FlattenDepTree(
+            record["deps"], pkgtable=pkgtable, parentcpv=cpv, get_cpe=get_cpe
+        )
         # Sort 'deps' & 'rev_deps' alphabetically to make them more readable.
         pkgtable[cpv]["deps"].sort()
         pkgtable[cpv]["rev_deps"].sort()
     return pkgtable
 
 
+def GetCPEFromCPV(category, package, version):
+    """Return the CPE names derived from a Portage package's metadata.
+
+    Runs "equery m -U <category>/<package>" with check=False and converts
+    every output line containing "ID: cpe" into a versioned CPE name.
+
+    Args:
+        category: The Portage package's category, e.g. "net-misc"
+        package: The Portage package's name, e.g. "curl"
+        version: The Portage version, e.g. "7.30.0"
+
+    Returns:
+        A list of CPE Name strings, e.g.
+        ["cpe:/a:curl:curl:7.30.0", "cpe:/a:curl:libcurl:7.30.0"]
+    """
+    atom = "%s/%s" % (category, package)
+    result = cros_build_lib.run(
+        ["equery", "m", "-U", atom],
+        check=False,
+        print_cmd=False,
+        stdout=True,
+        encoding="utf-8",
+    )
+    metadata_lines = result.stdout.splitlines()
+    # The CPE root comes from metadata.xml and the version from Portage; we
+    # assume that tacking one onto the other yields a legitimate CPE. This
+    # works so long as Portage and CPE agree on the precise formatting of
+    # the version number, which they almost always do. The major exception
+    # identified thus far is that our ebuilds insert underscores prior to
+    # patchlevels, which neither upstream nor CPE use: we would decide we
+    # have cpe:/a:todd_miller:sudo:1.8.6_p7 yet the advisories use
+    # cpe:/a:todd_miller:sudo:1.8.6p7. (CPE is "right" in this example, in
+    # that it matches www.sudo.ws.) Removing underscores seems to improve
+    # our chances of arriving at the CPE used by NVD.
+    #
+    # Ebuild version numbers are rev'd by people who don't have "try to
+    # match NVD" as one of their goals, so some risk of minor formatting
+    # disagreements remains. This is livable so long as vulnerability
+    # monitoring compares versions fuzzily against the advisories.
+    #
+    # Matching lines look like "Remote-ID:   cpe:/a:kernel:linux-pam ID: cpe";
+    # the second whitespace-separated field is the CPE URI.
+    return [
+        "%s:%s" % (entry.split()[1], version.replace("_", ""))
+        for entry in metadata_lines
+        if "ID: cpe" in entry
+    ]
+
+
+def GenerateCPEList(deps_list, sysroot):
+    """Build the CPE report for deps_list packages and SDK packages.
+
+    Args:
+        deps_list: A flattened dependency tree (cros_extract_deps format).
+        sysroot: The board directory to use when finding SDK packages.
+
+    Returns:
+        A list of CPE info for packages in deps_list and SDK packages, e.g.
+        [
+            {
+                "ComponentName": "app-admin/sudo",
+                "Repository": "cros",
+                "Targets": [
+                    "cpe:/a:todd_miller:sudo:1.8.19p2"
+                ]
+            },
+            {
+                "ComponentName": "sys-libs/glibc",
+                "Repository": "cros",
+                "Targets": [
+                    "cpe:/a:gnu:glibc:2.23"
+                ]
+            }
+        ]
+    """
+    entries = []
+
+    # SDK packages first; their CPEs have to be looked up here.
+    sdk_packages = sysroot_lib.get_sdk_provided_packages(sysroot)
+    for sdk_pkg in sorted(sdk_packages, key=lambda pkg: pkg.cpvr):
+        # SDK CPVs that are also in deps_list are left to the second loop.
+        if deps_list.get(sdk_pkg.cpvr) is not None:
+            continue
+
+        sdk_cpes = GetCPEFromCPV(
+            sdk_pkg.category, sdk_pkg.package, sdk_pkg.version
+        )
+        if not sdk_cpes:
+            logging.warning("No CPE entry for %s", sdk_pkg.cpvr)
+            continue
+        entries.append(
+            {
+                "ComponentName": "%s" % sdk_pkg.atom,
+                "Repository": "cros",
+                "Targets": sorted(sdk_cpes),
+            }
+        )
+
+    # Packages in deps_list already carry their CPEs in the "cpes" field.
+    for cpv, record in sorted(deps_list.items()):
+        if not record["cpes"]:
+            logging.warning("No CPE entry for %s", cpv)
+            continue
+        name = "%s/%s" % (record["category"], record["name"])
+        entries.append(
+            {
+                "ComponentName": name,
+                "Repository": "cros",
+                "Targets": sorted(record["cpes"]),
+            }
+        )
+    entries.sort(key=lambda entry: entry["ComponentName"])
+    return entries
+
+
 def ParseArgs(argv):
     """Parse command line arguments."""
     parser = commandline.ArgumentParser(description=__doc__)
@@ -102,6 +229,12 @@
     target.add_argument("--board", help="Board name.")
 
     parser.add_argument(
+        "--format",
+        default="deps",
+        choices=["deps", "cpe"],
+        help="Output either traditional deps or CPE-only JSON.",
+    )
+    parser.add_argument(
         "--output-path", default=None, help="Write output to the given path."
     )
     parser.add_argument("pkgs", nargs="*")
@@ -131,6 +264,7 @@
 def ExtractDeps(
     sysroot,
     package_list,
+    formatting="deps",
     include_bdepend=True,
     backtrack=True,
 ):
@@ -147,6 +281,9 @@
             PORTAGE_CONFIGROOT.
         package_list: the list of packages (CP string) to extract their
             dependencies from.
+        formatting: can either be 'deps' or 'cpe'. For 'deps', see the return
+            format in docstring of FlattenDepTree, for 'cpe', see the return
+            format in docstring of GenerateCPEList.
         include_bdepend: Controls whether BDEPEND packages that would be
             installed to BROOT (usually "/" instead of ROOT) are included in the
             output.
@@ -156,7 +293,7 @@
             error instead of trying other candidates.
 
     Returns:
-        A JSON-izable object.
+        A JSON-izable object that either follows 'deps' or 'cpe' format.
     """
     lib_argv = ["--quiet", "--pretend", "--emptytree"]
     if include_bdepend:
@@ -172,7 +309,9 @@
     deps_tree, _deps_info, bdeps_tree = deps.GenDependencyTree()
     trees = (deps_tree, bdeps_tree)
 
-    flattened_trees = tuple(FlattenDepTree(x) for x in trees)
+    flattened_trees = tuple(
+        FlattenDepTree(tree, get_cpe=(formatting == "cpe")) for tree in trees
+    )
 
     # Workaround: since emerge doesn't honor the --emptytree flag, for now we
     # need to manually filter out packages that are obsolete (meant to be
@@ -182,6 +321,10 @@
     for tree in flattened_trees:
         FilterObsoleteDeps(tree)
 
+    if formatting == "cpe":
+        flattened_trees = tuple(
+            GenerateCPEList(tree, sysroot) for tree in flattened_trees
+        )
     return flattened_trees
 
 
diff --git a/scripts/cros_extract_deps_unittest.py b/scripts/cros_extract_deps_unittest.py
index a063d58..82cec7a 100644
--- a/scripts/cros_extract_deps_unittest.py
+++ b/scripts/cros_extract_deps_unittest.py
@@ -51,6 +51,7 @@
                 "name": "blasterpistol",
                 "deps": [],
                 "action": "merge",
+                "cpes": [],
                 "full_name": "weapon/blasterpistol-2.1",
             },
             "deathstar/darthvader-2.3": {
@@ -64,6 +65,7 @@
                     "deathstar/trooper-1.2",
                 ],
                 "action": "merge",
+                "cpes": [],
                 "full_name": "deathstar/darthvader-2.3",
             },
             "deathstar/pilot-2.3": {
@@ -73,6 +75,7 @@
                 "name": "pilot",
                 "deps": [],
                 "action": "merge",
+                "cpes": [],
                 "full_name": "deathstar/pilot-2.3",
             },
             "deathstar/commander-2.3": {
@@ -82,6 +85,7 @@
                 "name": "commander",
                 "deps": [],
                 "action": "merge",
+                "cpes": [],
                 "full_name": "deathstar/commander-2.3",
             },
             "deathstar/trooper-1.2": {
@@ -91,9 +95,36 @@
                 "name": "trooper",
                 "deps": ["weapon/blasterpistol-2.1"],
                 "action": "merge",
+                "cpes": [],
                 "full_name": "deathstar/trooper-1.2",
             },
         }
         self.assertEqual(
             cros_extract_deps.FlattenDepTree(dep_tree), flatten_dep_tree
         )
+
+
+class GetCPEFromCPVTest(cros_test_lib.RunCommandTestCase):
+    """Unit tests covering cros_extract_deps.GetCPEFromCPV."""
+
+    def testGetCPEFromCPV(self):
+        """Each CPE Remote-ID line yields one versioned CPE name."""
+        equery_output = (
+            "Remote-ID:   cpe:/a:curl:curl ID: cpe\n"
+            "Remote-ID:   cpe:/a:curl:libcurl ID: cpe\n"
+            "Remote-ID:   cpe:/a:haxx:curl ID: cpe\n"
+            "Remote-ID:   cpe:/a:haxx:libcurl ID: cpe\n"
+            "Homepage:    https://curl.haxx.se/\n"
+            "License:     MIT\n"
+        )
+        equery_cmd = ["equery", "m", "-U", "net-misc/curl"]
+        self.rc.AddCmdResult(equery_cmd, stdout=equery_output)
+
+        expected = [
+            "cpe:/a:curl:curl:7.3.0",
+            "cpe:/a:curl:libcurl:7.3.0",
+            "cpe:/a:haxx:curl:7.3.0",
+            "cpe:/a:haxx:libcurl:7.3.0",
+        ]
+        actual = cros_extract_deps.GetCPEFromCPV("net-misc", "curl", "7.3.0")
+        self.assertEqual(expected, actual)