cvetriager: webscraper: display cve description

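The CVE description is retrieved from the CVE site and logged to add
more context when using the triaging tool.

Links under git.kernel.org/pub/scm/linux/kernel/git/torvalds/ are now
also recognized as commit links, and the link checks are factored out
into the is_kernel_org() and is_github_com() helpers.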

BUG=chromium:1093363
TEST=python setup.py test

Change-Id: I3eff30cd2635e23b0f3a407b978be2d8621d3d3e
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2313436
Reviewed-by: Zubin Mithra <zsm@chromium.org>
Commit-Queue: Wanda Mora <morawand@chromium.org>
Tested-by: Wanda Mora <morawand@chromium.org>
diff --git a/contrib/cvetriager/cvelib/webscraper.py b/contrib/cvetriager/cvelib/webscraper.py
index c540f39..f1a84fd 100644
--- a/contrib/cvetriager/cvelib/webscraper.py
+++ b/contrib/cvetriager/cvelib/webscraper.py
@@ -18,7 +18,7 @@
 
 CVE_URL = 'https://cve.mitre.org/cgi-bin/cvename.cgi'
 KERNEL_ORG = 'git.kernel.org'
-KERNEL_PATH = '/cgit/linux/kernel/git/torvalds'
+KERNEL_PATH = ['/cgit/linux/kernel/git/torvalds', '/pub/scm/linux/kernel/git/torvalds/']
 GITHUB_COM = 'github.com'
 GITHUB_PATH = '/torvalds/linux/'
 
@@ -48,8 +48,40 @@
     return r
 
 
+def is_kernel_org(netloc, path):
+    """Check if the link is a useful git.kernel.org link."""
+    if netloc != KERNEL_ORG:
+        return False
+
+    for link_path in KERNEL_PATH:
+        if path.startswith(link_path):
+            return True
+
+    return False
+
+
+def is_github_com(netloc, path):
+    """Check if the link is a useful github.com link."""
+    return netloc == GITHUB_COM and path.startswith(GITHUB_PATH)
+
+
+def find_cve_description(cve_html):
+    """Returns given CVE's description."""
+    soup = BeautifulSoup(cve_html, 'html.parser')
+
+    tag = soup.find('div', attrs={'id': 'GeneratedTable'})
+
+    for t in tag.descendants:
+        if t.name == 'th' and t.text == 'Description':
+            description = t.parent.find_next_sibling().get_text()
+
+    return description.replace('\n', '')
+
+
 def find_commit_links(cve_html):
     """Returns commit links from given CVE's webpage."""
+    # TODO: Additional pattern to look for might be:
+    # https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-19076
     commits = []
     soup = BeautifulSoup(cve_html, 'html.parser')
 
@@ -61,10 +93,10 @@
             parsed_link = urlparse(link)
             netloc, path = parsed_link.netloc, parsed_link.path
 
-            if netloc == KERNEL_ORG and path.startswith(KERNEL_PATH):
+            if is_kernel_org(netloc, path):
                 commits.append(link)
 
-            elif netloc == GITHUB_COM and path.startswith(GITHUB_PATH):
+            elif is_github_com(netloc, path):
                 commits.append(link)
 
     return commits
@@ -84,13 +116,13 @@
 
     sha = None
 
-    if netloc == KERNEL_ORG and path.startswith(KERNEL_PATH):
+    if is_kernel_org(netloc, path):
         try:
             sha = parse_qs(parsed_link.query)['id'][0]
         except KeyError:
             LOGGER.error(f'Sha not found in {link}')
 
-    elif netloc == GITHUB_COM and path.startswith(GITHUB_PATH):
+    elif is_github_com(netloc, path):
         sha = os.path.basename(path)
 
     return sha if is_valid(sha) else None
@@ -101,6 +133,10 @@
     commits = set()
 
     req = make_cve_request(cve_number)
+
+    cve_description = find_cve_description(req.text)
+    LOGGER.info(f'CVE Description: {cve_description}')
+
     commit_links = find_commit_links(req.text)
 
     # Collects fix commit sha(s) from links.
diff --git a/contrib/cvetriager/tests/webscraper_test.py b/contrib/cvetriager/tests/webscraper_test.py
index 615ce1d..4921db3 100644
--- a/contrib/cvetriager/tests/webscraper_test.py
+++ b/contrib/cvetriager/tests/webscraper_test.py
@@ -5,6 +5,7 @@
 """Testing script for cvelib/webscraper.py."""
 
 import unittest
+from urllib.parse import urlparse
 
 from cvelib import webscraper
 
@@ -24,6 +25,13 @@
         f'https://github.com/torvalds/linux/commit/{SHA}'
     ]
 
+    CVE_DESCRIPTION = (
+        'The tcpmss_mangle_packet function in net/netfilter/xt_TCPMSS.c in the Linux kernel before '
+        '4.11, and 4.9.x before 4.9.36, allows remote attackers to cause a denial of service '
+        '(use-after-free and memory corruption) or possibly have unspecified other impact by '
+        'leveraging the presence of xt_TCPMSS in an iptables action.'
+    )
+
     def test_make_cve_request(self):
         """Tests that url request was made."""
         req = webscraper.make_cve_request(TestWebScraper.CVE_NUMBER)
@@ -33,6 +41,14 @@
         # Check if proper url was fetched.
         self.assertEqual(req.url, expected)
 
+    def test_find_cve_description(self):
+        """Tests that CVE description was returned."""
+        req = webscraper.make_cve_request(TestWebScraper.CVE_NUMBER)
+
+        description = webscraper.find_cve_description(req.text)
+
+        self.assertEqual(description, TestWebScraper.CVE_DESCRIPTION)
+
     def test_find_commit_links(self):
         """Tests that correct commit links were found."""
         req = webscraper.make_cve_request(TestWebScraper.CVE_NUMBER)
@@ -112,3 +128,24 @@
     def test_valid_sha(self):
         """Tests that the sha found is a hexidecimal string."""
         self.assertTrue(webscraper.is_valid(TestWebScraper.SHA))
+
+    def test_is_kernel_org(self):
+        """Unit test for is_kernel_org."""
+        parsed_link = urlparse(TestWebScraper.LINKS[0])
+        netloc, path = parsed_link.netloc, parsed_link.path
+
+        self.assertTrue(webscraper.is_kernel_org(netloc, path))
+
+        pub_scm_torvalds_link = ('https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/'
+                                 'linux.git/commit/?id=f2d67fec0b43edce8c416101cdc52e71145b5fef')
+
+        parsed_link2 = urlparse(pub_scm_torvalds_link)
+        netloc2, path2 = parsed_link2.netloc, parsed_link2.path
+        self.assertTrue(webscraper.is_kernel_org(netloc2, path2))
+
+    def test_is_github_com(self):
+        """Unit test for is_github_com."""
+        parsed_link = urlparse(TestWebScraper.LINKS[1])
+        netloc, path = parsed_link.netloc, parsed_link.path
+
+        self.assertTrue(webscraper.is_github_com(netloc, path))