cvetriager: webscraper: display cve description
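The CVE description is scraped from the CVE site and logged to add more
context when using the triaging tool. The git.kernel.org and github.com
link checks are also factored into helper functions, and links under
'/pub/scm/linux/kernel/git/torvalds/' on git.kernel.org are now accepted.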
BUG=chromium:1093363
TEST=python setup.py test
Change-Id: I3eff30cd2635e23b0f3a407b978be2d8621d3d3e
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2313436
Reviewed-by: Zubin Mithra <zsm@chromium.org>
Commit-Queue: Wanda Mora <morawand@chromium.org>
Tested-by: Wanda Mora <morawand@chromium.org>
diff --git a/contrib/cvetriager/cvelib/webscraper.py b/contrib/cvetriager/cvelib/webscraper.py
index c540f39..f1a84fd 100644
--- a/contrib/cvetriager/cvelib/webscraper.py
+++ b/contrib/cvetriager/cvelib/webscraper.py
@@ -18,7 +18,7 @@
CVE_URL = 'https://cve.mitre.org/cgi-bin/cvename.cgi'
KERNEL_ORG = 'git.kernel.org'
-KERNEL_PATH = '/cgit/linux/kernel/git/torvalds'
+KERNEL_PATH = ['/cgit/linux/kernel/git/torvalds', '/pub/scm/linux/kernel/git/torvalds/']
GITHUB_COM = 'github.com'
GITHUB_PATH = '/torvalds/linux/'
@@ -48,8 +48,40 @@
return r
+def is_kernel_org(netloc, path):
+    """Returns True if the link is a useful git.kernel.org link.
+
+    The host must equal KERNEL_ORG and the path must start with one of the
+    prefixes listed in KERNEL_PATH.
+    """
+    on_kernel_org = netloc == KERNEL_ORG
+
+    return on_kernel_org and any(
+        path.startswith(prefix) for prefix in KERNEL_PATH)
+
+
+def is_github_com(netloc, path):
+    """Returns True if the link is a useful github.com link.
+
+    The host must equal GITHUB_COM and the path must start with GITHUB_PATH.
+    """
+    if netloc != GITHUB_COM:
+        return False
+
+    return path.startswith(GITHUB_PATH)
+
+
+def find_cve_description(cve_html):
+    """Returns given CVE's description.
+
+    Args:
+        cve_html: HTML text of the CVE's webpage.
+
+    Returns:
+        The description text with newlines removed, or an empty string when
+        the page has no 'GeneratedTable' div, no 'Description' header cell,
+        or no row following that header.
+    """
+    soup = BeautifulSoup(cve_html, 'html.parser')
+
+    table = soup.find('div', attrs={'id': 'GeneratedTable'})
+    if table is None:
+        # Unexpected page layout; there is nothing to scrape.
+        return ''
+
+    # Default to an empty string so a page without a 'Description' header
+    # does not leave the name unbound at the return below.
+    description = ''
+
+    for t in table.descendants:
+        # Text nodes have name None, so only real <th> tags reach t.text.
+        if t.name == 'th' and t.text == 'Description':
+            # The description is read from the sibling that follows the
+            # header cell's parent element.
+            next_row = t.parent.find_next_sibling()
+            if next_row is not None:
+                description = next_row.get_text()
+
+    return description.replace('\n', '')
+
+
def find_commit_links(cve_html):
"""Returns commit links from given CVE's webpage."""
+ # TODO: Additional pattern to look for might be:
+ # https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-19076
commits = []
soup = BeautifulSoup(cve_html, 'html.parser')
@@ -61,10 +93,10 @@
parsed_link = urlparse(link)
netloc, path = parsed_link.netloc, parsed_link.path
- if netloc == KERNEL_ORG and path.startswith(KERNEL_PATH):
+ if is_kernel_org(netloc, path):
commits.append(link)
- elif netloc == GITHUB_COM and path.startswith(GITHUB_PATH):
+ elif is_github_com(netloc, path):
commits.append(link)
return commits
@@ -84,13 +116,13 @@
sha = None
- if netloc == KERNEL_ORG and path.startswith(KERNEL_PATH):
+ if is_kernel_org(netloc, path):
try:
sha = parse_qs(parsed_link.query)['id'][0]
except KeyError:
LOGGER.error(f'Sha not found in {link}')
- elif netloc == GITHUB_COM and path.startswith(GITHUB_PATH):
+ elif is_github_com(netloc, path):
sha = os.path.basename(path)
return sha if is_valid(sha) else None
@@ -101,6 +133,10 @@
commits = set()
req = make_cve_request(cve_number)
+
+ cve_description = find_cve_description(req.text)
+ LOGGER.info(f'CVE Description: {cve_description}')
+
commit_links = find_commit_links(req.text)
# Collects fix commit sha(s) from links.
diff --git a/contrib/cvetriager/tests/webscraper_test.py b/contrib/cvetriager/tests/webscraper_test.py
index 615ce1d..4921db3 100644
--- a/contrib/cvetriager/tests/webscraper_test.py
+++ b/contrib/cvetriager/tests/webscraper_test.py
@@ -5,6 +5,7 @@
"""Testing script for cvelib/webscraper.py."""
import unittest
+from urllib.parse import urlparse
from cvelib import webscraper
@@ -24,6 +25,13 @@
f'https://github.com/torvalds/linux/commit/{SHA}'
]
+ CVE_DESCRIPTION = (
+ 'The tcpmss_mangle_packet function in net/netfilter/xt_TCPMSS.c in the Linux kernel before '
+ '4.11, and 4.9.x before 4.9.36, allows remote attackers to cause a denial of service '
+ '(use-after-free and memory corruption) or possibly have unspecified other impact by '
+ 'leveraging the presence of xt_TCPMSS in an iptables action.'
+ )
+
def test_make_cve_request(self):
"""Tests that url request was made."""
req = webscraper.make_cve_request(TestWebScraper.CVE_NUMBER)
@@ -33,6 +41,14 @@
# Check if proper url was fetched.
self.assertEqual(req.url, expected)
+ def test_find_cve_description(self):
+ """Tests that the description scraped for CVE_NUMBER equals CVE_DESCRIPTION."""
+ # NOTE(review): make_cve_request appears to fetch the CVE page itself, so
+ # this test presumably needs network access -- confirm.
+ req = webscraper.make_cve_request(TestWebScraper.CVE_NUMBER)
+
+ # Extract the description from the response body.
+ description = webscraper.find_cve_description(req.text)
+
+ self.assertEqual(description, TestWebScraper.CVE_DESCRIPTION)
+
def test_find_commit_links(self):
"""Tests that correct commit links were found."""
req = webscraper.make_cve_request(TestWebScraper.CVE_NUMBER)
@@ -112,3 +128,24 @@
def test_valid_sha(self):
"""Tests that the sha found is a hexidecimal string."""
self.assertTrue(webscraper.is_valid(TestWebScraper.SHA))
+
+ def test_is_kernel_org(self):
+ """Tests that is_kernel_org accepts git.kernel.org commit links."""
+ # LINKS[0] is defined outside this hunk; presumably the git.kernel.org
+ # entry of the fixture list -- confirm.
+ parsed_link = urlparse(TestWebScraper.LINKS[0])
+ netloc, path = parsed_link.netloc, parsed_link.path
+
+ self.assertTrue(webscraper.is_kernel_org(netloc, path))
+
+ # Also exercise a link under the '/pub/scm/linux/kernel/git/torvalds/' path.
+ pub_scm_torvalds_link = ('https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/'
+ 'linux.git/commit/?id=f2d67fec0b43edce8c416101cdc52e71145b5fef')
+
+ parsed_link2 = urlparse(pub_scm_torvalds_link)
+ netloc2, path2 = parsed_link2.netloc, parsed_link2.path
+ self.assertTrue(webscraper.is_kernel_org(netloc2, path2))
+
+ def test_is_github_com(self):
+ """Tests that is_github_com accepts a github.com commit link."""
+ # LINKS[1] is presumably the github.com commit link of the fixture list
+ # (the list is only partly visible here) -- confirm.
+ parsed_link = urlparse(TestWebScraper.LINKS[1])
+ netloc, path = parsed_link.netloc, parsed_link.path
+
+ self.assertTrue(webscraper.is_github_com(netloc, path))