Add blocked keywords pre-upload test

BUG=b:162982485
TEST=Unit tests pass, and the pre-upload check still runs on a CL.

Change-Id: I6c16e9b3111b3063145a3a34f5985bbb54f69085
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/repohooks/+/2316468
Tested-by: Bernie Thompson <bhthompson@chromium.org>
Reviewed-by: Paul Fagerburg <pfagerburg@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Commit-Queue: Bernie Thompson <bhthompson@chromium.org>
diff --git a/blocked_terms.txt b/blocked_terms.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/blocked_terms.txt
diff --git a/pre-upload.py b/pre-upload.py
index 8d0778c..bdacbec 100755
--- a/pre-upload.py
+++ b/pre-upload.py
@@ -466,8 +466,12 @@
   errors = []
   for afile in files:
     for line_num, line in _get_file_diff(afile, commit):
-      if check_callable(line):
-        errors.append('%s, line %s' % (afile, line_num))
+      result = check_callable(line)
+      if result:
+        msg = f'{afile}, line {line_num}'
+        if isinstance(result, str):
+          msg += f': {result}'
+        errors.append(msg)
   if errors:
     return HookFailure(error_description, errors)
   return None
@@ -590,6 +594,59 @@
                               'Found a tab character in:')
 
 
+def _check_keywords(_project, commit, options=()):
+  """Checks there are no blocked keywords in the diff or commit message.
+
+  Args:
+    _project: Unused.
+    commit: The commit whose diff and description are scanned.
+    options: Hook arguments, parsed by the argparse parser below.
+
+  Returns:
+    A list of HookFailure objects; empty when nothing matched.
+  """
+  blocked_terms_file = os.path.join(_get_hooks_dir(), 'blocked_terms.txt')
+  common_keywords = set(osutils.ReadFile(blocked_terms_file).splitlines())
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--exclude_regex', action='append', default=[])
+  parser.add_argument('--include_regex', action='append', default=[])
+  parser.add_argument('--block', action='append', default=[])
+  parser.add_argument('--unblock', action='append', default=[])
+  opts = parser.parse_args(options)
+
+  # Discard blank entries: an empty regex would match every single line.
+  keywords = {x for x in common_keywords | set(opts.block) if x.strip()}
+  keywords = sorted(keywords - set(opts.unblock))
+  files = _filter_files(_get_affected_files(commit),
+                        opts.include_regex + COMMON_INCLUDED_PATHS,
+                        opts.exclude_regex + COMMON_EXCLUDED_PATHS)
+
+  def _check_line(line):
+    for word in keywords:
+      m = re.search(word, line, flags=re.I)
+      # Unblock values were already dropped from the keyword list, but are
+      # also tried against the matched text in case one is a sub-regex of a
+      # common keyword. Any unblock value matching it (any case) clears it.
+      if m and not any(re.search(unblocked, m.group(0), flags=re.I)
+                       for unblocked in opts.unblock):
+        return 'Matched ' + word
+    return False
+
+  diff_errors = _check_lines_in_diff(commit, files, _check_line,
+                                     'Found a blocked keyword in:')
+  errors = [diff_errors] if diff_errors else []
+
+  commit_desc_errors = []
+  for line_num, line in enumerate(_get_commit_desc(commit).splitlines(), 1):
+    result = _check_line(line)
+    if result:
+      commit_desc_errors.append(f'Commit message, line {line_num}: {result}')
+  if commit_desc_errors:
+    errors.append(HookFailure('Found a blocked keyword in:',
+                              commit_desc_errors))
+  return errors
+
+
 def _check_tabbed_indents(_project, commit, options=()):
   """Checks that indents use tabs only."""
   TABS_REQUIRED_PATHS = [
@@ -1945,6 +2002,7 @@
     _check_exec_files,
     _check_for_uprev,
     _check_gofmt,
+    _check_keywords,
     _check_layout_conf,
     _check_no_long_lines,
     _check_no_new_gyp,
@@ -1976,6 +2034,7 @@
     'cros_license_check': _check_cros_license,
     'aosp_license_check': _check_aosp_license,
     'gofmt_check': _check_gofmt,
+    'keyword_check': _check_keywords,
     'tab_check': _check_no_tabs,
     'tabbed_indent_required_check': _check_tabbed_indents,
     'branch_check': _check_change_has_branch_field,
diff --git a/pre-upload_unittest.py b/pre-upload_unittest.py
index 4b3ac19..7613973 100755
--- a/pre-upload_unittest.py
+++ b/pre-upload_unittest.py
@@ -92,6 +92,78 @@
     self.assertEqual(u'hi \ufffd there', ret)
 
 
+class CheckKeywordsTest(PreUploadTestCase):
+  """Tests for _check_keywords (blocked terms in diffs and commit messages)."""
+
+  def setUp(self):
+    self.PatchObject(pre_upload, '_get_affected_files',
+                     return_value=['x.ebuild'])
+    self.PatchObject(pre_upload, '_filter_files', return_value=['x.ebuild'])
+    self.PatchObject(osutils, 'ReadFile',
+                     return_value='scruffy\nmangy\ndog.?pile\ncat.?circle')
+    self.diff_mock = self.PatchObject(pre_upload, '_get_file_diff')
+    self.desc_mock = self.PatchObject(pre_upload, '_get_commit_desc')
+
+  def test_good_cases(self):
+    self.desc_mock.return_value = 'Commit Message.\nLine 2'
+    self.diff_mock.return_value = [
+        (1, 'Some text without keywords.'),
+        (2, 'The dog is black has a partial keyword that does not count.'),
+    ]
+    failures = pre_upload._check_keywords(ProjectNamed('PROJECT'), 'COMMIT')
+    self.assertEqual(failures, [])
+
+  def test_bad_cases(self):
+    self.desc_mock.return_value = 'Commit Message.\nLine 2\nLine 3 scruffy'
+    self.diff_mock.return_value = [
+        (1, 'Scruffy plain catch'),
+        (2, 'dog-pile hyphenated catch'),
+        (3, 'cat_circle underscored catch'),
+        (4, 'dog pile space catch'),
+        (5, 'dogpiled substring catch'),
+        (6, 'scruffy mangy dog, multiple in a line catch'),
+    ]
+    failures = pre_upload._check_keywords(ProjectNamed('PROJECT'),
+                                          'COMMIT')
+    self.assertNotEqual(failures, [])
+    self.assertEqual('Found a blocked keyword in:', failures[0].msg)
+    self.assertEqual(['x.ebuild, line 1: Matched scruffy',
+                      'x.ebuild, line 2: Matched dog.?pile',
+                      'x.ebuild, line 3: Matched cat.?circle',
+                      'x.ebuild, line 4: Matched dog.?pile',
+                      'x.ebuild, line 5: Matched dog.?pile',
+                      'x.ebuild, line 6: Matched mangy'],
+                     failures[0].items)
+    self.assertEqual('Found a blocked keyword in:', failures[1].msg)
+    self.assertEqual(['Commit message, line 3: Matched scruffy'],
+                     failures[1].items)
+
+  def test_block_option_cases(self):
+    self.desc_mock.return_value = 'Commit Message.\nLine 2 voldemort'
+    self.diff_mock.return_value = [
+        (1, 'Line with a new term voldemort.'),
+        (2, 'Line with only they who shall not be named.'),
+    ]
+    failures = pre_upload._check_keywords(ProjectNamed('PROJECT'),
+                                          'COMMIT', ['--block', 'voldemort'])
+    self.assertNotEqual(failures, [])
+    self.assertEqual('Found a blocked keyword in:', failures[0].msg)
+    self.assertEqual(['x.ebuild, line 1: Matched voldemort'], failures[0].items)
+    self.assertEqual('Found a blocked keyword in:', failures[1].msg)
+    self.assertEqual(['Commit message, line 2: Matched voldemort'],
+                     failures[1].items)
+
+  def test_unblock_option_cases(self):
+    self.desc_mock.return_value = 'Commit message with scruffy'
+    self.diff_mock.return_value = [
+        (1, 'Line with a now unblocked term scruffy'),
+        (2, 'Line with without any blocked terms'),
+    ]
+    failures = pre_upload._check_keywords(ProjectNamed('PROJECT'),
+                                          'COMMIT', ['--unblock', 'scru.?fy'])
+    self.assertEqual(failures, [])
+
+
 class CheckNoLongLinesTest(PreUploadTestCase):
   """Tests for _check_no_long_lines."""