pre-submit: Increase support for superseding unblocking regex

Add support for using a regex to unblock a subset of the matches of a given
blocked word regex. While working on some COIL cleanup, some of the blocked
words turned out to be too generic to support incremental cleanup of the code
base while enabling the presubmit term check for already cleaned up terms.
This patch allows a regex defined in a local unblocked_terms.txt file to
supersede a more generic blocked word defined in the global blocked_terms.txt
file.

BUG=chromium:1129806
TEST=run pre-upload_unittest.py

Change-Id: I591616492adb57a54ecb9771512d29f6e22f6a69
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/repohooks/+/2434032
Tested-by: Laurent Chavey <chavey@google.com>
Commit-Queue: Laurent Chavey <chavey@google.com>
Reviewed-by: Daisuke Nojiri <dnojiri@chromium.org>
Reviewed-by: Stephane Belmon <sbelmon@google.com>
diff --git a/pre-upload.py b/pre-upload.py
index 80abe2c..ce994f5 100755
--- a/pre-upload.py
+++ b/pre-upload.py
@@ -639,18 +639,41 @@
   errors = []
 
   def _check_line(line):
+    # Store information about each span matching a blocking regex, so that
+    # unblocked regex matches can be compared against blocked regex matches.
+    # [{'span':re.span,    - overlap of matching regex in line
+    #   'group':re.group,  - matching term
+    #   'blocked':bool,    - True: matching blocked, False: matching unblocked
+    #   'keyword':regex,   - block regex
+    #  }, ...]
+    blocked_span = []
+    # Store information about each span matching unblocking regex.
+    # [re.span, ...]
+    unblocked_span = []
+
     for word in keywords:
-      m = re.search(word, line, flags=re.I)
-      if m:
-        matched = True
-        # The unblock values supercede blocked values, so if any unblock
-        # regex matches a term found by the block list, we ignore it.
-        for unblocked in unblocked_words:
-          if re.search(unblocked, m.group(0)):
-            matched = False
-            break
-        if matched:
-          return f'Matched "{m[0]}" with regex of "{word}"'
+      for match in re.finditer(word, line, flags=re.I):
+        blocked_span.append({'span' : match.span(),
+                             'group' : match.group(0),
+                             'blocked' : True,
+                             'keyword' : word})
+
+    for unblocked in unblocked_words:
+      for match in re.finditer(unblocked, line, flags=re.I):
+        unblocked_span.append(match.span())
+
+    # Unblock blocked matches whose span lies within an unblocked match:
+    #   blocked := "this.?word"
+    #   unblocked := "\.this.?word"
+    # "this line is blocked because of this1word"
+    # "this line is unblocked because of thenew.this1word"
+    #
+    for b in blocked_span:
+      for ub in unblocked_span:
+        if ub[0] <= b['span'][0] and ub[1] >= b['span'][1]:
+          b['blocked'] = False
+      if b['blocked']:
+        return f'Matched "{b["group"]}" with regex of "{b["keyword"]}"'
     return False
 
   diff_errors = _check_lines_in_diff(commit, files, _check_line,
diff --git a/pre-upload_unittest.py b/pre-upload_unittest.py
index c9addce..ccd0474 100755
--- a/pre-upload_unittest.py
+++ b/pre-upload_unittest.py
@@ -181,6 +181,35 @@
                                                      '--unblock', 'scru.?fy'])
     self.assertEqual(failures, [])
 
+  def test_unblock_and_block_option_cases(self):
+    self.desc_mock.return_value = 'Commit message with scruffy'
+    self.diff_mock.return_value = [
+        (1, 'Two unblocked terms scruffy and dog-pile'),
+        (2, 'Without any blocked terms'),
+        (3, 'Blocked dogpile'),
+        (4, 'Unblocked m.dogpile'),
+        (5, 'Blocked dogpile and unblocked m.dogpile'),
+        (6, 'Unlocked m.dogpile and blocked dogpile'),
+        (7, 'Unlocked m.dogpile and unblocked dog-pile'),
+    ]
+    # scruffy matches regex of 'scruffy' in block list but is excluded by
+    # a different regex of 'scru.?fy' in unblock list.
+    # dogpile and dog.pile match regex of 'dog.?pile' in block list.
+    # m.dogpile and dog-pile match regex of 'dog.?pile' in block list but
+    # are excluded by regexes '\.dog.?pile' and 'dog-pile' in unblock list.
+    failures = pre_upload._check_keywords(self.project,
+                                          'COMMIT',
+                                          ['--unblock', r'dog-pile',
+                                           '--unblock', r'scru.?fy',
+                                           '--unblock', r'\.dog.?pile'])
+    self.assertNotEqual(failures, [])
+    self.assertEqual('Found a blocked keyword in:', failures[0].msg)
+    self.assertEqual(
+        [r'x.ebuild, line 3: Matched "dogpile" with regex of "dog.?pile"',
+         r'x.ebuild, line 5: Matched "dogpile" with regex of "dog.?pile"',
+         r'x.ebuild, line 6: Matched "dogpile" with regex of "dog.?pile"'],
+        failures[0].items)
+
 
 class CheckNoLongLinesTest(PreUploadTestCase):
   """Tests for _check_no_long_lines."""