pre-submit: Increase support for superseding unblocking regex Add support to allow using regex to unblock a subset of a given blocked word regex. While working on some COIL cleanup, some of the blocked words are too generic to support incremental cleanup of the code base while enabling the presubmit term check for already-cleaned-up terms. The patch allows the definition of regex in a local unblocked_terms.txt file to supersede a more generic blocked word defined in the global blocked_terms.txt file. BUG=chromium:1129806 TEST=run pre-upload_unittest.py Change-Id: I591616492adb57a54ecb9771512d29f6e22f6a69 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/repohooks/+/2434032 Tested-by: Laurent Chavey <chavey@google.com> Commit-Queue: Laurent Chavey <chavey@google.com> Reviewed-by: Daisuke Nojiri <dnojiri@chromium.org> Reviewed-by: Stephane Belmon <sbelmon@google.com>

commit: 434af9a93785aab809ad37f210d7cb4808830b88 [log] [tgz]
author: Laurent Chavey <chavey@google.com> Mon Sep 28 22:25:16 2020 +0900
committer: Commit Bot <commit-bot@chromium.org> Wed Oct 07 03:30:33 2020 +0000
tree: 44759c210659f1da0febcb51cd7c2a8a6b01483a
parent: 0dc1f943cd7d43565bc2cb64327a47fef8b98cc8 [diff]
diff --git a/pre-upload.py b/pre-upload.py
index 80abe2c..ce994f5 100755
--- a/pre-upload.py
+++ b/pre-upload.py

@@ -639,18 +639,41 @@
   errors = []
 
   def _check_line(line):
+    # Store information about each span matching a blocking regex, so that
+    # unblocking regex matches can be compared against blocked regex matches.
+    # [{'span':re.span,    - overlap of matching regex in line
+    #   'group':re.group,  - matching term
+    #   'blocked':bool,    - True: matching blocked, False: matching unblocked
+    #   'keyword':regex,   - block regex
+    #  }, ...]
+    blocked_span = []
+    # Store information about each span matching unblocking regex.
+    # [re.span, ...]
+    unblocked_span = []
+
     for word in keywords:
-      m = re.search(word, line, flags=re.I)
-      if m:
-        matched = True
-        # The unblock values supercede blocked values, so if any unblock
-        # regex matches a term found by the block list, we ignore it.
-        for unblocked in unblocked_words:
-          if re.search(unblocked, m.group(0)):
-            matched = False
-            break
-        if matched:
-          return f'Matched "{m[0]}" with regex of "{word}"'
+      for match in re.finditer(word, line, flags=re.I):
+        blocked_span.append({'span' : match.span(),
+                             'group' : match.group(0),
+                             'blocked' : True,
+                             'keyword' : word})
+
+    for unblocked in unblocked_words:
+      for match in re.finditer(unblocked, line, flags=re.I):
+        unblocked_span.append(match.span())
+
+    # Unblock blocked terms whose span is fully covered by an unblocked match:
+    #   blocked := "this.?word"
+    #   unblocked := "\.this.?word"
+    # "this line is blocked because of this1word"
+    # "this line is unblocked because of thenew.this1word"
+    #
+    for b in blocked_span:
+      for ub in unblocked_span:
+        if ub[0] <= b['span'][0] and ub[1] >= b['span'][1]:
+          b['blocked'] = False
+      if b['blocked']:
+        return f'Matched "{b["group"]}" with regex of "{b["keyword"]}"'
     return False
 
   diff_errors = _check_lines_in_diff(commit, files, _check_line,

diff --git a/pre-upload_unittest.py b/pre-upload_unittest.py
index c9addce..ccd0474 100755
--- a/pre-upload_unittest.py
+++ b/pre-upload_unittest.py

@@ -181,6 +181,35 @@
                                                      '--unblock', 'scru.?fy'])
     self.assertEqual(failures, [])
 
+  def test_unblock_and_block_option_cases(self):
+    self.desc_mock.return_value = 'Commit message with scruffy'
+    self.diff_mock.return_value = [
+        (1, 'Two unblocked terms scruffy and dog-pile'),
+        (2, 'Without any blocked terms'),
+        (3, 'Blocked dogpile'),
+        (4, 'Unblocked m.dogpile'),
+        (5, 'Blocked dogpile and unblocked m.dogpile'),
+        (6, 'Unlocked m.dogpile and blocked dogpile'),
+        (7, 'Unlocked m.dogpile and unblocked dog-pile'),
+    ]
+    # scruffy matches the regex 'scruffy' in the block list but is excluded
+    # by a different regex, 'scru.?fy', in the unblock list.
+    # dogpile and dog.pile match the regex 'dog.?pile' in the block list.
+    # m.dogpile and dog-pile match 'dog.?pile' in the block list but are
+    # excluded by the regexes '\.dog.?pile' and 'dog-pile' in the unblock list.
+    failures = pre_upload._check_keywords(self.project,
+                                          'COMMIT',
+                                          ['--unblock', r'dog-pile',
+                                           '--unblock', r'scru.?fy',
+                                           '--unblock', r'\.dog.?pile'])
+    self.assertNotEqual(failures, [])
+    self.assertEqual('Found a blocked keyword in:', failures[0].msg)
+    self.assertEqual(
+        [r'x.ebuild, line 3: Matched "dogpile" with regex of "dog.?pile"',
+         r'x.ebuild, line 5: Matched "dogpile" with regex of "dog.?pile"',
+         r'x.ebuild, line 6: Matched "dogpile" with regex of "dog.?pile"'],
+        failures[0].items)
+
 
 class CheckNoLongLinesTest(PreUploadTestCase):
   """Tests for _check_no_long_lines."""
commit	434af9a93785aab809ad37f210d7cb4808830b88	[log] [tgz]
author	Laurent Chavey <chavey@google.com>	Mon Sep 28 22:25:16 2020 +0900
committer	Commit Bot <commit-bot@chromium.org>	Wed Oct 07 03:30:33 2020 +0000
tree	44759c210659f1da0febcb51cd7c2a8a6b01483a
parent	0dc1f943cd7d43565bc2cb64327a47fef8b98cc8 [diff]