pre-upload: Read unblocked_terms.txt from project sub-directory

Currently, only a project root directory is searched for
unblocked_terms.txt. This forces projects hosting multiple components
(e.g. platform2) to share one unblocked list across all components,
which are each independently developed.

This patch makes pre-upload search the parent directories of each
file being changed for unblocked_terms.txt. If one is found, the
search is stopped and unblocked_terms.txt's in the upper directories
will be ignored.

BUG=b:179270518
TEST=Manually define unblocked_terms.txt and create a change with
blocked words.

Change-Id: Ic2e1c99d2d7c44505fd6e155d39b4b8f1526cfb0
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/repohooks/+/2693684
Tested-by: Daisuke Nojiri <dnojiri@chromium.org>
Commit-Queue: Daisuke Nojiri <dnojiri@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
diff --git a/README.md b/README.md
index d7c0e51..447d541 100644
--- a/README.md
+++ b/README.md
@@ -136,8 +136,9 @@
 Repohook references the global `unblocked_terms.txt` only if a copy doesn't
 exist in a project. Thus, you can copy `unblocked_terms.txt` to your project
 and remove the words which are already cleared locally. That'll allow your
-project to make progress individually in terms of making the language more
-inclusive. This local copy also works as a TO-DO list.
+project to manage keyword-blocking/unblocking individually. Each project can
+have multiple `unblocked_terms.txt`. The parent directories of each file being
+changed are searched. The one nearest to the file has a higher priority.
 
 1.  Copy `unblocked_terms.txt` to the project's root directory.
 2.  List which words are already cleared (or not present):
diff --git a/blocked_terms_unittest.py b/blocked_terms_unittest.py
index 4318a03..33023d7 100755
--- a/blocked_terms_unittest.py
+++ b/blocked_terms_unittest.py
@@ -26,7 +26,6 @@
 # The sys.path monkey patching confuses the linter.
 # pylint: disable=wrong-import-position
 from chromite.lib import cros_test_lib
-from chromite.lib import osutils
 
 
 assert sys.version_info >= (3, 6), 'This module requires Python 3.6+'
@@ -86,11 +85,12 @@
         self.assertEqual(failures, [])
 
     # Check blocked terms.
-    self.rf_mock.side_effect = [self.common_file_terms, set()]
+    self.rf_mock.side_effect = [self.common_file_terms, set(), set()]
     _check_keyword(unblocked=False)
 
     # Check unblocked terms.
     self.rf_mock.side_effect = [self.common_file_terms,
+                                self.unblocked_file_terms,
                                 self.unblocked_file_terms]
     _check_keyword(unblocked=True)
 
diff --git a/pre-upload.py b/pre-upload.py
index 0e7df81..4977e36 100755
--- a/pre-upload.py
+++ b/pre-upload.py
@@ -613,34 +613,24 @@
   return file_terms
 
 
-def _check_keywords(_project, commit, options=()):
-  """Checks there are no blocked keywords in commit content."""
-  # Read options from override list.
-  parser = argparse.ArgumentParser()
-  parser.add_argument('--exclude_regex', action='append', default=[])
-  parser.add_argument('--include_regex', action='append', default=[])
-  parser.add_argument('--block', action='append', default=[])
-  parser.add_argument('--unblock', action='append', default=[])
-  opts = parser.parse_args(options)
+def _check_keywords_in_file(project, commit, file, keywords,
+                            unblocked_terms_file, opts):
+  """Checks there are no blocked keywords in a file being changed."""
+  if file:
+    # Search for UNBLOCKED_TERMS_FILE in the parent directories of the file
+    # being changed.
+    d = os.path.dirname(file)
+    while d != project.dir:
+      terms_file = os.path.join(d, UNBLOCKED_TERMS_FILE)
+      if os.path.isfile(terms_file):
+        unblocked_terms_file = terms_file
+        break
+      d = os.path.dirname(d)
 
-  # Read blocked word list.
-  blocked_terms_file = os.path.join(_get_hooks_dir(), BLOCKED_TERMS_FILE)
-  common_keywords = _read_terms_file(blocked_terms_file)
-
-  # Read unblocked word list. Global list is skipped if local list exists.
-  unblocked_terms_file = os.path.join(_get_hooks_dir(), UNBLOCKED_TERMS_FILE)
-  if os.path.isfile(os.path.join(_project.dir, UNBLOCKED_TERMS_FILE)):
-    unblocked_terms_file = os.path.join(_project.dir, UNBLOCKED_TERMS_FILE)
+  # Read unblocked word list.
   unblocked_words = _read_terms_file(unblocked_terms_file)
   unblocked_words.update(opts.unblock)
-
-  keywords = set(common_keywords | set(opts.block))
   keywords = sorted(keywords - unblocked_words)
-  files = _filter_files(
-      _get_affected_files(commit),
-      opts.include_regex + COMMON_INCLUDED_PATHS + [r'^.*\.md$'],
-      opts.exclude_regex + COMMON_EXCLUDED_PATHS)
-  errors = []
 
   def _check_line(line):
     # Store information about each span matching blocking regex.
@@ -680,10 +670,9 @@
         return f'Matched "{b["group"]}" with regex of "{b["keyword"]}"'
     return False
 
-  diff_errors = _check_lines_in_diff(commit, files, _check_line,
-                                     'Found a blocked keyword in:')
-  if diff_errors:
-    errors.append(diff_errors)
+  if file:
+    return _check_lines_in_diff(commit, [file], _check_line,
+                                'Found a blocked keyword in:')
 
   line_num = 1
   commit_desc_errors = []
@@ -694,8 +683,50 @@
                                 (line_num, result))
     line_num += 1
   if commit_desc_errors:
-    errors.append(HookFailure('Found a blocked keyword in:',
-                              commit_desc_errors))
+    return HookFailure('Found a blocked keyword in:', commit_desc_errors)
+
+  return None
+
+
+def _check_keywords(project, commit, options=()):
+  """Checks there are no blocked keywords in commit content."""
+  # Read options from override list.
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--exclude_regex', action='append', default=[])
+  parser.add_argument('--include_regex', action='append', default=[])
+  parser.add_argument('--block', action='append', default=[])
+  parser.add_argument('--unblock', action='append', default=[])
+  opts = parser.parse_args(options)
+
+  # Read blocked word list.
+  blocked_terms_file = os.path.join(_get_hooks_dir(), BLOCKED_TERMS_FILE)
+  common_keywords = _read_terms_file(blocked_terms_file)
+
+  # Find unblocked word list in project root directory. If not found, global
+  # list is used.
+  unblocked_terms_file = os.path.join(_get_hooks_dir(), UNBLOCKED_TERMS_FILE)
+  if os.path.isfile(os.path.join(project.dir, UNBLOCKED_TERMS_FILE)):
+    unblocked_terms_file = os.path.join(project.dir, UNBLOCKED_TERMS_FILE)
+
+  keywords = set(common_keywords | set(opts.block))
+
+  files = _filter_files(
+      _get_affected_files(commit),
+      opts.include_regex + COMMON_INCLUDED_PATHS + [r'^.*\.md$'],
+      opts.exclude_regex + COMMON_EXCLUDED_PATHS)
+
+  errors = []
+  for file in files:
+    errs = _check_keywords_in_file(project, commit, file,
+                                   keywords, unblocked_terms_file, opts)
+    if errs:
+      errors.append(errs)
+
+  errs = _check_keywords_in_file(project, commit, None,
+                                 keywords, unblocked_terms_file, opts)
+  if errs:
+    errors.append(errs)
+
   return errors
 
 
diff --git a/pre-upload_unittest.py b/pre-upload_unittest.py
index 1ae63f9..1f8e6d7 100755
--- a/pre-upload_unittest.py
+++ b/pre-upload_unittest.py
@@ -99,10 +99,12 @@
     self.PatchObject(pre_upload, '_get_affected_files',
                      return_value=['x.ebuild'])
     self.PatchObject(pre_upload, '_filter_files', return_value=['x.ebuild'])
-    # First call for blocked_terms.txt and second call for unblocked_terms.txt.
+    # First call reads blocked_terms.txt, second call reads unblocked_terms.txt
+    # for _get_file_diff, and third call reads unblocked_terms.txt for
+    # _get_commit_desc.
     self.rf_mock = self.PatchObject(
         osutils, 'ReadFile',
-        side_effect=['scruffy\nmangy\ndog.?pile\ncat.?circle', 'fox'])
+        side_effect=['scruffy\nmangy\ndog.?pile\ncat.?circle', 'fox', 'fox'])
     self.diff_mock = self.PatchObject(pre_upload, '_get_file_diff')
     self.desc_mock = self.PatchObject(pre_upload, '_get_commit_desc')
     self.project = pre_upload.Project(name='PROJECT', dir=self.tempdir,
@@ -120,6 +122,7 @@
     self.rf_mock.assert_has_calls([
         mock.call(os.path.join(pre_upload._get_hooks_dir(),
                                pre_upload.BLOCKED_TERMS_FILE)),
+        mock.call(pre_upload.UNBLOCKED_TERMS_FILE),
         mock.call(os.path.join(pre_upload._get_hooks_dir(),
                                pre_upload.UNBLOCKED_TERMS_FILE)),
     ])