pre-submit: Populate blocked_terms.txt
This patch populates the global list of blocked words. The list of
unblocked words is a symlink to the block list. Thus, nothing
will be blocked globally.
unblocked_terms.txt in this directory is referenced only if it
doesn't exist in the project directory. It's expected to be copied
to each project when a project is starting COIL. It will also serve
as a TODO list to locally track the progress.
See README.md for how to use unblocked_terms.txt for your project.
BUG=b:165908442
TEST=Run repo upload --cbr --dry-run .
[COMMIT 1/1 7a40d010022f] test: blocked_terms.txt
[FAILED] path/to/X: _check_keywords
Found a blocked keyword in:
.../main.c, line 41: Matched "dog-pile" with regex of "dog.?pile"
Change-Id: Iea03b536d24a1a1c9590aebed162c6624166a343
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/repohooks/+/2369239
Tested-by: Daisuke Nojiri <dnojiri@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Bernie Thompson <bhthompson@chromium.org>
Commit-Queue: Mike Frysinger <vapier@chromium.org>
Auto-Submit: Daisuke Nojiri <dnojiri@chromium.org>
diff --git a/README.md b/README.md
index a721c23..e90eabb 100644
--- a/README.md
+++ b/README.md
@@ -126,6 +126,28 @@
cros_license_check: --exclude_regex=\b(checkpatch\.pl|kernel-doc)$
```
+## Blocked and Unblocked Word List
+
+`blocked_terms.txt` contains a list of words which are blocked if
+`keyword_check` is enabled in the project. `unblocked_terms.txt` is a symlink
+to `blocked_terms.txt`, so by default nothing will be blocked.
+
+`unblocked_terms.txt` in this directory is referenced only if it doesn't exist in
+a project directory. To transition to the default blocked word list for project
+X, one can do:
+
+1. Copy `unblocked_terms.txt` to X.
+2. Remove words which are not used in X.
+ `$ egrep -r -i -I '(keyword1|keyword2|...)'`
+3. Test and submit CL.
+
+Then, when blocking word 'foo' for project X, one can do:
+
+1. Remove 'foo' from `unblocked_terms.txt` in X.
+2. Grep and fix matches.
+ `$ egrep -r -i -I foo`
+3. Test and submit CL.
+
# Third Party code
We have many third party repos where you probably want to disable CrOS checks.
diff --git a/blocked_terms.txt b/blocked_terms.txt
index e69de29..1e1f9e3 100644
--- a/blocked_terms.txt
+++ b/blocked_terms.txt
@@ -0,0 +1,39 @@
+# DELETE THIS COMMENT IN YOUR COPY.
+#
+# This is the global list of words to be blocked. unblocked_terms.txt has the
+# same contents. Copy unblocked_terms.txt to your project to enable the repo
+# hook to block the words.
+#
+# See repohooks/README.md for more details.
+
+black.?hat
+black.?list
+build.?cop
+crazy
+cripple
+dummy
+first.?class.?citizen
+grandfathered
+gr[ae]y.?hat
+gr[ae]y.?list
+insane
+\bhe\b
+\bshe\b
+\bhim\b
+\bher\b
+\bhis\b
+\bhers\b
+man.?in.?the.?middle
+master
+mitm
+native
+red.?line
+rtfm
+sane
+sanity
+slave
+white.?glove
+white.?hat
+white.?label
+white.?list
+wtf
diff --git a/pre-upload.py b/pre-upload.py
index 0b02750..7c252ea 100755
--- a/pre-upload.py
+++ b/pre-upload.py
@@ -118,6 +118,9 @@
TEST_FIELD_RE = r'\nTEST=\S+'
+BLOCKED_TERMS_FILE = 'blocked_terms.txt'
+UNBLOCKED_TERMS_FILE = 'unblocked_terms.txt'
+
# Exceptions
@@ -594,10 +597,21 @@
'Found a tab character in:')
+def _read_terms_file(terms_file):
+ """Read list of words from file, skipping comments and blank lines."""
+ file_terms = set()
+ for line in osutils.ReadFile(terms_file).splitlines():
+ # Allow comment and blank lines.
+ line = line.split('#', 1)[0]
+ if not line:
+ continue
+ file_terms.add(line)
+ return file_terms
+
+
def _check_keywords(_project, commit, options=()):
"""Checks there are no blocked keywords in commit content."""
- blocked_terms_file = os.path.join(_get_hooks_dir(), 'blocked_terms.txt')
- common_keywords = set(osutils.ReadFile(blocked_terms_file).splitlines())
+ # Read options from override list.
parser = argparse.ArgumentParser()
parser.add_argument('--exclude_regex', action='append', default=[])
parser.add_argument('--include_regex', action='append', default=[])
@@ -605,8 +619,19 @@
parser.add_argument('--unblock', action='append', default=[])
opts = parser.parse_args(options)
+ # Read blocked word list.
+ blocked_terms_file = os.path.join(_get_hooks_dir(), BLOCKED_TERMS_FILE)
+ common_keywords = _read_terms_file(blocked_terms_file)
+
+ # Read unblocked word list. Global list is skipped if local list exists.
+ unblocked_terms_file = os.path.join(_get_hooks_dir(), UNBLOCKED_TERMS_FILE)
+ if os.path.isfile(os.path.join(_project.dir, UNBLOCKED_TERMS_FILE)):
+ unblocked_terms_file = os.path.join(_project.dir, UNBLOCKED_TERMS_FILE)
+ unblocked_words = _read_terms_file(unblocked_terms_file)
+ unblocked_words.update(opts.unblock)
+
keywords = set(common_keywords | set(opts.block))
- keywords = sorted(keywords - set(opts.unblock))
+ keywords = sorted(keywords - unblocked_words)
files = _filter_files(_get_affected_files(commit),
opts.include_regex + COMMON_INCLUDED_PATHS,
opts.exclude_regex + COMMON_EXCLUDED_PATHS)
@@ -619,12 +644,12 @@
matched = True
# The unblock values supercede blocked values, so if any unblock
# regex matches a term found by the block list, we ignore it.
- for unblocked in opts.unblock:
+ for unblocked in unblocked_words:
if re.search(unblocked, m.group(0)):
matched = False
break
if matched:
- return 'Matched ' + word
+ return f'Matched "{m[0]}" with regex of "{word}"'
return False
diff_errors = _check_lines_in_diff(commit, files, _check_line,
diff --git a/pre-upload_unittest.py b/pre-upload_unittest.py
index d5170ec..59ea3d5 100755
--- a/pre-upload_unittest.py
+++ b/pre-upload_unittest.py
@@ -92,17 +92,21 @@
self.assertEqual(u'hi \ufffd there', ret)
-class CheckKeywordsTest(PreUploadTestCase):
+class CheckKeywordsTest(PreUploadTestCase, cros_test_lib.TempDirTestCase):
"""Tests for _check_keywords."""
def setUp(self):
self.PatchObject(pre_upload, '_get_affected_files',
return_value=['x.ebuild'])
self.PatchObject(pre_upload, '_filter_files', return_value=['x.ebuild'])
- self.PatchObject(osutils, 'ReadFile',
- return_value='scruffy\nmangy\ndog.?pile\ncat.?circle')
+ # First call for blocked_terms.txt and second call for unblocked_terms.txt.
+ self.rf_mock = self.PatchObject(
+ osutils, 'ReadFile',
+ side_effect=['scruffy\nmangy\ndog.?pile\ncat.?circle', 'fox'])
self.diff_mock = self.PatchObject(pre_upload, '_get_file_diff')
self.desc_mock = self.PatchObject(pre_upload, '_get_commit_desc')
+ self.project = pre_upload.Project(name='PROJECT', dir=self.tempdir,
+ remote=None)
def test_good_cases(self):
self.desc_mock.return_value = 'Commit Message.\nLine 2'
@@ -110,9 +114,16 @@
(1, 'Some text without keywords.'),
(2, 'The dog is black has a partial keyword that does not count.'),
]
- failures = pre_upload._check_keywords(ProjectNamed('PROJECT'), 'COMMIT')
+ failures = pre_upload._check_keywords(self.project, 'COMMIT')
self.assertEqual(failures, [])
+ self.rf_mock.assert_has_calls([
+ mock.call(os.path.join(pre_upload._get_hooks_dir(),
+ pre_upload.BLOCKED_TERMS_FILE)),
+ mock.call(os.path.join(pre_upload._get_hooks_dir(),
+ pre_upload.UNBLOCKED_TERMS_FILE)),
+ ])
+
def test_bad_cases(self):
self.desc_mock.return_value = 'Commit Message.\nLine 2\nLine 3 scruffy'
self.diff_mock.return_value = [
@@ -123,20 +134,21 @@
(5, 'dogpiled substring catch'),
(6, 'scruffy mangy dog, multiple in a line catch'),
]
- failures = pre_upload._check_keywords(ProjectNamed('PROJECT'),
- 'COMMIT')
+ failures = pre_upload._check_keywords(self.project, 'COMMIT')
self.assertNotEqual(failures, [])
self.assertEqual('Found a blocked keyword in:', failures[0].msg)
- self.assertEqual(['x.ebuild, line 1: Matched scruffy',
- 'x.ebuild, line 2: Matched dog.?pile',
- 'x.ebuild, line 3: Matched cat.?circle',
- 'x.ebuild, line 4: Matched dog.?pile',
- 'x.ebuild, line 5: Matched dog.?pile',
- 'x.ebuild, line 6: Matched mangy'],
- failures[0].items)
+ self.assertEqual(
+ ['x.ebuild, line 1: Matched "Scruffy" with regex of "scruffy"',
+ 'x.ebuild, line 2: Matched "dog-pile" with regex of "dog.?pile"',
+ 'x.ebuild, line 3: Matched "cat_circle" with regex of "cat.?circle"',
+ 'x.ebuild, line 4: Matched "dog pile" with regex of "dog.?pile"',
+ 'x.ebuild, line 5: Matched "dogpile" with regex of "dog.?pile"',
+ 'x.ebuild, line 6: Matched "mangy" with regex of "mangy"'],
+ failures[0].items)
self.assertEqual('Found a blocked keyword in:', failures[1].msg)
- self.assertEqual(['Commit message, line 3: Matched scruffy'],
- failures[1].items)
+ self.assertEqual(
+ ['Commit message, line 3: Matched "scruffy" with regex of "scruffy"'],
+ failures[1].items)
def test_block_option_cases(self):
self.desc_mock.return_value = 'Commit Message.\nLine 2 voldemort'
@@ -144,29 +156,27 @@
(1, 'Line with a new term voldemort.'),
(2, 'Line with only they who shall not be named.'),
]
- failures = pre_upload._check_keywords(ProjectNamed('PROJECT'),
+ failures = pre_upload._check_keywords(self.project,
'COMMIT', ['--block', 'voldemort'])
self.assertNotEqual(failures, [])
self.assertEqual('Found a blocked keyword in:', failures[0].msg)
- self.assertEqual(['x.ebuild, line 1: Matched voldemort'], failures[0].items)
+ self.assertEqual(
+ ['x.ebuild, line 1: Matched "voldemort" with regex of "voldemort"'],
+ failures[0].items)
self.assertEqual('Found a blocked keyword in:', failures[1].msg)
- self.assertEqual(['Commit message, line 2: Matched voldemort'],
- failures[1].items)
+ self.assertEqual(
+ ['Commit message, line 2: '
+ 'Matched "voldemort" with regex of "voldemort"'], failures[1].items)
def test_unblock_option_cases(self):
self.desc_mock.return_value = 'Commit message with scruffy'
self.diff_mock.return_value = [
- (1, 'Line with a now unblocked term scruffy'),
- (2, 'Line with without any blocked terms'),
- ]
- failures = pre_upload._check_keywords(ProjectNamed('PROJECT'),
- 'COMMIT', ['--unblock', 'scru.?fy'])
- self.assertEqual(failures, [])
- self.diff_mock.return_value = [
(1, 'Line with two unblocked terms scruffy big dog-pile'),
(2, 'Line with without any blocked terms'),
]
- failures = pre_upload._check_keywords(ProjectNamed('PROJECT'),
+ # scruffy matches regex of 'scruffy' in block list but excluded by
+ # different regex of 'scru.?fy' in unblock list.
+ failures = pre_upload._check_keywords(self.project,
'COMMIT', ['--unblock', 'dog.?pile',
'--unblock', 'scru.?fy'])
self.assertEqual(failures, [])
diff --git a/unblocked_terms.txt b/unblocked_terms.txt
new file mode 120000
index 0000000..9f7f7f4
--- /dev/null
+++ b/unblocked_terms.txt
@@ -0,0 +1 @@
+blocked_terms.txt
\ No newline at end of file