Add script to search blocked words in repo
Usage: search_blocked_words.sh [/path/to/word_list.txt]
Use case 1: Search all globally blocked words.
$ search_blocked_words.sh ~/trunk/src/repohooks/blocked_terms.txt
- This is useful when first introducing unblocked_terms.txt to a repo.
- This is also useful to verify the blocked words indeed do not exist.
Some CLs may have checked in blocked words using the '--no-verify' flag,
circumventing the repo check.
Use case 2: Search all words in the local unblock list.
$ search_blocked_words.sh
- This is useful to learn the current COIL status.
For example, running the script in src/platform2 results in:
reg_exp #lines #files #filenames
black.?hat 0 0 0
black.?list 47 12 0
build.?cop 0 0 0
crazy 12 4 1
cripple 0 0 0
dummy 902 186 3
first.?class.?citizen 0 0 0
grandfathered 0 0 0
gr[ae]y.?hat 0 0 0
gr[ae]y.?list 0 0 0
insane 0 0 0
\bhe\b 18 12 0
\bshe\b 2 2 0
\bhim\b 0 0 0
\bher\b 0 0 0
\bhis\b 1 1 0
\bhers\b 0 0 0
man.?in.?the.?middle 0 0 0
master 1616 216 16
mitm 11 6 0
native 932 238 4
red.?line 1 1 0
rtfm 1 1 0
sane 564 39 5
sanity 137 77 0
slave 260 49 1
white.?glove 0 0 0
white.?hat 0 0 0
white.?label 227 24 0
white.?list 244 59 4
wtf 26 4 0
BUG=b:165908442
TEST=See above.
Change-Id: Ic0ad12362883435ab33f8eb928f88602dad91375
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2404702
Tested-by: Daisuke Nojiri <dnojiri@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Daisuke Nojiri <dnojiri@chromium.org>
Commit-Queue: Daisuke Nojiri <dnojiri@chromium.org>
diff --git a/contrib/search_blocked_words.sh b/contrib/search_blocked_words.sh
new file mode 100755
index 0000000..6057139
--- /dev/null
+++ b/contrib/search_blocked_words.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Script to search a git checkout for blocked words.
+#
+# Usage: search_blocked_words.sh [/path/to/word_list.txt]
+#
+# The word list holds one extended regular expression (grep -E) per line;
+# blank lines and lines starting with '#' are skipped. When no list is given,
+# ./unblocked_terms.txt is used. Only entries that git lists for HEAD from the
+# current directory are searched, so run this from inside the repo.
+#
+# Use case 1: Search all globally blocked words.
+#
+#   $ search_blocked_words.sh ~/trunk/src/repohooks/blocked_terms.txt
+#
+#   - This is useful when first introducing unblocked_terms.txt to a repo.
+#   - This is also useful to verify the blocked words indeed do not exist.
+#     Some CLs may have checked in blocked words using the '--no-verify'
+#     flag, circumventing the repo check.
+#
+# Use case 2: Search all words in the local unblock list.
+#
+#   $ search_blocked_words.sh
+#
+#   - This is useful to learn the current COIL status.
+#
+# Output: one table row per expression (matching is case-insensitive):
+#   #lines     - matching lines across tracked regular, non-binary files
+#   #files     - how many of those files contain at least one match
+#   #filenames - tracked file and directory basenames that match
+
+input_file=${1:-unblocked_terms.txt}
+if [[ ! -r "${input_file}" ]]; then
+  {
+    echo "Error: '${input_file}' not found."
+    echo "Have you copied repohooks/unblocked_terms.txt?"
+    echo "Read repohooks/README.md for more information."
+  } >&2
+  exit 1
+fi
+
+# Fail early with a clear message; otherwise a failing 'git ls-tree' below
+# would just yield an all-zero table.
+if ! git rev-parse --verify --quiet HEAD > /dev/null; then
+  echo "Error: cannot resolve HEAD. Run this inside a git checkout." >&2
+  exit 1
+fi
+
+# Walk every entry recorded at HEAD. '-t' also emits the directories, so each
+# file and directory shows up exactly once; '-z' keeps names with whitespace
+# or quotes intact.
+#   names: basename of every entry (files, symlinks, directories).
+#   paths: regular files only (no symlinks), i.e. what grep should read.
+paths=()
+names=()
+while IFS= read -r -d '' entry; do
+  names+=("${entry##*/}")
+  if [[ -f "${entry}" && ! -L "${entry}" ]]; then
+    paths+=("${entry}")
+  fi
+done < <(git ls-tree -r -t -z --name-only HEAD)
+
+# Rows are tab-separated so 'column' cannot mis-split them.
+results=($'reg_exp\t#lines\t#files\t#filenames')
+while read -r regex || [[ -n "${regex}" ]]; do
+  # Skip blank and comment lines.
+  [[ -z "${regex}" || "${regex}" == "#"* ]] && continue
+
+  # Count matching lines and files in one grep pass. '-H' forces the
+  # "name:count" form even when xargs hands grep a single file, and the count
+  # is taken from the last ':' field so names containing ':' still work.
+  line_count=0
+  path_count=0
+  if (( ${#paths[@]} > 0 )); then
+    read -r line_count path_count < <(
+      printf '%s\0' "${paths[@]}" \
+        | xargs -0 grep -E -i -I -c -H -e "${regex}" -- \
+        | awk -F: '{ n += $NF; if ($NF > 0) f++ } END { print n + 0, f + 0 }'
+    )
+  fi
+
+  # Count matching file & dir names.
+  name_count=0
+  if (( ${#names[@]} > 0 )); then
+    name_count=$(printf '%s\n' "${names[@]}" | grep -E -i -c -e "${regex}")
+  fi
+
+  # Save result. grep prints nothing for an invalid expression, hence ':-0'.
+  printf -v row '%s\t%s\t%s\t%s' \
+    "${regex}" "${line_count}" "${path_count}" "${name_count:-0}"
+  results+=("${row}")
+done < "${input_file}"
+
+# Present results.
+printf '%s\n' "${results[@]}" \
+  | column --table --separator $'\t' --table-right 2,3,4