Add script to search blocked words

Usage: search_blocked_words.sh [/path/to/word_list.txt]

Use case 1: Search all globally blocked words.

  $ search_blocked_words.sh ~/trunk/src/repohooks/blocked_terms.txt

- This is useful when first introducing unblocked_terms.txt to a repo.
- This is also useful to verify the blocked words indeed do not exist.
  Some CLs may have checked in blocked words using '--no-verify' flag
  circumventing the repo check.

Use case 2: Search all words in the local unblock list.

  $ search_blocked_words.sh

- This is useful to learn the current COIL status.

For example, running the script in src/platform2 results in:

  reg_exp                #lines  #files  #filenames
  black.?hat                  0       0           0
  black.?list                47      12           0
  build.?cop                  0       0           0
  crazy                      12       4           1
  cripple                     0       0           0
  dummy                     902     186           3
  first.?class.?citizen       0       0           0
  grandfathered               0       0           0
  gr[ae]y.?hat                0       0           0
  gr[ae]y.?list               0       0           0
  insane                      0       0           0
  \bhe\b                     18      12           0
  \bshe\b                     2       2           0
  \bhim\b                     0       0           0
  \bher\b                     0       0           0
  \bhis\b                     1       1           0
  \bhers\b                    0       0           0
  man.?in.?the.?middle        0       0           0
  master                   1616     216          16
  mitm                       11       6           0
  native                    932     238           4
  red.?line                   1       1           0
  rtfm                        1       1           0
  sane                      564      39           5
  sanity                    137      77           0
  slave                     260      49           1
  white.?glove                0       0           0
  white.?hat                  0       0           0
  white.?label              227      24           0
  white.?list               244      59           4
  wtf                        26       4           0

BUG=b:165908442
TEST=See above.

Change-Id: Ic0ad12362883435ab33f8eb928f88602dad91375
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2404702
Tested-by: Daisuke Nojiri <dnojiri@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Daisuke Nojiri <dnojiri@chromium.org>
Commit-Queue: Daisuke Nojiri <dnojiri@chromium.org>

commit: 1cb8372782a3afa1b5a644560a3b3d26bb6e2e87 [log] [tgz]
author: Daisuke Nojiri <dnojiri@chromium.org> Thu Sep 10 12:54:58 2020 -0700
committer: Commit Bot <commit-bot@chromium.org> Sat Sep 12 23:15:04 2020 +0000
tree: 4e5ebbf1762ef3e49fedf0dc8c9ce2e644881025
parent: 4363d83e98a36b2825af2c1e8da72b093ebfd72e [diff]
diff --git a/contrib/search_blocked_words.sh b/contrib/search_blocked_words.sh
new file mode 100755
index 0000000..6057139
--- /dev/null
+++ b/contrib/search_blocked_words.sh

@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Scripts to search blocked words.
+#
+# Usage: search_blocked_words.sh [/path/to/word_list.txt]
+#
+# Use case 1: Search all globally blocked words.
+#
+#   $ search_blocked_words.sh ~/trunk/src/repohooks/blocked_terms.txt
+#
+# - This is useful when first introducing unblocked_terms.txt to a repo.
+# - This is also useful to verify the blocked words indeed do not exist.
+#   Some CLs may have checked in blocked words using '--no-verify' flag
+#   circumventing the repo check.
+#
+# Use case 2: Search all words in the local unblock list.
+#
+#   $ search_blocked_words.sh
+#
+# - This is useful to learn the current COIL status.
+
+input_file=${1:-unblocked_terms.txt}
+if [[ ! -r "${input_file}" ]]; then
+  echo "Error: '${input_file}' not found."
+  echo "Have you copied repohooks/unblocked_terms.txt?"
+  echo "Read repohooks/README.md for more information."
+  exit 1
+fi
+
+all=$(git ls-tree -r --name-only HEAD)
+
+# Create list of paths without symlinks.
+paths=()
+while IFS= read -r path; do
+  [[ -L "${path}" ]] && continue
+  paths+=("${path}")
+done <<< "${all}"
+
+# Create lists of dirs, files, symlinks. Must be basename and have no duplicate.
+# We use xargs (instead of directly feeding to dirname or basename) to avoid
+# exceeding argument size limit.
+dirs=$(echo ${all} | xargs dirname | sort -u | xargs basename -a)
+files=$(echo ${all} | xargs basename -a)
+
+results=("reg_exp #lines #files #filenames")
+while read -r regex; do
+  # Skip blank and comment lines.
+  [[ -z "${regex}" ]] && continue
+  [[ "${regex}" == "#"* ]] && continue
+
+  # Count matching lines.
+  line_count=$(echo "${paths[@]}" | xargs grep -E -i -I -c "${regex}" \
+      | awk -F: '{ s+=$2 } END { print s }')
+
+  # Count matching paths.
+  path_count=$(echo "${paths[@]}" | xargs grep -E -i -I -l "${regex}" | wc -l)
+
+  # Count matching file & dir names.
+  name_count=$(echo "${files}" "${dirs}" | grep -E -i "${regex}" | wc -l)
+
+  # Save result.
+  results+=("${regex} ${line_count} ${path_count} ${name_count}")
+done < "${input_file}"
+
+# Present results.
+printf '%s\n' "${results[@]}" | column --table --table-right 2,3,4
+
commit	1cb8372782a3afa1b5a644560a3b3d26bb6e2e87	[log] [tgz]
author	Daisuke Nojiri <dnojiri@chromium.org>	Thu Sep 10 12:54:58 2020 -0700
committer	Commit Bot <commit-bot@chromium.org>	Sat Sep 12 23:15:04 2020 +0000
tree	4e5ebbf1762ef3e49fedf0dc8c9ce2e644881025
parent	4363d83e98a36b2825af2c1e8da72b093ebfd72e [diff]