Add script to find unused functions in autotest
By identifying unused functions, we can remove thousands of lines of
dead code, making Autotest easier to maintain and upgrade.
BUG=b:170967823
TEST=autotest_unused.py
TEST=autotest_unused.py -v -d server/cros/faft client/cros/faft
TEST=autotest_unused.py from outside chroot
Change-Id: Ib97631e249a1bdeaf2fa5cd718da34aa2f4a11a6
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crostestutils/+/2511110
Tested-by: Greg Edelston <gredelston@google.com>
Reviewed-by: Kevin Shelton <kmshelton@chromium.org>
Reviewed-by: Derek Beckett <dbeckett@chromium.org>
Commit-Queue: Kevin Shelton <kmshelton@chromium.org>
Auto-Submit: Greg Edelston <gredelston@google.com>
diff --git a/code_health/autotest_unused.py b/code_health/autotest_unused.py
new file mode 100755
index 0000000..fe920d2
--- /dev/null
+++ b/code_health/autotest_unused.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# Copyright 2020 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Find unused functions within the Autotest repository.
+
+The Autotest repository is normally found in the chroot at
+~/trunk/src/third_party/autotest/files/. It is huge, and has lots of dead code:
+functions that were previously necessary, but are no longer used. This script
+serves to find functions and methods that now appear to be unused in Autotest.
+
+A few other repositories, enumerated in REPOS_CALLING_AUTOTEST, rely on
+functions defined in the Autotest repository. This script also checks those
+repositories when determining which functions are unused.
+
+It's hard to tell whether a function is called. To avoid breaking things, we're
+taking a conservative definition of what might be a function call. If all
+occurrences of the function's name appear to be function definitions—that is,
+lines starting with "def my_function("—then we say it's unused. Otherwise,
+assume it's used. As a result, this script will surely miss some unused
+functions. For example, if a function name is only used in a comment, we
+interpret that as the function being called. We're optimizing for safety here.
+"""
+
+import argparse
+import logging
+import os
+import re
+import subprocess
+import sys
+import time
+
+SRC_DIR = os.path.expanduser('~/trunk/src/')  # Source root in the chroot.
+THIRD_PARTY_DIR = os.path.join(SRC_DIR, 'third_party')
+AUTOTEST_DIR = os.path.join(THIRD_PARTY_DIR, 'autotest', 'files')
+REPOS_CALLING_AUTOTEST = [  # Each is searched for uses of every function.
+ AUTOTEST_DIR,
+ os.path.join(SRC_DIR, 'platform', 'moblab', 'third_party', 'autotest'),
+ os.path.join(THIRD_PARTY_DIR, 'autotest-private'),
+ os.path.join(THIRD_PARTY_DIR, 'autotest-private-utils'),
+ os.path.join(THIRD_PARTY_DIR, 'autotest-tests-cheets'),
+ os.path.join(THIRD_PARTY_DIR, 'autotest-tests-lakitu'),
+ os.path.join(THIRD_PARTY_DIR, 'autotest-tests-kumo'),
+]
+
+
+def require_chroot():
+ """Log an error and exit with status 2 if not running in the chroot."""
+ if not os.path.isfile('/etc/cros_chroot_version'):  # Chroot marker file.
+ logging.error('Must run script from inside chroot.')
+ sys.exit(2)
+
+
+def require_dirs():
+ """Exit with status 2 at the first expected directory that is missing."""
+ for path in [SRC_DIR, THIRD_PARTY_DIR] + REPOS_CALLING_AUTOTEST:
+ if not os.path.isdir(path):
+ logging.error('Path not found: %s', os.path.abspath(path))
+ sys.exit(2)
+
+
+def parse_args(argv):
+    """Interpret command-line args.
+
+    Args:
+        argv: A list of args passed into the script in the command-line.
+              Should exclude the script name itself.
+              Normally, this should be set to sys.argv[1:].
+
+    Returns:
+        argparse.Namespace with the following attributes:
+            def_dirs: A non-empty list of directories to search recursively
+                      for function definitions, relative to AUTOTEST_DIR.
+                      Defaults to [''], meaning all of Autotest.
+            verbose: Bool representing whether debug logs should be emitted.
+
+    Raises:
+        FileNotFoundError: If any of def_dirs cannot be found in AUTOTEST_DIR
+        SystemExit: On invalid usage (raised by argparse), e.g. a bare -d.
+    """
+    parser = argparse.ArgumentParser()
+    # nargs='+' rather than '*': a bare -d would yield an empty list, and rg
+    # would then be run without any search path.
+    parser.add_argument('-d', '--def-dirs',
+                        nargs='+',
+                        default=[''],
+                        help='Directory/ies to search recursively for function '
+                        'definitions, defined relative to Autotest.')
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help='Include verbose (debugging) logs in the output')
+    args = parser.parse_args(argv)
+    for def_dir in args.def_dirs:
+        full_path = os.path.join(AUTOTEST_DIR, def_dir)
+        if not os.path.isdir(full_path):
+            raise FileNotFoundError('def-dir %s not found' % full_path)
+    return args
+
+
+def grep(pattern, paths, exclude_unittests=False):
+    """Run rg (ripgrep); return matching lines as one string ('' if none).
+
+    Args:
+        pattern: Regex handed to rg; only whole-word matches count (-w).
+        paths: A list of files/directories to search.
+        exclude_unittests: Whether to skip files matching *_unittest.py.
+
+    Raises:
+        ValueError: If paths is not a list.
+        subprocess.CalledProcessError: If rg exits with an error status.
+    """
+    if not isinstance(paths, list):
+        raise ValueError('rg expects paths to be a list; got %s' % paths)
+    # argv list, not a shell string: pattern/paths are never shell-parsed.
+    cmd = ['rg', '--no-heading', '--no-filename', '--no-line-number', '-w',
+           '--iglob', '!**/server/cros/faft/fw-testing-configs/**']
+    if exclude_unittests:
+        cmd += ['--iglob', '!*_unittest.py']
+    cmd += ['-e', pattern, '--'] + paths
+    proc = subprocess.run(cmd, stdout=subprocess.PIPE, universal_newlines=True)
+    if proc.returncode == 1:  # rg's exit status for "no matches".
+        return ''
+    proc.check_returncode()
+    return proc.stdout.strip()
+
+
+def find_function_defs(search_dirs):
+    """Find the names of all functions and methods defined in search_dirs.
+
+    Args:
+        search_dirs: A list of paths for rg to search recursively.
+
+    Returns:
+        A set of strings, each the name of a function or method defined
+        somewhere under search_dirs.
+    """
+    # Regex will search for:
+    #   1. Start of line
+    #   2. Any amount of whitespace (including none)
+    #   3. The string literal 'def '
+    #   4. Any nonzero amount of word characters
+    # In short: function/method definitions, skipping *_unittest.py files.
+    def_lines = grep(r'^\s*def \w+', search_dirs, exclude_unittests=True)
+    # Regex will try to match, at the start of the line:
+    #   1. Any amount of whitespace (including none)
+    #   2. The string literal 'def'
+    #   3. Any nonzero amount of whitespace
+    #   4. Any nonzero amount of word characters -- and capture this group
+    #   5. Optionally, any amount of whitespace
+    #   6. An open parenthesis
+    # In short, it captures the name of the function/method being defined.
+    def_regex = re.compile(r'\s*def\s+(\w+)\s*\(')
+    funcs = set()
+    for line in def_lines.split('\n'):
+        if not line:
+            continue
+        match = def_regex.match(line)
+        if match is None:
+            logging.warning('Failed to find function def in rg line "%s"', line)
+            continue
+        funcs.add(match.group(1))
+    return funcs
+
+
+def is_function_used_in_dir(func, search_dir):
+    """Determine if a function is used in search_dir, besides defining it.
+
+    Any rg hit for func that is not a "def func(" line counts as a use.
+    """
+    try:
+        called_lines = grep(func, [search_dir]).split('\n')
+    except UnicodeDecodeError:
+        # NOTE(review): undecodable rg output is treated as "unused" here.
+        return False
+    # Anchor the name with '(' so "def func_extra(...): func()" is still a
+    # use of func, rather than being mistaken for a definition of it.
+    def_regex = re.compile(r'\s*def\s+%s\s*\(' % re.escape(func))
+    return any(line and not def_regex.match(line) for line in called_lines)
+
+
+def is_function_used(func):
+    """Determine if a function is used in any of REPOS_CALLING_AUTOTEST."""
+    # any() short-circuits: repos after the first one with a use are skipped.
+    return any(
+        is_function_used_in_dir(func, repo) for repo in REPOS_CALLING_AUTOTEST
+    )
+
+
+def time_since(start_time):
+ """Return seconds elapsed since start_time (a time.time() timestamp)."""
+ return time.time() - start_time
+
+
+def main(argv):
+    """Report unused functions in the Autotest subdirs requested via argv."""
+    require_chroot()
+    require_dirs()
+    args = parse_args(argv)
+    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+    # Find which functions we're going to check.
+    def_dirs = [os.path.join(AUTOTEST_DIR, d) for d in args.def_dirs]
+    logging.debug('Searching for function definitions in these dirs:')
+    for def_dir in def_dirs:
+        logging.debug('\t%s', def_dir)
+    funcs = find_function_defs(def_dirs)
+    logging.debug('Found %d unique function names', len(funcs))
+
+    # Search every function in all search directories.
+    unused_funcs = set()
+    start_time = time.time()
+    for num_searched, func in enumerate(funcs, start=1):
+        if not is_function_used(func):
+            unused_funcs.add(func)
+            logging.debug('Unused function: %s', func)
+        # Give some feedback to the user.
+        if num_searched % 100 == 0:
+            time_per_func = time_since(start_time) / num_searched
+            # num_searched functions are already done, so only the rest
+            # contribute to the estimate.
+            time_remaining = time_per_func * (len(funcs) - num_searched)
+            logging.info('Searched %d/%d; %d unused. %.2f seconds remain.',
+                         num_searched, len(funcs), len(unused_funcs),
+                         time_remaining)
+    logging.debug('Finished in %.2f seconds.', time_since(start_time))
+    if unused_funcs:
+        logging.info('Unused functions: %s', str(unused_funcs))
+        logging.info('Total: %d', len(unused_funcs))
+    else:
+        logging.info('No unused functions detected.')
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])  # Strip the script name, as parse_args() expects.