cryptohome/error/tool/location_db.py - mirrors/cros/chromiumos/platform2 - Git at Google

 #!/usr/bin/env python3

 # Copyright 2022 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """This tool maintains locations.h.

 This tool generates and maintains the data in error/locations.h and also
 verifies that the usage of error location is correct.
 """

 import argparse
 import bisect
 import logging
 import operator
 import os
 import os.path
 import re
 import subprocess
 import sys


 class Symbol:
     """An error location identifier and everything recorded about it.

     A Symbol pairs the identifier written inside CRYPTOHOME_ERR_LOC() with
     the places it was seen in the source tree and, once one is assigned,
     the enum value it carries in locations.h.
     """

     HEADER_TEMPLATE = ('/* %s */\n'
                        '%s = %d,\n')

     def __init__(self, symbol):
         """Creates a symbol with no recorded usage and no value.

         Args:
             symbol (str): The identifier as written in the C/C++ source.
         """

         # The identifier itself.
         self.symbol = symbol

         # Enum value used in the generated file; None until assigned.
         self.value = None

         # True when the configuration explicitly allows this symbol to be
         # used more than once in the source tree.
         self.allow_dup = False

         # Three parallel lists with one entry per usage: the file path,
         # the line number in that file, and the character offset in that
         # file. Entry i of each list describes the same usage.
         self.path = []
         self.line_num = []
         self.index_in_file = []

     def generate_lines(self):
         """Renders this symbol as an entry of locations.h.

         Returns:
             A one-element list holding the comment line and the enum line.
         """

         assert self.value is not None
         comment = self._generate_comments()
         entry = Symbol.HEADER_TEMPLATE % (comment, self.symbol, self.value)
         return [entry]

     def _generate_comments(self):
         """Picks the comment text placed above the enum entry."""
         if self.allow_dup:
             return '=Duplicate Allowed='
         if not self.line_num and not self.path:
             # Known from locations.h but no longer seen in the sources.
             return '=Obsolete='
         assert len(self.line_num) == 1 and len(self.path) == 1
         return '%s' % (self.path[0],)

     def merge(self, target):
         """Folds the usages and value of another Symbol into this one.

         The caller must discard target afterwards; its state is not
         guaranteed once this returns.

         Args:
             target: Another Symbol with the same identifier.
         """

         assert self.symbol == target.symbol
         assert self.allow_dup == target.allow_dup
         # Both objects must have consistent parallel lists before merging.
         for sym in (self, target):
             assert (len(sym.line_num) == len(sym.index_in_file) and
                     len(sym.line_num) == len(sym.path))

         self.line_num.extend(target.line_num)
         self.path.extend(target.path)
         self.index_in_file.extend(target.index_in_file)

         # At most one side may already carry a value.
         if self.value is None:
             self.value = target.value
         else:
             assert target.value is None

     def __str__(self):
         usages = zip(self.path, self.line_num)
         locs = ','.join('%s:%d' % usage for usage in usages)
         text = '%s=%s @ %s' % (self.symbol, self.value, locs)
         if self.allow_dup:
             text += ' duplicates allowed'
         return text


 class LineNumberFinder:
     """Converts a character offset in a file into a line number.

     Each instance represents the content of one file. Line numbers start
     from 1.
     """

     def __init__(self, content):
         """Constructor for LineNumberFinder.

         Args:
             content (str): The content of the file.
         """

         # '_content' is the content of the file in string format.
         self._content = content

         # '_line_num_of_index' holds, in ascending order, the offset at
         # which each line starts; the element at list index N is the start
         # of line N. The leading -1 keeps the binary search bounded and
         # makes line numbers start from 1.
         self._line_num_of_index = [-1, 0,]

         self._preprocess()

     def _preprocess(self):
         """Populate self._line_num_of_index."""
         # A new line starts on the character right after each '\n', so the
         # newline itself still belongs to the line it terminates.
         self._line_num_of_index.extend(
             i + 1 for i, c in enumerate(self._content) if c == '\n')
         # Closing entry at the end of the content.
         self._line_num_of_index.append(len(self._content))

     def find_by_pos(self, idx):
         """Find the line that idx char is on.

         Args:
             idx (int): The location in number of characters.

         Returns:
             An integer that is the line number, it starts from 1.
         """
         return bisect.bisect_right(self._line_num_of_index, idx) - 1


 class SourceScanner:
     """This scans for error location usage in the source"""

     ERROR_LOC_USAGE_RE = r'CRYPTOHOME_ERR_LOC\(\s*([a-zA-Z][a-zA-Z0-9]*)\s*\)'

     @staticmethod
     def scan_single_file(path):
         """Scan a single file for error location usage.

         Args:
             path (str): The path to the file to scan.

         Returns:
             A list of Symbol, one per usage found in the given file, in the
             order the usages appear.
         """

         logging.debug('Scanning file %s', path)
         # Read as UTF-8 explicitly so the result does not depend on the
         # locale of the machine running the tool.
         with open(path, 'r', encoding='utf-8') as f:
             content = f.read()
         linenum_util = LineNumberFinder(content)
         results = []

         # Search for the target string in the source file.
         pat = re.compile(SourceScanner.ERROR_LOC_USAGE_RE)
         for m in pat.finditer(content):
             # Group 1 is the identifier inside the parentheses; record
             # where the identifier itself starts, not the macro name.
             loc_pos = m.start(1)
             symbol = Symbol(m.group(1))
             symbol.path.append(path)
             symbol.index_in_file.append(loc_pos)
             symbol.line_num.append(linenum_util.find_by_pos(loc_pos))
             results.append(symbol)

         return results

     @staticmethod
     def scan_directory(path, allowed_ext):
         """Scan a directory recursively for error location usage.

         Entries are visited in name order so that the order of the returned
         symbols is the same on every run and every machine.

         Args:
             path (str): The path to the directory to scan.
             allowed_ext (Set[str]): Allowed extensions.
                 Only scan files with extensions in the Set. The extension
                 of each file is lower-cased before the comparison.

         Returns:
             A list of Symbol, representing the symbols found in the directory.
         """

         logging.debug('Scanning directory %s', path)
         with os.scandir(path) as it:
             entries = sorted(it, key=lambda entry: entry.name)

         result = []
         for f in entries:
             if f.is_dir():
                 result += SourceScanner.scan_directory(f.path, allowed_ext)
                 continue
             if (f.is_file() and
                     os.path.splitext(f.name.lower())[1] in allowed_ext):
                 result += SourceScanner.scan_single_file(f.path)
         return result


 class Verifier:
     """Checks scan results for forbidden duplicates and collates them.

     Given the symbols produced by the scanner, this groups usages of the
     same symbol together and reports every symbol that is used more than
     once without being on the duplication allowlist.
     """

     def __init__(self, dup_allowlist):
         """Constructor for Verifier.

         Args:
             dup_allowlist (Set[str]): Symbol names that may legitimately
             appear several times in the source tree.
         """

         # Names of the symbols for which duplicates are tolerated.
         self._dup_allowlist = dup_allowlist

     def _update_allow_dup_in_symbols(self, symbols):
         """Flags every allowlisted symbol in `symbols` as allow_dup."""

         allowed = self._dup_allowlist
         for sym in symbols:
             if sym.symbol in allowed:
                 sym.allow_dup = True

     def collate_and_verify(self, input_symbols):
         """Groups symbols by name and detects disallowed duplicates.

         The first occurrence of each name becomes the collated entry; each
         later occurrence is merged into it. A later occurrence that is not
         allowlisted also marks the name as a violation.

         Args:
             input_symbols (List[Symbol]): Symbols found in the codebase.

         Returns:
             Tuple[Dict[str, Symbol], Dict[str, Symbol]]: The pair
             (collated_symbols, violating_dup), both keyed by symbol name.
             `violating_dup` maps to the same collated objects, limited to
             names duplicated without being on the allowlist.
         """

         self._update_allow_dup_in_symbols(input_symbols)
         collated_symbols = {}
         violating_dup = {}
         for candidate in input_symbols:
             name = candidate.symbol
             first_seen = collated_symbols.get(name)
             if not first_seen:
                 # First time this name shows up.
                 collated_symbols[name] = candidate
                 continue
             tolerated = candidate.allow_dup
             first_seen.merge(candidate)
             if not tolerated:
                 violating_dup[name] = first_seen
         return collated_symbols, violating_dup


 class LocationDB:
     """Database in locations.h

     This class manages the mapping between error location symbol and their
     values in locations.h. The records live between a start marker line and
     an end marker line; everything outside the markers is preserved as is.
     """

     GENERATED_START = ('// Start of generated content. '
                        'Do NOT modify after this line.')
     GENERATED_END = '// End of generated content.'
     EXISTING_RECORDS_RE = (r'\/\*\s*([a-zA-Z0-9:/_.= \n]|\s)*\s*\*\/\s*'
                            r'\s+([a-zA-Z][a-zA-Z0-9]*)\s*\=\s*([0-9]+)\s*,')

     def __init__(self, path, dup_allowlist):
         """Constructor for LocationDB.

         Args:
             path (str): The path to locations.h.
             dup_allowlist (Set[str]): Duplication allowlist, i.e. the names
             of the symbols that may be used multiple times in the source
             tree.
         """

         # 'path' is the path to locations.h.
         self.path = path

         # '_dup_allowlist' is a set that holds the allowlist of symbols that
         # can be used multiple times in the source tree.
         self._dup_allowlist = dup_allowlist

         # 'symbols' is a dict that maps the symbol's representation (as a str)
         # to the Symbol object. It is None if we are not loaded yet.
         self.symbols = None

         # 'value_to_symbol' is a dict that maps the symbol's value (the integer
         # value of the enum) to the symbol's representation (as a str), which
         # is the key into 'symbols'. It is None if we are not loaded yet.
         self.value_to_symbol = None

         # '_lines' holds the content of the locations.h file, one element per
         # line with the line ending kept. It is None if we are not loaded yet.
         self._lines = None

         # '_start_line' is the line number in locations.h of the start
         # marker. It is None if we are not loaded yet.
         # Line number starts from 1.
         self._start_line = None

         # '_end_line' is the line number in locations.h of the end marker.
         # It is None if we are not loaded yet.
         # Line number starts from 1.
         self._end_line = None

         # '_next_value' is the next available enum value in locations.h.
         # It is None until update_from_scan_result() has been called.
         self._next_value = None

     def _find_generated_marker(self):
         """Finds and sets the start and end of generated marker.

         Returns:
             bool: True iff exactly one start marker and one end marker are
             found and the end marker comes after the start marker.
         """

         self._start_line = None
         self._end_line = None
         for line_num, line in enumerate(self._lines, start=1):
             stripped = line.strip()
             if stripped == LocationDB.GENERATED_START:
                 if self._start_line is not None:
                     logging.error(('Multiple generated starting marker at %d'
                                    ' and %d'), self._start_line, line_num)
                     return False
                 self._start_line = line_num
             elif stripped == LocationDB.GENERATED_END:
                 if self._end_line is not None:
                     logging.error(('Multiple generated ending marker at %d '
                                    'and %d'), self._end_line, line_num)
                     return False
                 self._end_line = line_num
         if self._start_line is None:
             logging.error('No generated starting marker in locations.h')
             return False
         if self._end_line is None:
             logging.error('No generated ending marker in locations.h')
             return False
         if self._end_line < self._start_line:
             # store() slices the file around the markers; with the markers
             # swapped it would duplicate or drop hand-written content.
             logging.error(('Generated ending marker at %d is before the '
                            'starting marker at %d'),
                           self._end_line, self._start_line)
             return False
         return True

     def _scan_for_existing_records(self, content):
         """Parse all existing records in 'content'.

         Rebuilds self.symbols from scratch.

         Returns:
             int: The number of distinct symbols found.
         """
         pat = re.compile(LocationDB.EXISTING_RECORDS_RE)
         self.symbols = {}
         for m in pat.finditer(content):
             # Group 2 is the enum identifier, group 3 its numeric value.
             s = Symbol(m.group(2))
             s.value = int(m.group(3))
             self.symbols[s.symbol] = s
         return len(self.symbols)

     def _build_reverse_map(self):
         """Populate self.value_to_symbol."""
         self.value_to_symbol = {}
         for name, sym in self.symbols.items():
             if sym.value is not None:
                 # Each value is expected to belong to a single symbol.
                 assert sym.value not in self.value_to_symbol
                 self.value_to_symbol[sym.value] = name

     def _get_generated_lines(self):
         """Returns the text strictly between the two markers."""
         assert self._start_line is not None
         assert self._end_line is not None
         # Markers are 1-based line numbers, so this slice starts right after
         # the start marker and stops right before the end marker. The lines
         # already carry their line endings.
         return ''.join(self._lines[self._start_line:self._end_line-1])

     def load(self):
         """Load from locations.h.

         This method will load the content of locations.h from `self.path`.

         Returns:
             bool: True if successful, False if the markers are missing,
             repeated or out of order.
         """

         with open(self.path, 'r', encoding='utf-8') as f:
             self._lines = f.readlines()
         if not self._find_generated_marker():
             return False
         self._scan_for_existing_records(self._get_generated_lines())
         self._build_reverse_map()
         return True

     def _generate_header_lines(self):
         """Returns the generated lines of every symbol, ordered by value.

         Returns:
             A list with one list of strings per symbol.
         """
         symbols_list = sorted(self.symbols.values(),
                               key=operator.attrgetter('value'))
         return [sym.generate_lines() for sym in symbols_list]

     def store(self):
         """Save the state in this object back into locations.h.

         This method will convert the state in this object into string content
         to be written back into locations.h, then it'll write the result into
         `self.path`, drop the loaded state and run clang-format on the file.
         """

         assert self._lines is not None and self.symbols is not None
         result_lines = []
         # Include the portion that is before the generated content, up to
         # and including the start marker.
         result_lines += self._lines[0:self._start_line]
         # Add the generated portion
         for symbol_lines in self._generate_header_lines():
             result_lines += symbol_lines
         # Include the portion that is after the generated content, starting
         # with the end marker.
         result_lines += self._lines[self._end_line-1:]
         with open(self.path, 'w', encoding='utf-8') as f:
             f.write(''.join(result_lines))
         # Invalidate the variables to ensure stale data isn't left behind.
         self.symbols = None
         self._lines = None
         self._start_line = None
         self._end_line = None
         self._next_value = None
         self.value_to_symbol = None
         # Format the result
         status = subprocess.call(['clang-format', '-i', self.path])
         if status != 0:
             logging.warning('clang-format exited with status %d for %s',
                             status, self.path)

     def update_from_scan_result(self, result):
         """Update the state of this object.

         This method will update the internal state within this object from
         the Symbols found in `result`. Symbols already known keep their
         value; new symbols get the next free value.

         Args:
             result (Dict[str, Symbol]): A dict of symbols found in source tree.
         """

         # Clear relevant fields in the current symbols.
         # The enums start at 100 because we want to reserve the first 100 enum
         # in case there's any special use case.
         self._next_value = 100
         for sym in self.symbols.values():
             sym.line_num = []
             sym.path = []
             sym.index_in_file = []
             sym.allow_dup = sym.symbol in self._dup_allowlist
             if sym.value is not None:
                 self._next_value = max(self._next_value, sym.value+1)

         for sym in result.values():
             if sym.symbol in self.symbols:
                 self.symbols[sym.symbol].merge(sym)
             else:
                 self.symbols[sym.symbol] = sym
                 self.symbols[sym.symbol].value = self._next_value
                 self._next_value += 1

         self._build_reverse_map()


 class DBTool:
     """Bridge for various classes above.

     This class is in charge of calling the scanner, the verifier and the
     locations.h database and bridging their input/outputs to each other.
     All paths are relative to the current working directory.
     """

     ALLOWED_SRC_EXT = frozenset({'.cc', '.h'})
     SCAN_DENYLIST = frozenset({'./error/location_utils.h'})
     LOCATIONS_H_PATH = './error/locations.h'

     def __init__(self, allowlist_path):
         """Constructor for DBTool.

         Args:
             allowlist_path (str): The path to the file that stores the content
             of dup_allowlist. Each line is a symbol that is in the allowlist,
             thus each line is a symbol that can appear multiple times in the
             code base.
         """

         # 'db_path' is the path to locations.h.
         self.db_path = DBTool.LOCATIONS_H_PATH

         # 'allowlist_path' is the path to duplication allowlist configuration.
         self.allowlist_path = allowlist_path

         # '_dup_allowlist' is the set of symbols that may be used more than
         # once; it is shared with the verifier and the database below.
         self._dup_allowlist = set()
         self._load_dup_allowlist()

         # 'verifier' is an instance of Verifier for verifying symbols.
         self.verifier = Verifier(self._dup_allowlist)

         # 'db' is an instance of LocationDB for loading/storing locations.h.
         self.db = LocationDB(self.db_path, self._dup_allowlist)

     def _load_dup_allowlist(self):
         """Load self._dup_allowlist from file.

         Blank lines and lines starting with '#' are ignored; surrounding
         whitespace is stripped from every other line.
         """

         with open(self.allowlist_path, 'r', encoding='utf-8') as f:
             for raw_line in f:
                 line = raw_line.strip()
                 if line and line[0] != '#':
                     self._dup_allowlist.add(line)

     def check_sources(self):
         """Scan the codebase and check for errors.

         Returns:
             Tuple[bool, Dict[str, Symbol]]: Returns (success, symbols), where
             success is True iff no disallowed duplicate is found, and in that
             case symbols holds the symbols found in the code base. On
             failure symbols is None.
         """

         # Scan for all symbols.
         all_symbols = SourceScanner.scan_directory('.', DBTool.ALLOWED_SRC_EXT)
         all_symbols = [r for r in all_symbols
                        if r.path[0] not in DBTool.SCAN_DENYLIST]
         collated_symbols, violations = self.verifier.collate_and_verify(
             all_symbols)
         # Notify the user on any violations.
         if violations:
             print('Please remove duplicate usage of error location in code:')
             for sym in violations.values():
                 print(sym)
             return False, None
         return True, collated_symbols

     def update_location_db(self):
         """Scan the code base and update locations.h

         Scan the code base for all usage of error symbols, then process them
         to see if there's any error. If there's no error, update locations.h.

         Returns:
             bool: True if successful.
         """

         if not self._load_full_db():
             return False
         self.db.store()

         return True

     def _load_full_db(self):
         """Scans the sources, loads locations.h and combines the two.

         Returns:
             bool: True if the sources are valid and locations.h was loaded.
         """
         success, symbols = self.check_sources()
         if not success:
             return False

         # Load the content of the locations.h. Without a successful load
         # the database holds no symbols to update.
         if not self.db.load():
             return False
         self.db.update_from_scan_result(symbols)
         return True

     def lookup_symbol(self, value):
         """Print the usage location for an error location value.

         Given the numeric value of an error location, find the symbol that
         carries it and print where the symbol is used.

         Args:
             value (int): The numeric value of the error location.

         Returns:
             bool: True if the value is known, False otherwise or if the
             database could not be loaded.
         """

         if not self._load_full_db():
             return False
         if value not in self.db.value_to_symbol:
             print('Value %s not found' % value)
             return False
         symbol = self.db.symbols[self.db.value_to_symbol[value]]
         print('Value %s is %s and can be found at:' % (symbol.value,
                                                        symbol.symbol))
         for path, line in zip(symbol.path, symbol.line_num):
             print('%s:%d'  % (path, line))
         return True

     def decode_stack(self, locs):
         """Print the stack for an error ID.

         Given an error ID (dash-separated numeric values), decode every
         value and print the matching symbol with its locations.

         Args:
             locs (str): Dash-separated error location values, e.g.
             '42-7-15'.

         Returns:
             bool: False if `locs` is malformed or the database could not be
             loaded, True otherwise. Unknown values are reported but do not
             make the call fail.
         """

         # Validate the input first; there is no point in scanning the tree
         # for a stack that cannot be parsed.
         try:
             stack = [int(x) for x in locs.split('-')]
         except ValueError:
             logging.error('Invalid error location stack: %s', locs)
             return False

         if not self._load_full_db():
             return False

         for val in stack:
             if val not in self.db.value_to_symbol:
                 print('Value %s not found' % val)
             else:
                 symbol = self.db.symbols[self.db.value_to_symbol[val]]
                 print('%s' % (symbol,))
         return True


 class DBToolCommandLine:
     """This class handles the command line operations for the tool."""

     def __init__(self):
         """Constructor for DBToolCommandLine."""

         # 'parser' is an ArgumentParser instance for parsing command line
         # arguments.
         self.parser = None

         # 'args' is the arguments parsed by self.parser.
         self.args = None

         # 'db_tool' is an instance of DBTool for carrying out the operations
         # specified in arguments.
         self.db_tool = None

         # 'allowlist_path' is the path to the duplication allowlist
         # configuration.
         self.allowlist_path = None

     def _setup_logging(self):
         """Configures the root logger to show INFO and above."""
         logging.basicConfig(level=logging.INFO)

     def _parse_args(self):
         """Builds the argument parser and parses the command line."""
         self.parser = argparse.ArgumentParser(description=
                                               'Tool for handling error '
                                               'location in locations.h')
         self.parser.add_argument('--update',
                                  help=('Scan the source directory'
                                        ' and update the locations.h db'),
                                  action='store_true')
         self.parser.add_argument('--check',
                                  help=('Scan the source directory and check '
                                        'that cryptohome error is used '
                                        'correctly.'),
                                  action='store_true')
         # Parsed as an integer so that a malformed value is reported as a
         # usage error instead of a traceback.
         self.parser.add_argument('--lookup',
                                  help='Lookup a single error location code',
                                  type=int, default=None)
         self.parser.add_argument('--decode',
                                  help=('Decode a stack of error location, ex.'
                                        '42-7-15'),
                                  default=None)
         self.parser.add_argument('--src',
                                  help=('The cryptohome source '
                                        'directory'), default=None)
         self.args = self.parser.parse_args()

     def _goto_srcdir(self):
         """Changes the working directory to the cryptohome source tree.

         Uses --src when given, otherwise the directory two levels above the
         one containing this script.
         """
         assert self.args is not None
         srcdir = self.args.src
         if srcdir is None:
             srcdir = os.path.join(os.path.dirname(__file__), '..', '..')
         srcdir = os.path.abspath(srcdir)
         logging.info('Using cryptohome source at: %s', srcdir)
         os.chdir(srcdir)

     def _get_dup_allowlist_path(self):
         """Returns the absolute path of dup_allowlist.txt next to this file."""
         path = os.path.join(os.path.dirname(__file__), 'dup_allowlist.txt')
         path = os.path.abspath(path)
         return path

     def main(self):
         """The main function for this command line tool.

         Exactly one action is carried out, picked in this order: --update,
         --check, --lookup, --decode.

         Returns:
             int: The exit code, 0 on success and 1 if no action was given
             or the action failed.
         """
         self._parse_args()
         self._setup_logging()
         args = self.args

         # Reject an empty request before touching the filesystem.
         if not (args.update or args.check or args.lookup is not None or
                 args.decode is not None):
             logging.error('No action specified, please see --help')
             return 1

         # Resolve the allowlist before changing directory.
         self.allowlist_path = self._get_dup_allowlist_path()
         self._goto_srcdir()
         self.db_tool = DBTool(self.allowlist_path)

         if args.update:
             success = self.db_tool.update_location_db()
         elif args.check:
             success, _ = self.db_tool.check_sources()
         elif args.lookup is not None:
             success = self.db_tool.lookup_symbol(int(args.lookup))
         else:
             # decode_stack() is not required to report a status; only an
             # explicit False is treated as a failure.
             success = self.db_tool.decode_stack(args.decode) is not False
         return 0 if success else 1


 # Script entry point: run the command line tool and use the value returned
 # by main() as the process exit status.
 if __name__ == '__main__':
     sys.exit(DBToolCommandLine().main())
	#!/usr/bin/env python3

	# Copyright 2022 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""This tool maintains locations.h.

	This tool generates and maintains the data in error/locations.h and also
	verifies that the usage of error location is correct.
	"""

	import argparse
	import bisect
	import logging
	import operator
	import os
	import os.path
	import re
	import subprocess
	import sys


	class Symbol:
	    """Represents a symbol for error location."""

	    HEADER_TEMPLATE = ('/* %s */\n'
	                       '%s = %d,\n')

	    def __init__(self, symbol):
	        """Constructor for Symbol.

	        Args:
	            symbol (str): The representation of the symbol.
	        """

	        # 'symbol' is the string that represents the symbol.
	        # It is the identifier used in the C/C++ source.
	        self.symbol = symbol

	        # 'allow_dup' is set to true if the configuration file specifically
	        # allows this symbol to be used multiple times in the source file.
	        self.allow_dup = False

	        # 'line_num' is the list of line numbers at which this symbol
	        # appeared in the source file. It corresponds 1:1 with
	        # self.index_in_file and self.path.
	        self.line_num = []

	        # 'index_in_file' is the location of the symbol in the source file, in
	        # number of characters. It is a list and each element corresponds 1:1
	        # with self.line_num and self.path.
	        self.index_in_file = []

	        # 'path' is the path to the file. It is a list and each element
	        # corresponds 1:1 with self.index_in_file and self.line_num.
	        self.path = []

	        # 'value' is the numeric value of the symbol, if one is assigned.
	        # It is the value for the enum in the generated file.
	        self.value = None

	    def generate_lines(self):
	        """Generates the lines for this symbol in locations.h.

	        Returns:
	            A list of strings that is the lines to be placed in locations.h.
	        """

	        assert self.value is not None
	        return [Symbol.HEADER_TEMPLATE % (self._generate_comments(),
	                                          self.symbol, self.value),]

	    def _generate_comments(self):
	        """Returns the comment text placed above the enum entry."""
	        if self.allow_dup:
	            return '=Duplicate Allowed='
	        if len(self.line_num) == 0 and len(self.path) == 0:
	            # No usage recorded for this symbol.
	            return '=Obsolete='
	        assert len(self.line_num) == 1 and len(self.path) == 1
	        return '%s' % (self.path[0],)

	    def merge(self, target):
	        """Merges information from another symbol into this object.

	        The caller is responsible for destroying target after the call.
	        There's no guarantee on the state of target after the call.

	        Args:
	            target: Another Symbol.
	        """

	        assert self.symbol == target.symbol
	        assert self.allow_dup == target.allow_dup
	        assert (len(self.line_num) == len(self.index_in_file) and
	                len(self.line_num) == len(self.path))
	        assert (len(target.line_num) == len(target.index_in_file) and
	                len(target.line_num) == len(target.path))

	        self.line_num += target.line_num
	        self.path += target.path
	        self.index_in_file += target.index_in_file

	        # At most one of the two symbols may already carry a value.
	        if self.value is not None:
	            assert target.value is None
	        else:
	            self.value = target.value

	    def __str__(self):
	        locs = ','.join(['%s:%d' % x for x in
	                         zip(self.path, self.line_num)])
	        result = '%s=%s @ %s' % (self.symbol, self.value, locs)
	        if self.allow_dup:
	            result += ' duplicates allowed'
	        return result


	class LineNumberFinder:
	    """This converts index in file to line number.

	    This utility converts position in the file into line number.
	    Each instance represents a file.
	    """

	    def __init__(self, content):
	        """Constructor for LineNumberFinder.

	        Args:
	            content (str): The content of the file.
	        """

	        # '_content' is the content of the file in string format.
	        self._content = content

	        # '_line_num_of_index' holds, in ascending order, the offset at
	        # which each line starts; the element at list index N is the start
	        # of line N. The leading -1 keeps the binary search bounded and
	        # makes line numbers start from 1.
	        self._line_num_of_index = [-1, 0,]

	        self._preprocess()

	    def _preprocess(self):
	        """Populate self._line_num_of_index."""
	        # A new line starts on the character right after each '\n', so the
	        # newline itself still belongs to the line it terminates.
	        self._line_num_of_index.extend(
	            i + 1 for i, c in enumerate(self._content) if c == '\n')
	        # Closing entry at the end of the content.
	        self._line_num_of_index.append(len(self._content))

	    def find_by_pos(self, idx):
	        """Find the line that idx char is on.

	        Args:
	            idx (int): The location in number of characters.

	        Returns:
	            An integer that is the line number, it starts from 1.
	        """
	        return bisect.bisect_right(self._line_num_of_index, idx) - 1


	class SourceScanner:
	    """This scans for error location usage in the source"""

	    # Optional whitespace is allowed on both sides of the identifier, and
	    # the identifier is a letter followed by any number of letters/digits.
	    ERROR_LOC_USAGE_RE = r'CRYPTOHOME_ERR_LOC\(\s*([a-zA-Z][a-zA-Z0-9]*)\s*\)'

	    @staticmethod
	    def scan_single_file(path):
	        """Scan a single file for error location usage.

	        Args:
	            path (str): The path to the file to scan.

	        Returns:
	            A list of Symbol, one per usage found in the given file, in the
	            order the usages appear.
	        """

	        logging.debug('Scanning file %s', path)
	        # Read as UTF-8 explicitly so the result does not depend on the
	        # locale of the machine running the tool.
	        with open(path, 'r', encoding='utf-8') as f:
	            content = f.read()
	        linenum_util = LineNumberFinder(content)
	        results = []

	        # Search for the target string in the source file.
	        pat = re.compile(SourceScanner.ERROR_LOC_USAGE_RE)
	        for m in pat.finditer(content):
	            # Group 1 is the identifier inside the parentheses; record
	            # where the identifier itself starts, not the macro name.
	            loc_pos = m.start(1)
	            symbol = Symbol(m.group(1))
	            symbol.path.append(path)
	            symbol.index_in_file.append(loc_pos)
	            symbol.line_num.append(linenum_util.find_by_pos(loc_pos))
	            results.append(symbol)

	        return results

	    @staticmethod
	    def scan_directory(path, allowed_ext):
	        """Scan a directory recursively for error location usage.

	        Entries are visited in name order so that the order of the returned
	        symbols is the same on every run and every machine.

	        Args:
	            path (str): The path to the directory to scan.
	            allowed_ext (Set[str]): Allowed extensions.
	                Only scan files with extensions in the Set. The extension
	                of each file is lower-cased before the comparison.

	        Returns:
	            A list of Symbol, representing the symbols found in the directory.
	        """

	        logging.debug('Scanning directory %s', path)
	        with os.scandir(path) as it:
	            entries = sorted(it, key=lambda entry: entry.name)

	        result = []
	        for f in entries:
	            if f.is_dir():
	                result += SourceScanner.scan_directory(f.path, allowed_ext)
	                continue
	            if (f.is_file() and
	                    os.path.splitext(f.name.lower())[1] in allowed_ext):
	                result += SourceScanner.scan_single_file(f.path)
	        return result


	class Verifier:
	    """Verifies the result from scanning.

	    This is used to verify that the result from scanner is the correct usage
	    for error location, i.e. there are no duplications outside of the allowed
	    ones. It also helps to collate the various symbols together.
	    """

	    def __init__(self, dup_allowlist):
	        """Constructor for Verifier.

	        Args:
	            dup_allowlist (Set[str]): a set of symbol representation that is in
	            the duplication allowlist. If a symbol is in the allowlist, then
	            that string can be used multiple times in the source file.
	        """

	        # '_dup_allowlist' is the duplication allowlist, see comment above.
	        self._dup_allowlist = dup_allowlist

	    def _update_allow_dup_in_symbols(self, symbols):
	        """Update .allow_dup for all symbol in symbols."""

	        for sym in symbols:
	            if sym.symbol in self._dup_allowlist:
	                sym.allow_dup = True

	    def collate_and_verify(self, input_symbols):
	        """Collate the list of symbols and check for duplications.

	        This function collates the symbols by merging the same symbol in the
	        `input_symbols` list, and checks to see if there's any duplicate for
	        symbols not in the `self._dup_allowlist`.

	        Args:
	            input_symbols (List[Symbol]): A list of symbols from the codebase.

	        Returns:
	            Tuple[Dict[str, Symbol], Dict[str, Symbol]]: Tuple of
	            collated_symbols and violating_dup. `collated_symbols` is the
	            collated symbols after removing duplicates. `violating_dup` is the
	            set of symbols that are duplicated and not in the allow list.
	        """

	        self._update_allow_dup_in_symbols(input_symbols)
	        collated_symbols = {}
	        violating_dup = {}
	        for sym in input_symbols:
	            dup = collated_symbols.get(sym.symbol)
	            if dup:
	                # Every repeated usage is merged into the first one; only
	                # the ones that are not allowlisted count as violations.
	                if sym.allow_dup:
	                    dup.merge(sym)
	                else:
	                    dup.merge(sym)
	                    violating_dup[sym.symbol] = dup
	            else:
	                # No duplicates.
	                collated_symbols[sym.symbol] = sym
	        return collated_symbols, violating_dup


	class LocationDB:
	    """Database in locations.h.

	    This class manages the mapping between error location symbols and their
	    values in locations.h.
	    """

	    GENERATED_START = ('// Start of generated content. '
	                       'Do NOT modify after this line.')
	    GENERATED_END = '// End of generated content.'

	    # Matches one generated record of the form
	    #     /* <comment> */
	    #     <Symbol> = <value>,
	    # Group 1 is the comment text, group 2 is the symbol and group 3 is the
	    # decimal value. The comment character class deliberately excludes '*'
	    # so that a match can never run past the end of its own comment.
	    EXISTING_RECORDS_RE = (r'/\*\s*([a-zA-Z0-9:/_.= \n]*|\s*)\s*\*/\s*'
	                           r'\s+([a-zA-Z][a-zA-Z0-9]*)\s*=\s*([0-9]+)\s*,')

	    def __init__(self, path, dup_allowlist):
	        """Constructor for LocationDB.

	        Args:
	            path (str): The path to locations.h.
	            dup_allowlist (Set[str]): Duplication allowlist, see
	                Verifier.__init__'s documentation.
	        """

	        # 'path' is the path to locations.h.
	        self.path = path

	        # '_dup_allowlist' is a set that holds the allowlist of symbols that
	        # can be used multiple times in the source tree. See
	        # Verifier.__init__ for more info.
	        self._dup_allowlist = dup_allowlist

	        # 'symbols' is a dict that maps the symbol's representation (as a
	        # str) to the Symbol object. It is None if we are not loaded yet.
	        self.symbols = None

	        # 'value_to_symbol' is a dict that maps the symbol's value (the
	        # integer value of the enum) to the symbol's representation (as a
	        # str), i.e. to a key of 'symbols'. It is None if we are not loaded
	        # yet.
	        self.value_to_symbol = None

	        # '_lines' holds the content of the locations.h file, one element
	        # per line. It is None if we are not loaded yet.
	        self._lines = None

	        # '_start_line' is the line number in locations.h at which the enum
	        # section starts. It is None if we are not loaded yet.
	        # Line number starts from 1.
	        self._start_line = None

	        # '_end_line' is the line number in locations.h at which the enum
	        # section ends. It is None if we are not loaded yet.
	        # Line number starts from 1.
	        self._end_line = None

	        # '_next_value' is the next available enum value in locations.h.
	        # It is None if we are not loaded yet.
	        self._next_value = None

	    def _find_generated_marker(self):
	        """Finds and sets the start and end of generated marker.

	        Returns:
	            bool: True iff exactly one starting marker and exactly one
	            ending marker are found, and the starting marker comes first.
	        """

	        self._start_line = None
	        self._end_line = None
	        for line_num, line in enumerate(self._lines, start=1):
	            stripped = line.strip()
	            if stripped == LocationDB.GENERATED_START:
	                if self._start_line is not None:
	                    logging.error(('Multiple generated starting marker at %d'
	                                   ' and %d'), self._start_line, line_num)
	                    return False
	                self._start_line = line_num
	            elif stripped == LocationDB.GENERATED_END:
	                if self._end_line is not None:
	                    logging.error(('Multiple generated ending marker at %d '
	                                   'and %d'), self._end_line, line_num)
	                    return False
	                self._end_line = line_num
	        if self._start_line is None:
	            logging.error('No generated starting marker in locations.h')
	            return False
	        if self._end_line is None:
	            logging.error('No generated ending marker in locations.h')
	            return False
	        if self._end_line < self._start_line:
	            # With the markers swapped, the slices used by load() and
	            # store() would no longer describe the generated section.
	            logging.error(('Generated ending marker at %d precedes the '
	                           'starting marker at %d'),
	                          self._end_line, self._start_line)
	            return False
	        return True

	    def _scan_for_existing_records(self, content):
	        """Parse all existing records in 'content' into self.symbols.

	        Args:
	            content (str): The text of the generated section.

	        Returns:
	            int: The number of distinct symbols found.
	        """
	        pat = re.compile(LocationDB.EXISTING_RECORDS_RE)
	        self.symbols = {}
	        for m in pat.finditer(content):
	            s = Symbol(m.group(2))
	            s.value = int(m.group(3))
	            self.symbols[s.symbol] = s
	        return len(self.symbols)

	    def _build_reverse_map(self):
	        """Populate self.value_to_symbol from self.symbols.

	        Symbols without a value are left out of the map.
	        """
	        self.value_to_symbol = {}
	        for name, sym in self.symbols.items():
	            if sym.value is None:
	                continue
	            # Values are expected to be unique in existing locations.h.
	            assert sym.value not in self.value_to_symbol
	            self.value_to_symbol[sym.value] = name

	    def _get_generated_lines(self):
	        """Return the text strictly between the two generated markers."""
	        assert self._start_line is not None
	        assert self._end_line is not None
	        # The markers are 1-based line numbers, so as 0-based indices
	        # '_start_line' is the first line after the starting marker and
	        # '_end_line - 1' is the ending marker itself. Each element of
	        # '_lines' already carries its own line terminator.
	        return ''.join(self._lines[self._start_line:self._end_line - 1])

	    def load(self):
	        """Load from locations.h.

	        This method will load the content of locations.h from `self.path`.

	        Returns:
	            bool: True if successful, False if the generated markers could
	            not be located.
	        """

	        with open(self.path, 'r') as f:
	            self._lines = f.readlines()
	        if not self._find_generated_marker():
	            return False
	        self._scan_for_existing_records(self._get_generated_lines())
	        self._build_reverse_map()
	        return True

	    def _generate_header_lines(self):
	        """Return the generated lines of every symbol, ordered by value.

	        Returns:
	            A list with one element per symbol, each being whatever
	            Symbol.generate_lines() returned for that symbol.
	        """
	        ordered = sorted(self.symbols.values(),
	                         key=operator.attrgetter('value'))
	        return [sym.generate_lines() for sym in ordered]

	    def store(self):
	        """Save the state in this object back into locations.h.

	        This method will convert the state in this object into string
	        content to be written back into locations.h, write the result into
	        `self.path`, reset this object to the not-loaded state, and finally
	        run clang-format on the file.
	        """

	        assert self._lines is not None and self.symbols is not None
	        # Include the portion that is before the generated content, up to
	        # and including the starting marker.
	        result_lines = list(self._lines[:self._start_line])
	        # Add the generated portion.
	        for sym_lines in self._generate_header_lines():
	            result_lines.extend(sym_lines)
	        # Include the portion that is after the generated content, from the
	        # ending marker onwards.
	        result_lines.extend(self._lines[self._end_line - 1:])
	        with open(self.path, 'w') as f:
	            f.write(''.join(result_lines))
	        # Invalidate the variables to ensure stale data isn't left behind.
	        self.symbols = None
	        self._lines = None
	        self._start_line = None
	        self._end_line = None
	        self._next_value = None
	        self.value_to_symbol = None
	        # Format the result.
	        status = subprocess.call(['clang-format', '-i', self.path])
	        if status != 0:
	            logging.warning('clang-format exited with status %d on %s',
	                            status, self.path)

	    def update_from_scan_result(self, result):
	        """Update the state of this object.

	        This method will update the internal state within this object from
	        the Symbols found in `result`. Symbols that are already known keep
	        their value and have the scanned occurrence merged in; symbols that
	        are new are added and assigned the next available value.

	        Args:
	            result (Dict[str, Symbol]): A dict of symbols found in source
	                tree.
	        """

	        # Clear relevant fields in the current symbols.
	        # The enums start at 100 because we want to reserve the first 100
	        # enum in case there's any special use case.
	        self._next_value = 100
	        for sym in self.symbols.values():
	            sym.line_num = []
	            sym.path = []
	            sym.index_in_file = []
	            sym.allow_dup = sym.symbol in self._dup_allowlist
	            if sym.value is not None:
	                self._next_value = max(self._next_value, sym.value + 1)

	        for sym in result.values():
	            known = self.symbols.get(sym.symbol)
	            if known is not None:
	                known.merge(sym)
	                continue
	            sym.value = self._next_value
	            self.symbols[sym.symbol] = sym
	            self._next_value += 1

	        self._build_reverse_map()


	class DBTool:
	    """Bridge for various classes above.

	    This class is in charge of calling the scanner, the verifier and the
	    location database, and of bridging their inputs/outputs to each other.
	    All paths are relative to the current working directory.
	    """

	    ALLOWED_SRC_EXT = frozenset({'.cc', '.h'})
	    SCAN_DENYLIST = frozenset({'./error/location_utils.h'})
	    LOCATIONS_H_PATH = './error/locations.h'

	    def __init__(self, allowlist_path):
	        """Constructor for DBTool.

	        Args:
	            allowlist_path (str): The path to the file that stores the
	                content of dup_allowlist. Each line is a symbol that is in
	                the allowlist, thus each line is a symbol that can appear
	                multiple times in the code base.
	        """

	        # 'db_path' is the path to locations.h.
	        self.db_path = DBTool.LOCATIONS_H_PATH

	        # 'allowlist_path' is the path to duplication allowlist
	        # configuration. See the constructor's documentation above.
	        self.allowlist_path = allowlist_path

	        # '_dup_allowlist' is the duplication allowlist, see comment in
	        # Verifier.__init__().
	        self._dup_allowlist = set()
	        self._load_dup_allowlist()

	        # 'verifier' is an instance of Verifier for verifying symbols.
	        self.verifier = Verifier(self._dup_allowlist)

	        # 'db' is an instance of LocationDB for loading/storing locations.h.
	        self.db = LocationDB(self.db_path, self._dup_allowlist)

	    def _load_dup_allowlist(self):
	        """Load self._dup_allowlist from file.

	        Surrounding whitespace is stripped from every line; empty lines and
	        lines starting with '#' are ignored.
	        """

	        with open(self.allowlist_path, 'r') as f:
	            for raw_line in f:
	                entry = raw_line.strip()
	                if entry and not entry.startswith('#'):
	                    self._dup_allowlist.add(entry)

	    def check_sources(self):
	        """Scan the codebase and check for errors.

	        Returns:
	            Tuple[bool, Dict[str, Symbol]]: Returns (success, symbols).
	            `success` is True iff no disallowed duplicate was found, in
	            which case `symbols` holds the symbols found in the code base.
	            Otherwise the violations are printed and `symbols` is None.
	        """

	        # Scan for all symbols.
	        all_symbols = SourceScanner.scan_directory('.',
	                                                   DBTool.ALLOWED_SRC_EXT)
	        all_symbols = [r for r in all_symbols
	                       if r.path[0] not in DBTool.SCAN_DENYLIST]
	        collated_symbols, violations = self.verifier.collate_and_verify(
	            all_symbols)
	        # Notify the user on any violations.
	        if violations:
	            print('Please remove duplicate usage of error location in code:')
	            for violating_symbol in violations.values():
	                print(violating_symbol)
	            return False, None
	        return True, collated_symbols

	    def update_location_db(self):
	        """Scan the code base and update locations.h

	        Scan the code base for all usage of error symbols, then process
	        them to see if there's any error. If there's no error, update
	        locations.h.

	        Returns:
	            bool: True if successful.
	        """

	        if not self._load_full_db():
	            return False
	        self.db.store()

	        return True

	    def _load_full_db(self):
	        """Scan the sources, load locations.h and combine the two.

	        Returns:
	            bool: True if the sources are free of violations and
	            locations.h was loaded; False otherwise, in which case the
	            database must not be used.
	        """
	        success, symbols = self.check_sources()
	        if not success:
	            return False

	        # Load the content of the locations.h. A failed load leaves the
	        # database without symbols, so it must not be updated or queried.
	        if not self.db.load():
	            logging.error('Failed to load %s', self.db_path)
	            return False
	        self.db.update_from_scan_result(symbols)
	        return True

	    def lookup_symbol(self, value):
	        """Print the usage location for an error ID node.

	        Given the numeric value of an error location, find the matching
	        symbol and print every place where it is used.

	        Args:
	            value (int): The numeric value of the symbol.

	        Returns:
	            bool: True if the value was found and printed, False if the
	            database could not be loaded or the value is unknown.
	        """

	        if not self._load_full_db():
	            return False
	        if value not in self.db.value_to_symbol:
	            print('Value %s not found' % value)
	            return False
	        symbol = self.db.symbols[self.db.value_to_symbol[value]]
	        print('Value %s is %s and can be found at:' % (symbol.value,
	                                                       symbol.symbol))
	        for path, line in zip(symbol.path, symbol.line_num):
	            print('%s:%d' % (path, line))
	        return True

	    def decode_stack(self, locs):
	        """Print the stack for an error ID.

	        Given an error ID (dash-separated numeric values), decode each
	        value and print the corresponding symbol, or a "not found" message
	        for values that are unknown.

	        Args:
	            locs (str): A dash-separated string of numeric values,
	                e.g. '42-7-15'.

	        Returns:
	            bool: True if every value was decoded; False if `locs` is
	            malformed, the database could not be loaded, or at least one
	            value is unknown.
	        """

	        # Validate the input first, it is much cheaper than a full scan.
	        try:
	            stack = [int(x) for x in locs.split('-')]
	        except ValueError:
	            logging.error('Invalid error location stack: %s', locs)
	            return False

	        if not self._load_full_db():
	            return False

	        all_found = True
	        for val in stack:
	            if val not in self.db.value_to_symbol:
	                print('Value %s not found' % val)
	                all_found = False
	            else:
	                symbol = self.db.symbols[self.db.value_to_symbol[val]]
	                print('%s' % (symbol,))
	        return all_found


	class DBToolCommandLine:
	    """This class handles the command line operations for the tool."""

	    def __init__(self):
	        """Constructor for DBToolCommandLine."""

	        # 'parser' is an ArgumentParser instance for parsing command line
	        # arguments.
	        self.parser = None

	        # 'args' is the arguments parsed by self.parser.
	        self.args = None

	        # 'db_tool' is an instance of DBTool for carrying out the
	        # operations specified in arguments.
	        self.db_tool = None

	        # 'allowlist_path' is the path to the duplication allowlist
	        # configuration.
	        self.allowlist_path = None

	    def _setup_logging(self):
	        """Configure the root logger to report INFO and above."""
	        logging.basicConfig(level=logging.INFO)

	    def _parse_args(self):
	        """Build self.parser and parse the process arguments into self.args."""
	        self.parser = argparse.ArgumentParser(
	            description='Tool for handling error location in locations.h')
	        self.parser.add_argument('--update',
	                                 help=('Scan the source directory'
	                                       ' and update the locations.h db'),
	                                 action='store_true')
	        self.parser.add_argument('--check',
	                                 help=('Scan the source directory and check '
	                                       'that cryptohome error is used '
	                                       'correctly.'),
	                                 action='store_true')
	        # Let argparse do the integer conversion so that a malformed value
	        # is reported as a usage error instead of a traceback.
	        self.parser.add_argument('--lookup',
	                                 help='Lookup a single error location code',
	                                 type=int,
	                                 default=None)
	        self.parser.add_argument('--decode',
	                                 help=('Decode a stack of error location, '
	                                       'ex. 42-7-15'),
	                                 default=None)
	        self.parser.add_argument('--src',
	                                 help=('The cryptohome source '
	                                       'directory'), default=None)
	        self.args = self.parser.parse_args()

	    def _goto_srcdir(self):
	        """Change the working directory to the cryptohome source directory.

	        Uses --src if it was given, otherwise the directory two levels
	        above the directory containing this file.
	        """
	        assert self.args is not None
	        srcdir = self.args.src
	        if srcdir is None:
	            srcdir = os.path.join(os.path.dirname(__file__), '..', '..')
	        srcdir = os.path.abspath(srcdir)
	        logging.info('Using cryptohome source at: %s', srcdir)
	        os.chdir(srcdir)

	    def _get_dup_allowlist_path(self):
	        """Return the absolute path of dup_allowlist.txt next to this file."""
	        path = os.path.join(os.path.dirname(__file__), 'dup_allowlist.txt')
	        return os.path.abspath(path)

	    def main(self):
	        """The main function for this command line tool.

	        The first requested action, in the order --update, --check,
	        --lookup, --decode, is carried out.

	        Returns:
	            int: The exit code, 0 on success and 1 if no action was
	            specified or the action failed.
	        """
	        self._parse_args()
	        self._setup_logging()
	        # The allowlist path must be resolved before changing directory,
	        # because it is derived from __file__, which may be relative.
	        self.allowlist_path = self._get_dup_allowlist_path()
	        self._goto_srcdir()
	        self.db_tool = DBTool(self.allowlist_path)
	        if self.args.update:
	            success = self.db_tool.update_location_db()
	        elif self.args.check:
	            success, _ = self.db_tool.check_sources()
	        elif self.args.lookup is not None:
	            success = self.db_tool.lookup_symbol(int(self.args.lookup))
	        elif self.args.decode is not None:
	            # Only an explicit False is a failure: decode_stack() may
	            # return None, which must keep being treated as success.
	            outcome = self.db_tool.decode_stack(self.args.decode)
	            success = outcome is not False
	        else:
	            logging.error('No action specified, please see --help')
	            return 1
	        return 0 if success else 1


	# Script entry point: run the command line tool and exit with the code
	# returned by its main function.
	if __name__ == '__main__':
	    sys.exit(DBToolCommandLine().main())