INSTALL_MASK: index patterns anchored with leading slash (bug 675826)

For scalability, use a tree structure to index patterns that are
anchored with a leading slash.

Patterns anchored with leading slash are indexed by leading non-glob
components, making it possible to minimize the number of fnmatch
calls. For example:

  /foo*/bar -> {'.': ['/foo*/bar']}

  /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}

  /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}

Bug: https://bugs.gentoo.org/675826
Signed-off-by: Zac Medico <zmedico@gentoo.org>
(cherry picked from commit eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c)

BUG=chromium:969797
TEST=ran `build_image --board=betty base test`

Change-Id: Icf0ec621a3f68b2502d03d932bf7077d7ffd7464
Reviewed-on: https://chromium-review.googlesource.com/1707372
Tested-by: Will Bradley <wbbradley@chromium.org>
Commit-Ready: Mike Frysinger <vapier@chromium.org>
Legacy-Commit-Queue: Commit Bot <commit-bot@chromium.org>
Reviewed-by: Chris McDonald <cjmcdonald@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Allen Webb <allenwebb@google.com>
Reviewed-by: Alex Klein <saklein@chromium.org>
diff --git a/lib/portage/tests/util/test_install_mask.py b/lib/portage/tests/util/test_install_mask.py
index f651eb4..6a29db7 100644
--- a/lib/portage/tests/util/test_install_mask.py
+++ b/lib/portage/tests/util/test_install_mask.py
@@ -119,6 +119,42 @@
 					),
 				)
 			),
+			(
+				'/usr/share/locale '
+				'-/usr/share/locale/en* '
+				'-/usr/share/locale/kf5_all_languages '
+				'-/usr/share/locale/locale.alias',
+				(
+					(
+						'usr/share/locale/en',
+						False,
+					),
+					(
+						'usr/share/locale/en_GB',
+						False,
+					),
+					(
+						'usr/share/locale/en/kf5_all_languages',
+						False,
+					),
+					(
+						'usr/share/locale/locale.alias',
+						False,
+					),
+					(
+						'usr/share/locale/es',
+						True,
+					),
+					(
+						'usr/share/locale/fr',
+						True,
+					),
+					(
+						'usr/share/locale',
+						True,
+					),
+				)
+			),
 		)
 
 		for install_mask_str, paths in cases:
diff --git a/lib/portage/util/install_mask.py b/lib/portage/util/install_mask.py
index 32627eb..a8c0cbd 100644
--- a/lib/portage/util/install_mask.py
+++ b/lib/portage/util/install_mask.py
@@ -3,8 +3,11 @@
 
 __all__ = ['install_mask_dir', 'InstallMask']
 
+import collections
 import errno
 import fnmatch
+import functools
+import operator
 import sys
 
 from portage import os, _unicode_decode
@@ -18,13 +21,82 @@
 	_unicode = unicode
 
 
+def _defaultdict_tree():
+	return collections.defaultdict(_defaultdict_tree)
+
+
+_pattern = collections.namedtuple('_pattern', (
+	'orig_index',
+	'is_inclusive',
+	'pattern',
+	'leading_slash',
+))
+
+
 class InstallMask(object):
 	def __init__(self, install_mask):
 		"""
 		@param install_mask: INSTALL_MASK value
 		@type install_mask: str
 		"""
-		self._install_mask = install_mask.split()
+		# Patterns not anchored with leading slash
+		self._unanchored = []
+
+		# Patterns anchored with leading slash are indexed by leading
+		# non-glob components, making it possible to minimize the
+		# number of fnmatch calls. For example:
+		# /foo*/bar -> {'.': ['/foo*/bar']}
+		# /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}
+		# /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}
+		self._anchored = _defaultdict_tree()
+		for orig_index, pattern in enumerate(install_mask.split()):
+			# if pattern starts with -, possibly exclude this path
+			is_inclusive = not pattern.startswith('-')
+			if not is_inclusive:
+				pattern = pattern[1:]
+			pattern_obj = _pattern(orig_index, is_inclusive, pattern, pattern.startswith('/'))
+			# absolute path pattern
+			if pattern_obj.leading_slash:
+				current_dir = self._anchored
+				for component in list(filter(None, pattern.split('/'))):
+					if '*' in component:
+						break
+					else:
+						current_dir = current_dir[component]
+				current_dir.setdefault('.', []).append(pattern_obj)
+
+			# filename
+			else:
+				self._unanchored.append(pattern_obj)
+
+	def _iter_relevant_patterns(self, path):
+		"""
+		Iterate over patterns that may be relevant for the given path.
+
+		Patterns anchored with leading / are indexed by leading
+		non-glob components, making it possible to minimize the
+		number of fnmatch calls.
+		"""
+		current_dir = self._anchored
+		components = list(filter(None, path.split('/')))
+		patterns = []
+		patterns.extend(current_dir.get('.', []))
+		for component in components:
+			next_dir = current_dir.get(component, None)
+			if next_dir is None:
+				break
+			current_dir = next_dir
+			patterns.extend(current_dir.get('.', []))
+
+		if patterns:
+			# Sort by original pattern index, since order matters for
+			# non-inclusive patterns.
+			patterns.extend(self._unanchored)
+			if any(not pattern.is_inclusive for pattern in patterns):
+				patterns.sort(key=operator.attrgetter('orig_index'))
+			return iter(patterns)
+
+		return iter(self._unanchored)
 
 	def match(self, path):
 		"""
@@ -34,13 +106,11 @@
 		@return: True if path matches INSTALL_MASK, False otherwise
 		"""
 		ret = False
-		for pattern in self._install_mask:
-			# if pattern starts with -, possibly exclude this path
-			is_inclusive = not pattern.startswith('-')
-			if not is_inclusive:
-				pattern = pattern[1:]
+
+		for pattern_obj in self._iter_relevant_patterns(path):
+			is_inclusive, pattern = pattern_obj.is_inclusive, pattern_obj.pattern
 			# absolute path pattern
-			if pattern.startswith('/'):
+			if pattern_obj.leading_slash:
 				# handle trailing slash for explicit directory match
 				if path.endswith('/'):
 					pattern = pattern.rstrip('/') + '/'