#!/usr/bin/env python3
# Copyright 2022 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
References
----------
.. [0] https://github.com/scipy/scipy/blob/main/scipy/stats/_resampling.py
.. [1] B. Efron and R. J. Tibshirani, An Introduction to the Bootstrap,
Chapman & Hall/CRC, Boca Raton, FL, USA (1993)
.. [2] Nathaniel E. Helwig, "Bootstrap Confidence Intervals",
http://users.stat.umn.edu/~helwig/notes/bootci-Notes.pdf
.. [3] Bootstrapping (statistics), Wikipedia,
https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29
"""
# This allows us to use the class name as type identifiers inside the
# class itself.
# https://github.com/google/styleguide/blob/gh-pages/pyguide.md#3193-forward-declarations
from __future__ import annotations
import multiprocessing as mp
import time
from typing import Any, Callable, ClassVar, Iterable, Optional
from experiment import Experiment
import fpsutils
import numpy as np
import numpy.typing as npt
# There are many issues and performance bottlenecks when using
# multiprocessing. Check out some of the articles on
# https://thelaziestprogrammer.com/python/a-multiprocessing-pool-pickle .
class BootstrapResults:
def __init__(self, bootstrap_samples: list[int]) -> None:
self._samples = bootstrap_samples
def samples(self) -> list[int]:
return self._samples
def num_samples(self) -> int:
return len(self._samples)
    def confidence_interval(
        self, confidence_percent: int = 95
    ) -> tuple[float, float]:
        """Calculate the confidence interval as documented in ISO 2006.

        This simply uses percentiles of the bootstrap distribution.
        """
        # The output distributions still appear to be right-skewed, so we
        # should probably use BCa intervals for FAR and FRR (see the
        # illustrative sketch after this class).
        #
        # https://en.wikipedia.org/wiki/Skewness
        # https://en.wikipedia.org/wiki/Bootstrapping_(statistics)
        # * Bias-corrected bootstrap – adjusts for bias in the bootstrap
        #   distribution.
        # * Accelerated bootstrap – The bias-corrected and accelerated (BCa)
        #   bootstrap, by Efron (1987),[14] adjusts for both bias and skewness
        #   in the bootstrap distribution. This approach is accurate in a wide
        #   variety of settings, has reasonable computation requirements, and
        #   produces reasonably narrow intervals.[14]
        ci_percent_lower = (100 - confidence_percent) / 2
        ci_percent_upper = 100 - ci_percent_lower
        # For a 95% CI, this is the [2.5, 97.5] percentile pair.
        boot_limits = np.percentile(
            self._samples, [ci_percent_lower, ci_percent_upper]
        )
        return (float(boot_limits[0]), float(boot_limits[1]))
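
# Illustrative sketch (not part of the original analyses): the right-skew
# noted in `confidence_interval` suggests BCa intervals. scipy's resampling
# module (reference [0]) exposes this directly via
# `scipy.stats.bootstrap(..., method="BCa")`. The sketch assumes the raw
# per-attempt boolean decisions are available, since BCa resamples the data
# itself rather than reusing precomputed bootstrap counts.
def _bca_interval_sketch(
    decisions: npt.NDArray[np.bool_], confidence_percent: float = 95
) -> tuple[float, float]:
    """Hypothetical helper: BCa confidence interval for a decision count."""
    from scipy import stats  # Local import; scipy is not otherwise required.

    res = stats.bootstrap(
        (decisions,),
        np.sum,
        confidence_level=confidence_percent / 100,
        method="BCa",
    )
    return (
        float(res.confidence_interval.low),
        float(res.confidence_interval.high),
    )
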
class Bootstrap:
"""The base class for implementing a bootstrap sample over an Experiment."""
USE_GLOBAL_SHARING = False
"""Enables the use of sharing process data by global inheritance.
Enable this when the serialization of the `self` data has become too much
of a parallelization bottleneck. You can see this IO overhead when running
the sample. Looking at the CPU monitor, if CPU loads are not being fully
utilized, this could mean that they are blocked on IO. Try enabling this
flag.
"""
__data: ClassVar[Bootstrap]
__rng: ClassVar[np.random.Generator]
def __init__(self, exp: Experiment, verbose: bool = False) -> None:
self._verbose = verbose
if self._verbose:
print("# Initializing Runtime and Caches.")
self._time_init_start = time.time()
self._init(exp)
self._time_init_end = time.time()
self._time_rng_start = 0.0
self._time_rng_end = 0.0
self._time_pool_startup_start = 0.0
self._time_pool_startup_end = 0.0
self._time_samples_start = 0.0
self._time_samples_end = 0.0
    def _init(self, exp: Experiment) -> None:
        """Initialize runtime caches.

        Implement this.

        This will be run once before any `_sample` invocations occur.

        Save as little data in `self` as possible, since this data must be
        copied to each worker process in certain runtime configurations.

        Do not save a copy of `exp` itself, since tables may be modified upon
        accessing them. This can cause unnecessary overhead for each worker
        process that could have simply been incurred once in this init
        function.
        """
        return
    def _sample(self, rng: np.random.Generator) -> int:
        """Complete a single bootstrap sample.

        Implement this.

        Args:
            rng: The initialized random number generator to use for the sample.

        Returns:
            The count of relevant values observed in the sample.
        """
        return 0
@classmethod
    def _process_global_init(cls, data: Bootstrap) -> None:
cls.__rng = np.random.default_rng()
cls.__data = data
@classmethod
def _process_global_sample(cls, sample_id: int) -> int:
return cls.__data._sample(cls.__rng)
def __run_single_process(
self,
num_samples: int,
progress: Callable[[Iterable[Any], int], Iterable[Any]],
) -> list[int]:
boot_counts = list()
if self._verbose:
print("# Initializing RNG.")
self._time_rng_start = time.time()
rng = np.random.default_rng()
self._time_rng_end = time.time()
if self._verbose:
print("# Starting Bootstrap Samples.")
self._time_samples_start = time.time()
for _ in progress(range(num_samples), num_samples):
boot_counts.append(self._sample(rng))
self._time_samples_end = time.time()
return boot_counts
def __run_multi_process(
self,
num_samples: int,
num_proc: Optional[int],
progress: Callable[[Iterable[Any], int], Iterable[Any]],
) -> list[int]:
# All processes must have their own unique seed for their pseudo random
# number generator, otherwise they will all generate the same random values.
#
# For reproducibility and testing, you can use np.random.SeedSequence.spawn
# or [np.random.default_rng(i) for i in range(BOOTSTRAP_SAMPLES)]
# See https://numpy.org/doc/stable/reference/random/parallel.html .
#
        # We could possibly generate fewer random number generators, since we only
# need one per process. This might be achievable by using the Pool
# constructor's initializer function to add a process local global
# rng variable. For now, it is just safer to generate BOOTSTRAP_SAMPLES
# random number generators. Do note that you can't really parallelize
# the rng initialization, since the CPU doesn't produce enough entropy
# to satisfy threads concurrently. They end up just being blocked
# on IO.
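        # For example, a reproducible per-process seeding scheme might look
        # like this (illustrative only; not what the code below does):
        #
        #   seeds = np.random.SeedSequence(12345).spawn(num_samples)
        #   rngs = [np.random.default_rng(s) for s in seeds]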
if num_proc == 0:
num_proc = mp.cpu_count()
if self._verbose:
print(
f"# Dispatching {num_samples} bootstrap samples over "
f"{num_proc} processes."
)
        if self.USE_GLOBAL_SHARING:
            # It is my understanding, from experimentation and documentation,
            # that any normal object passed into the pool.map call will be
            # serialized and sent to the worker processes. If the objects being
            # sent are large, this serialization/deserialization can take some
            # time and can be error-prone if there are circular references.
            #
            # The worker processes will inherit [without serialization] global
            # and local context for executing the `initializer` with `initargs`.
            # We maintain the global context, but lose local references when
            # executing subsequent pool operations.
            #
            # There are probably other performant mechanisms for sharing data
            # efficiently on the same machine, but one of the fastest mechanisms
            # I have experienced is exploiting the process initializer's lack of
            # serialization to save the local object reference in each
            # process's global state.
            #
            # Although all permutations of this mechanism are a subtle hack,
            # using a classmethod to silo the global data to the class itself
            # adds a small amount of scoping. Furthermore, this isn't
            # necessarily a scope collision with other purposes, since the
            # global scope is only clobbered within each pool process, not
            # the parent process. The assumption is that you must reinitialize
            # a new pool for each Bootstrap sampling.
self._time_pool_startup_start = time.time()
with mp.Pool(
initializer=self._process_global_init,
initargs=(self,),
processes=num_proc,
) as pool:
self._time_pool_startup_end = time.time()
if self._verbose:
print("# Blastoff.")
self._time_samples_start = time.time()
boot_counts = pool.map(
self._process_global_sample,
progress(range(num_samples), num_samples),
)
self._time_samples_end = time.time()
return boot_counts
else:
self._time_pool_startup_start = time.time()
with mp.Pool(processes=num_proc) as pool:
self._time_pool_startup_end = time.time()
if self._verbose:
print("# Initializing RNGs.")
self._time_rng_start = time.time()
rngs = progress(
(np.random.default_rng(i) for i in range(num_samples)),
num_samples,
)
self._time_rng_end = time.time()
if self._verbose:
print("# Blastoff.")
self._time_samples_start = time.time()
boot_counts = pool.map(self._sample, rngs)
self._time_samples_end = time.time()
return boot_counts
def run(
self,
num_samples: int = 5000,
num_proc: Optional[int] = 0,
progress: Callable[
[Iterable[Any], int], Iterable[Any]
] = lambda it, total: iter(it),
) -> BootstrapResults:
"""Run all bootstrap samples.
Args:
            num_samples: The number of bootstrap samples to acquire.
1000 samples for 95% CI
5000 samples for 99% CI
num_proc: Number of simultaneous processes to use.
(None = Sequential | 0 = Max Processors | # = Specific #)
            progress: An iterable wrapper that can track sampling progress.
"""
if num_proc is None:
boot_counts = self.__run_single_process(num_samples, progress)
else:
boot_counts = self.__run_multi_process(
num_samples, num_proc, progress
)
if self._verbose:
self.print_timing()
return BootstrapResults(boot_counts)
def print_timing(self):
"""Print the saved timing values."""
delta_init = self._time_init_end - self._time_init_start
delta_rng = self._time_rng_end - self._time_rng_start
delta_pool_startup = (
self._time_pool_startup_end - self._time_pool_startup_start
)
delta_samples = self._time_samples_end - self._time_samples_start
total = delta_init + delta_rng + delta_pool_startup + delta_samples
print(f"Cache init took {delta_init:.6f}s.")
print(f"RNG init took {delta_rng:.6f}s.")
        print(
            f"Process pool setup took {delta_pool_startup:.6f}s"
            f'{" (includes RNG init)" if self.USE_GLOBAL_SHARING else ""}.'
        )
print(f"Bootstrap sampling took {delta_samples:.6f}s.")
print("-----------------------------------------")
print(f"Total combined runtime was {total:.6f}s.")
class BootstrapFARFlat(Bootstrap):
# For the 100000 sample case, enabling this reduces overall time by
# 3 seconds.
USE_GLOBAL_SHARING = True
def _init(self, exp: Experiment) -> None:
far = exp.far_decisions()
assert fpsutils.has_columns(far, [Experiment.TableCol.Decision])
self.far_list: npt.NDArray[np.bool_] = np.array(
far[Experiment.TableCol.Decision.value]
== Experiment.Decision.Accept.value,
dtype=bool,
)
    def _sample(self, rng: np.random.Generator) -> int:
        """Complete a single flat bootstrap sample."""
        sample = fpsutils.boot_sample(self.far_list, rng=rng)
        return int(np.sum(sample))
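
# For reference, a hedged sketch of the contract this code assumes from
# `fpsutils.boot_sample` (the hierarchy docstrings below note that it wraps
# `rng.choice`): one bootstrap draw is a with-replacement resample of the same
# length as the input. This is an illustration, not the fpsutils source.
def _boot_sample_sketch(
    values: npt.NDArray[Any], rng: np.random.Generator
) -> npt.NDArray[Any]:
    """Hypothetical stand-in for fpsutils.boot_sample."""
    values = np.asarray(values)
    return rng.choice(values, size=len(values), replace=True)
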
class BootstrapFullFARHierarchy(Bootstrap):
# For the 100000 sample case, enabling this reduces overall time by
# 3 seconds.
USE_GLOBAL_SHARING = True
def _init(self, exp: Experiment) -> None:
fa_table = exp.fa_table()
# The accepted tuples for fa_set and fa_trie.
fa_set_tuple = [
Experiment.TableCol.Verify_User.value,
Experiment.TableCol.Enroll_User.value,
Experiment.TableCol.Verify_Finger.value,
Experiment.TableCol.Enroll_Finger.value,
Experiment.TableCol.Verify_Sample.value,
]
assert fpsutils.has_columns(fa_table, fa_set_tuple)
self.fa_set = fpsutils.DataFrameSetAccess(fa_table, fa_set_tuple)
self.fa_trie = fpsutils.DataFrameCountTrieAccess(fa_table, fa_set_tuple)
self.users_list = exp.user_list()
self.fingers_list = exp.finger_list()
self.samples_list = exp.sample_list()
        # If you add exp, fa_table, and far_decisions to the self object,
        # it is no longer feasible at runtime to use USE_GLOBAL_SHARING=False.
        # For some reason, the total runtime grows with respect to process
        # count.
def _sample(self, rng: np.random.Generator) -> int:
"""Complete a single bootstrap sample using full hierarchy bootstrap method.
The important parts of this approach are the following:
* Avoiding DataFrame queries during runtime. We use fa_set cache instead.
This is the difference between hundreds of _us_ vs. hundreds of _ns_
per query
* Using rng.choice (now boot_sample) on either a scalar, np.array, or list
is important. Other methods are very slow.
* The big finale that gets the single bootstrap sample time down from
tens of _s_ to hundreds of _ms_ is checking for the loop abort path
using the fa_trie.
Additional Performance:
* Use boot_sample_range instead os boot_sample.
* Flatten loops.
"""
user_list = self.users_list
fingers_list = self.fingers_list
samples_list = self.samples_list
fa_set = self.fa_set
fa_trie = self.fa_trie
sample = []
# 72 users
for v in fpsutils.boot_sample(user_list, rng=rng):
if not fa_trie.isin((v,)):
continue
# 72 other template users (same user with different finger is allowed)
for t in fpsutils.boot_sample(user_list, rng=rng):
if not fa_trie.isin((v, t)):
continue
# 6 fingers
for fv in fpsutils.boot_sample(fingers_list, rng=rng):
if not fa_trie.isin((v, t, fv)):
continue
# FIXME: This is a hack to try to fix the possible
# sample pool issue described below.
# This is less bad, since there are only 6 fingers.
# mod_finger_list = np.array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5])
#
# It should be known that having this fix here or not
# having this fix here does not change the output
# percentiles + mean. It also doesn't seem to change the
# histogram skew.
mod_finger_list = fingers_list
# if v == t:
# mod_finger_list = np.array(np.where(~(mod_finger_list == fv)))
for ft in fpsutils.boot_sample(mod_finger_list, rng=rng):
# FIXME: See below conversation.
                        # FIXME: We could have this boot_sample keep running
                        # until it has seen len(fingers_list) or
                        # len(fingers_list)-1 legitimate fingers.
if not fa_trie.isin((v, t, fv, ft)):
continue
# 60 verification samples
for a in fpsutils.boot_sample(samples_list, rng=rng):
# We don't avoid the invalid matching case,
# (v == t) and (fv == ft), since it is slower than
# simply querying for the results.
# We actually don't care about omitting these
# cases because it will always receive a 0 from the
# query.
# As long as we always use the sum operation to
# aggregate all subsample data, no problems should
# arise.
#
                            # TODO: Reverify that allowing the selection of
                            # an invalid match doesn't skew the probabilities,
                            # since you could end up with a sample of all
                            # invalid fingers (given replace=True).
                            # On second thought, this seems like it could be
                            # slightly wrong, but I don't recall why I thought
                            # this should work out to be the same as always
                            # selecting proper matches.
                            #
                            # I guess you only need to reduce the number of
                            # fingers in the selection pool when the users are
                            # the same.
                            #
                            # I think the original logic was that, if we aren't
                            # keeping track of the true accepts, then it
                            # shouldn't matter if there are more invalid
                            # true-accepts; but I think this is wrong because
                            # it reduces the chance of getting legitimate
                            # false accepts.
query = (v, t, fv, ft, a)
sample.append(fa_set.isin(query))
        return int(np.sum(sample))
class BootstrapFullFRRHierarchy(Bootstrap):
# For the 100000 sample case, enabling this reduces overall time by
# 3 seconds.
USE_GLOBAL_SHARING = True
def _init(self, exp: Experiment) -> None:
fr_table = exp.fr_table()
        # The tuple columns for the fa_set and fa_trie caches (which hold FR
        # data in this class; the names mirror the FAR class above).
fr_set_tuple = [
Experiment.TableCol.Enroll_User.value,
Experiment.TableCol.Enroll_Finger.value,
Experiment.TableCol.Verify_Sample.value,
]
assert fpsutils.has_columns(fr_table, fr_set_tuple)
self.fa_set = fpsutils.DataFrameSetAccess(fr_table, fr_set_tuple)
self.fa_trie = fpsutils.DataFrameCountTrieAccess(fr_table, fr_set_tuple)
self.users_list = exp.user_list()
self.fingers_list = exp.finger_list()
self.samples_list = exp.sample_list()
        # If you add exp, fa_table, and far_decisions to the self object,
        # it is no longer feasible at runtime to use USE_GLOBAL_SHARING=False.
        # For some reason, the total runtime grows with respect to process
        # count.
def _sample(self, rng: np.random.Generator) -> int:
"""Complete a single bootstrap sample using full hierarchy bootstrap method.
The important parts of this approach are the following:
* Avoiding DataFrame queries during runtime. We use fa_set cache instead.
This is the difference between hundreds of _us_ vs. hundreds of _ns_
per query
* Using rng.choice (now boot_sample) on either a scalar, np.array, or list
is important. Other methods are very slow.
* The big finale that gets the single bootstrap sample time down from
tens of _s_ to hundreds of _ms_ is checking for the loop abort path
using the fa_trie.
Additional Performance:
* Use boot_sample_range instead os boot_sample.
* Flatten loops.
"""
user_list = self.users_list
fingers_list = self.fingers_list
samples_list = self.samples_list
fa_set = self.fa_set
fa_trie = self.fa_trie
sample = []
# 72 users
for u in fpsutils.boot_sample(user_list, rng=rng):
if not fa_trie.isin((u,)):
continue
# 6 fingers
for f in fpsutils.boot_sample(fingers_list, rng=rng):
if not fa_trie.isin((u, f)):
continue
# 60 verification samples
for s in fpsutils.boot_sample(samples_list, rng=rng):
query = (u, f, s)
sample.append(fa_set.isin(query))
        return int(np.sum(sample))
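
# Typical end-to-end usage (sketch; assumes an `Experiment` named `exp` has
# been loaded elsewhere, since constructing one is outside this module):
#
#   boot = BootstrapFullFARHierarchy(exp, verbose=True)
#   results = boot.run(num_samples=5000, num_proc=0)
#   lo, hi = results.confidence_interval(95)
#   print(f"Bootstrap FA count 95% CI: [{lo}, {hi}]")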