biod/study/analysis-tool/run.py - third_party/platform2 - Git at Google

 #!/usr/bin/env python3
 # Copyright 2024 The ChromiumOS Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """A statistical analysis tool for fingerprint evaluation tool results.
 """

 from __future__ import annotations

 import argparse
 import pathlib
 import sys
 from typing import Optional

 import bootstrap
 from experiment import Experiment
 from fpc_bet_results import FPCBETResults
 import fpsutils
 import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objs as go
 from report_pandoc import Report2
 import scipy.stats as st
 from test_case import TestCase
 from tqdm.autonotebook import tqdm  # Auto detect notebook or console.


 def print_far_value(
     val: float,
     k_comparisons: Optional[list[int]] = None,
 ) -> str:
     """Format a FAR value in scientific notation with 1/Nk comparisons.

     Despite its name, this function prints nothing; it returns the text.

     Args:
         val: The false accept rate, expressed as a fraction.
         k_comparisons: Reference rates to compare against, in thousands
             (50 stands for 1/50k). Defaults to 20, 50, 100 and 500.

     Returns:
         `val` in scientific notation followed by one
         " = <percent>% of 1/<c>k" clause per entry of `k_comparisons`.
     """
     # A None sentinel avoids sharing one mutable default list across calls.
     if k_comparisons is None:
         k_comparisons = [20, 50, 100, 500]

     parts = [f"{val:.4e}"]
     # val * 100 is the rate in percent; dividing by 1/(c * 1000) is the same
     # as multiplying by c * 1000.
     parts.extend(
         f"{val * 100 * c*1000:.3f}% of 1/{c}k" for c in k_comparisons
     )
     # TODO: Make nice output like "1 in 234k".
     return " = ".join(parts)


 # Ultimately, we want to do a histogram over the entire FAR/FRR Decision
 # dataset, but doing so directly with the large DataFrame is much too slow
 # and will actually hang plotly. We are mimicking the following histogram
 # operation:
 # go.Histogram(histfunc="sum", x=far['EnrollUser'], y=far['Decision'])
 #
 # A similar approach is the following groupby, which runs in about 300ms:
 # far[['EnrollUser', 'Decision']].groupby(['EnrollUser']).sum()
 #
 # The fastest method is to reconstruct the complete counts table from the
 # pre-aggregated fa_table. This runs in about 66ms, most of which is the
 # time needed to run exp.user_list().


 def fa_count_figure(
     exp: Experiment,
     cols: list[Experiment.TableCol],
     title: str,
     xaxis_title: str,
 ) -> go.Figure:
     """Build a grouped bar chart of false accept counts.

     One bar series is drawn for each entry of `cols`, using the counts that
     `exp.fa_counts_by()` returns for that column.

     Args:
         exp: The experiment to pull the false accept counts from.
         cols: The table columns to count false accepts by.
         title: The figure title.
         xaxis_title: The x-axis title.

     Returns:
         The bar chart, with a categorical x-axis that only shows tick labels
         for categories having at least one false accept.
     """
     counts_table = pd.DataFrame(
         {col.value: exp.fa_counts_by(col) for col in cols}
     ).rename(columns={col.value: f"FA Counts by {col.value}" for col in cols})

     # Only rows with a non-zero count in some series get a tick label.
     has_false_accept = (counts_table > 0).any(axis=1)
     tick_labels = counts_table.loc[has_false_accept].index

     # Zero-count categories still keep their slot on the axis: the blank
     # space between bars helps identify possible abnormal clusters of high
     # false acceptance users.
     figure = px.bar(counts_table)
     figure.update_layout(
         title=title,
         xaxis_title=xaxis_title,
         yaxis_title_text="False Accept Count",
         legend_title="",
         barmode="group",
         bargap=0.0,
         bargroupgap=0.0,
         xaxis={
             "type": "category",
             "tickmode": "array",
             "tickvals": tick_labels,
         },
     )
     # Keep the legend inside the plot area, in the top-left corner.
     figure.update_layout(
         legend={
             "yanchor": "top",
             "y": 0.99,
             "xanchor": "left",
             "x": 0.01,
         }
     )
     return figure


 def fr_count_figure(
     exp: Experiment,
     cols: list[Experiment.TableCol],
     title: str,
     xaxis_title: str,
 ) -> go.Figure:
     """Build a grouped bar chart of false reject counts.

     One bar series is drawn for each entry of `cols`, using the counts that
     `exp.fr_counts_by()` returns for that column.

     Args:
         exp: The experiment to pull the false reject counts from.
         cols: The table columns to count false rejects by.
         title: The figure title.
         xaxis_title: The x-axis title.

     Returns:
         The bar chart, with a categorical x-axis that only shows tick labels
         for categories having at least one false reject. The hover text
         carries a "percent" value computed from the first series only.
     """
     counts_table = pd.DataFrame(
         {col.value: exp.fr_counts_by(col) for col in cols}
     ).rename(columns={col.value: f"FR Counts by {col.value}" for col in cols})

     # Only rows with a non-zero count in some series get a tick label.
     has_false_reject = (counts_table > 0).any(axis=1)
     tick_labels = counts_table.loc[has_false_reject].index

     # Express each count as a percentage of the average number of FR trials
     # per category (total trials split evenly over the categories).
     total_trials = exp.fr_trials_count()
     num_categories = len(counts_table.index)
     trials_per_category = float(total_trials) / float(num_categories)
     percent_table = (
         np.array(counts_table.values, dtype=float) / trials_per_category
     ) * 100.0

     # Zero-count categories still keep their slot on the axis, so clusters
     # of abnormal categories remain visible.
     figure = px.bar(
         counts_table,
         hover_data={
             "percent": (":.3f", percent_table[:, 0]),
         },
     )
     figure.update_layout(
         title=title,
         xaxis_title=xaxis_title,
         legend_title="",
         barmode="group",
         bargap=0.0,
         bargroupgap=0.0,
         xaxis={
             "type": "category",
             "tickmode": "array",
             "tickvals": tick_labels,
         },
         yaxis={
             "title": "False Rejects Count",
             "showline": True,
         },
         # A secondary axis is configured, but no trace created in this
         # function is bound to it.
         yaxis2={
             "title": "False Reject Percent",
             "side": "right",
             "showline": True,
         },
     )
     # Keep the legend inside the plot area, in the top-left corner.
     figure.update_layout(
         legend={
             "yanchor": "top",
             "y": 0.99,
             "xanchor": "left",
             "x": 0.01,
         }
     )
     return figure


 def _legend_top_left(fig) -> None:
     """Anchor the legend of the plotly figure `fig` at the top-left."""
     fig.update_layout(
         legend=dict(
             yanchor="top",
             y=0.99,
             xanchor="left",
             x=0.01,
         )
     )


 def _overall_bar_figure(
     counts: dict,
     title: str,
     xaxis_title: str,
     legend_title: str,
     categorical: bool = False,
 ):
     """Build a grouped bar chart comparing counts across test cases.

     Args:
         counts: Maps a series label to its counts (whatever the Experiment
             `fa_counts_by()`/`fr_counts_by()` methods return).
         title: The figure title.
         xaxis_title: The x-axis title.
         legend_title: The legend title.
         categorical: When True, use a categorical x-axis without gaps
             between bars and pin the legend to the top-left corner.

     Returns:
         The plotly bar figure.
     """
     layout = dict(
         title=title,
         xaxis_title=xaxis_title,
         yaxis_title_text="Count",
         legend_title=legend_title,
         barmode="group",
     )
     if categorical:
         layout.update(
             bargap=0.0,
             bargroupgap=0.0,
             xaxis=dict(type="category"),
         )

     fig = px.bar(pd.DataFrame(counts))
     fig.update_layout(**layout)
     if categorical:
         _legend_top_left(fig)
     return fig


 def _add_test_case_histograms(section, exp) -> None:
     """Add the FA/FR count figures of one test case to `section`.

     Figures are added for users, fingers, samples and groups, in that order.

     Args:
         section: Report subsection exposing `add_figure(name, caption, fig)`.
         exp: The Experiment holding the decisions of the test case.
     """
     col = Experiment.TableCol

     # A high FA count for an EnrollUser would indicate some template(s) for a
     # given user allows more false accepts from other users.
     # A high FA count for a VerifyUser would indicate that some match
     # attempts with this user's fingers yields more false accepts.

     # User
     fig = fa_count_figure(
         exp,
         [col.Enroll_User, col.Verify_User],
         "False Accepts by User ID of Enroll and Verify",
         "User ID",
     )
     section.add_figure(
         "FA_by_User", "False Accepts by User ID of Enroll and Verify.", fig
     )
     fig = fr_count_figure(
         exp,
         [col.Verify_User],
         "False Rejects by User ID",
         "User ID",
     )
     section.add_figure("FR_by_User", "False Rejects by User ID.", fig)

     # Finger
     fig = fa_count_figure(
         exp,
         [col.Enroll_Finger, col.Verify_Finger],
         "False Accepts by Finger ID of Enroll and Verify",
         "Finger ID",
     )
     section.add_figure(
         "FA_by_Finger",
         "False Accepts by Finger ID of Enroll and Verify.",
         fig,
     )
     fig = fr_count_figure(
         exp,
         [col.Verify_Finger],
         "False Rejects by Finger ID",
         "Finger ID",
     )
     section.add_figure("FR_by_Finger", "False Rejects by Finger ID.", fig)

     # Sample
     # Keep in mind that different test cases may select different samples
     # for verification.
     fig = fa_count_figure(
         exp,
         [col.Verify_Sample],
         "False Accepts by Verify Sample ID",
         "Sample ID",
     )
     section.add_figure(
         "FA_by_Sample", "False Accepts by Verify Sample ID.", fig
     )
     fig = fr_count_figure(
         exp,
         [col.Verify_Sample],
         "False Rejects by Sample ID. "
         "Keep in mind that different test cases may use "
         "different samples for verification.",
         "Sample ID",
     )
     # Overlay a linear regression of the FR count over the sample ID.
     fr_counts = exp.fr_counts_by(col.Verify_Sample)
     line_x = np.array(fr_counts.index)
     line = st.linregress(line_x, np.array(fr_counts.values))
     line_y = line.slope * line_x + line.intercept
     # go.Scatter replaces the deprecated go.Line; no mode is given, so the
     # trace keeps the default scatter rendering it had before.
     fig.add_trace(go.Scatter(x=line_x, y=line_y, name="Linear Regression"))
     section.add_figure("FR_by_Sample", "False Rejects by Sample ID.", fig)

     # Group
     fig = fa_count_figure(
         exp,
         [col.Enroll_Group, col.Verify_Group],
         "False Accepts by Group of Enroll and Verify",
         "Group",
     )
     section.add_figure(
         "FA_by_Group", "False Accepts by Group of Enroll and Verify.", fig
     )
     fig = fr_count_figure(
         exp,
         [col.Verify_Group],
         "False Rejects by Group",
         "Group",
     )
     section.add_figure("FR_by_Group", "False Rejects by Group.", fig)


 def _add_overall_histograms(rpt, exps: dict, test_cases: list) -> None:
     """Add the figures comparing all test cases to the overall section.

     Args:
         rpt: The report; figures go to `rpt.overall_section()`.
         exps: Maps each test case to its Experiment.
         test_cases: The test cases to compare, in display order.
     """
     col = Experiment.TableCol
     category_and_test_case = "Category + Test Case"

     def fa_counts(column, prefix=""):
         return {
             prefix + tc.name: exps[tc].fa_counts_by(column)
             for tc in test_cases
         }

     def fr_counts(column):
         return {tc.name: exps[tc].fr_counts_by(column) for tc in test_cases}

     def add(name, caption, xaxis_title, legend_title, counts, **kwargs):
         # The caption doubles as the figure title.
         fig = _overall_bar_figure(
             counts, caption, xaxis_title, legend_title, **kwargs
         )
         rpt.overall_section().add_figure(name, caption, fig)

     add(
         "FA_by_User",
         "False Accepts by User",
         "User",
         category_and_test_case,
         fa_counts(col.Enroll_User, "EnrollUser_")
         | fa_counts(col.Verify_User, "VerifyUser_"),
         categorical=True,
     )
     add(
         "FR_by_User",
         "False Rejects by User",
         "User",
         "Test Case",
         fr_counts(col.Verify_User),
         categorical=True,
     )
     add(
         "FA_by_Sample",
         "False Accepts by Sample",
         "Sample",
         "Test Case",
         fa_counts(col.Verify_Sample),
         categorical=True,
     )
     add(
         "FA_by_Finger",
         "False Accepts by Finger",
         "Finger",
         category_and_test_case,
         fa_counts(col.Enroll_Finger, "EnrollFinger_")
         | fa_counts(col.Verify_Finger, "VerifyFinger_"),
     )
     add(
         "FR_by_Finger",
         "False Rejects by Finger",
         "Finger",
         "Test Case",
         fr_counts(col.Verify_Finger),
     )
     add(
         "FA_by_Group",
         "False Accepts by Group",
         "Group",
         category_and_test_case,
         fa_counts(col.Enroll_Group, "EnrollGroup_")
         | fa_counts(col.Verify_Group, "VerifyGroup_"),
     )
     add(
         "FR_by_Group",
         "False Rejects by Group",
         "Group",
         "Test Case",
         fr_counts(col.Verify_Group),
     )


 def _bootstrap_histogram(
     boot_results,
     trials: int,
     rate_name: str,
     count_label: str,
     confidence_percent: int,
     **layout,
 ):
     """Plot how often each rate was observed among the bootstrap samples.

     Args:
         boot_results: Bootstrap results exposing `samples()` and
             `confidence_interval()`.
         trials: Number of trials; sample counts are divided by it to obtain
             rates.
         rate_name: Name of the rate ("FAR" or "FRR"), used for the x column
             and in the figure title.
         count_label: Label of the raw per-sample count shown on hover.
         confidence_percent: Confidence level shown in the interval labels.
         **layout: Extra keyword arguments for `fig.update_layout()`.

     Returns:
         A `(fig, ci_lower, ci_upper, mean, std)` tuple, where the last four
         values are rates (counts divided by `trials`).
     """
     samples = boot_results.samples()

     # Showing raw values works because we take so many bootstrap samples,
     # which fills in a lot of gaps in a "unique" value (bin_size=1)
     # histogram.
     bins, counts = fpsutils.discrete_hist(samples)
     df = pd.DataFrame(
         {
             # X-Axes
             count_label: bins,
             rate_name: np.array(bins, dtype=float) / trials,
             # Y-Axes
             "Number of Bootstrap Samples Observed": counts,
         }
     )
     fig = px.bar(
         df,
         x=rate_name,
         y="Number of Bootstrap Samples Observed",
         hover_data=[count_label],
         title=f"Frequency of {rate_name} in Bootstrap Samples",
     )
     fig.update_layout(hovermode="x unified", **layout)

     # NOTE(review): confidence_interval() is called without a level, so the
     # labels assume its default matches `confidence_percent` - confirm.
     ci_lower, ci_upper = boot_results.confidence_interval()
     rate_ci_lower = ci_lower / trials
     rate_ci_upper = ci_upper / trials
     rate_mean = np.mean(samples) / trials
     rate_std = np.std(samples) / trials

     markers = (
         (rate_ci_lower, f"lower {confidence_percent}%", "top right"),
         (rate_ci_upper, f"upper {confidence_percent}%", "top left"),
         (rate_mean, "mean", "top left"),
     )
     for x, text, position in markers:
         fig.add_vline(
             x=x,
             annotation_text=text,
             annotation_position=position,
             line_width=1,
             line_dash="dash",
             line_color="yellow",
         )
     return fig, rate_ci_lower, rate_ci_upper, rate_mean, rate_std


 def cmd_report(opts: argparse.Namespace) -> int:
     """Conduct a full analysis of all test cases and generate a final report."""
     # NOTE: The docstring above doubles as the "report" subcommand help text
     # in main(), so it is kept to a single line.
     #
     # Reads opts.user_groups_csv, opts.testcases_decisions_dir and
     # opts.analysis_dir. Always returns 0; failures surface as exceptions.
     user_groups_csv: Optional[pathlib.Path] = opts.user_groups_csv
     testcases_decisions_dir: pathlib.Path = opts.testcases_decisions_dir
     analysis_dir: pathlib.Path = opts.analysis_dir

     if not user_groups_csv:
         user_groups_csv = testcases_decisions_dir / "User_groups.csv"

     analysis_dir.mkdir(exist_ok=True)
     source_dir = pathlib.Path(__file__).parent
     rpt = Report2(analysis_dir, source_dir / "templates")

     ################# Import Data From BET Results #################

     print("# Read in data")

     bet = FPCBETResults(testcases_decisions_dir)

     # FIXME: Only enable one test case for speed of testing.
     # test_cases = [FPCBETResults.TestCase.TUDisabled]
     test_cases = FPCBETResults.TestCase.all()

     far_decisions = bet.read_files(
         [(tc, FPCBETResults.TableType.FAR_Decision) for tc in test_cases]
     )
     frr_decisions = bet.read_files(
         [(tc, FPCBETResults.TableType.FRR_Decision) for tc in test_cases]
     )

     # NOTE(review): the study dimensions below are hard-coded.
     exps = {
         tc: Experiment(
             num_verification=80,
             num_fingers=6,
             num_users=72,
             far_decisions=far,
             frr_decisions=frr,
         )
         for tc, far, frr in zip(test_cases, far_decisions, frr_decisions)
     }

     for exp in exps.values():
         exp.add_groups_from_csv(user_groups_csv)

     ################# Generate Report Test cases #################

     print("# Setup report test cases")

     rpt_tc = {
         tc: rpt.test_case_add(str(tc), tc.description()) for tc in test_cases
     }

     ################# Histograms #################

     # The figures are built from the Experiment's pre-aggregated counts,
     # because a histogram over the entire FAR/FRR decision DataFrame is much
     # too slow and will actually hang plotly.

     print("# Add main histograms to report")

     for tc in test_cases:
         _add_test_case_histograms(rpt_tc[tc].add_subsection("hist"), exps[tc])

     print("# Add remaining histograms to report")

     _add_overall_histograms(rpt, exps, test_cases)

     ################# Bootstrap Sampling #################

     print("# Run bootstrap samples")

     # 1000 samples is 95%
     # 5000 samples is 99%
     # BOOTSTRAP_SAMPLES = 1000
     # BOOTSTRAP_SAMPLES = 5000  # 5000 samples in 2 seconds (128 cores)
     BOOTSTRAP_SAMPLES = 100000  # 100000 samples in 16 seconds (128 cores)
     CONFIDENCE_PERCENT = 95
     FAR_THRESHOLD = 1 / 100000.0
     FRR_THRESHOLD = 10 / 100.0

     def progress(it, total):
         return tqdm(it, total=total)

     for tc in test_cases:
         print(f"Running Test Case {tc}.")
         exp = exps[tc]
         section = rpt_tc[tc].add_subsection("score")
         info = section.add_data("Info")

         #### FAR ####

         # Run FAR bootstrap
         boot = bootstrap.BootstrapFullFARHierarchy(exp, verbose=True)
         # boot = bootstrap.BootstrapFARFlat(exp, verbose=True)
         boot_results = boot.run(
             num_samples=BOOTSTRAP_SAMPLES,
             num_proc=0,
             progress=progress,
         )
         far_trials = exp.fa_trials_count()
         (
             fig,
             far_ci_lower,
             far_ci_upper,
             far_mean,
             far_std,
         ) = _bootstrap_histogram(
             boot_results,
             far_trials,
             "FAR",
             "False Accepts in Bootstrap Sample",
             CONFIDENCE_PERCENT,
         )

         # Reference lines; keep the 1/100k one in sync with FAR_THRESHOLD.
         # A 1/50k line is intentionally left out: it has the undesirable
         # side effect of shifting the focus of the histogram to include
         # 1/50k, which might be very far from the mean.
         fig.add_vline(
             x=1 / 100000.0,
             annotation_text="1/100k",
             line_width=1,
             line_dash="dash",
             line_color="red",
         )
         fig.add_vline(
             x=1 / 200000.0,
             annotation_text="1/200k",
             line_width=1,
             line_dash="dash",
             line_color="red",
         )

         section.add_figure(
             "FAR_Bootstrap",
             "The hierarchical FAR bootstrap sampling histogram.",
             fig,
         )

         info.set("FAR_Confidence", CONFIDENCE_PERCENT)
         info.set("FAR_Trials", far_trials)
         info.set("FAR_False_Accepts", exp.fa_count())
         info.set("FAR_CI_Lower", far_ci_lower)
         info.set("FAR_CI_Upper", far_ci_upper)
         info.set("FAR_Mean", far_mean)
         info.set("FAR_Std", far_std)
         info.set("FAR_Threshold", f"1/{1 / (FAR_THRESHOLD*1000)}k")
         info.set("FAR_Pass", far_ci_upper < FAR_THRESHOLD)

         #### FRR ####

         # Run FRR bootstrap
         boot = bootstrap.BootstrapFullFRRHierarchy(exp, verbose=True)
         boot_results = boot.run(
             num_samples=BOOTSTRAP_SAMPLES,
             num_proc=0,
             progress=progress,
         )
         frr_trials = exp.fr_trials_count()
         (
             fig,
             frr_ci_lower,
             frr_ci_upper,
             frr_mean,
             frr_std,
         ) = _bootstrap_histogram(
             boot_results,
             frr_trials,
             "FRR",
             "False Rejects in Bootstrap Sample",
             CONFIDENCE_PERCENT,
             xaxis=dict(tickformat="%"),
             bargap=0.0,
             bargroupgap=0.0,
         )

         # Reference line; keep it in sync with FRR_THRESHOLD.
         fig.add_vline(
             x=10 / 100,
             annotation_text="10%",
             line_width=2,
             line_dash="dash",
             line_color="red",
         )

         section.add_figure(
             "FRR_Bootstrap",
             "The hierarchical FRR bootstrap sampling histogram.",
             fig,
         )

         info.set("FRR_Confidence", CONFIDENCE_PERCENT)
         info.set("FRR_Trials", frr_trials)
         # NOTE(review): this key holds the false *reject* count. The name is
         # kept because report templates outside this file may refer to it.
         info.set("FRR_False_Accepts", exp.fr_count())
         info.set("FRR_CI_Lower", frr_ci_lower)
         info.set("FRR_CI_Upper", frr_ci_upper)
         info.set("FRR_Mean", frr_mean)
         info.set("FRR_Std", frr_std)
         info.set("FRR_Threshold", f"{FRR_THRESHOLD * 100}%")
         info.set("FRR_Pass", frr_ci_upper < FRR_THRESHOLD)

     #### Generate Report ####

     print("# Generate final report")

     rpt.generate(
         {
             # 'pdf',
             # 'md',
             "html",
         }
     )

     # absolute() must be called; formatting the bare attribute would print
     # the bound method's repr instead of the path.
     print(f"View {(analysis_dir / 'index.html').absolute()}")

     return 0


 def cmd_groups_discover(opts: argparse.Namespace) -> int:
     """Discover the user-group mapping from a raw collection dir structure.

     Write this table out to a CSV file, which typically is called
     User_groups.csv.
     """
     # NOTE: The docstring above doubles as the "groups-discover" help text
     # in main(), so its wording is part of the CLI output.
     collection_dir: pathlib.Path = opts.src_collection_dir
     csv_path: pathlib.Path = opts.user_groups_csv

     # Only the group table of the experiment is used here, so it is created
     # with zeroed dimensions and no decision data.
     experiment = Experiment(0, 0, 0)
     experiment.add_groups_from_collection_dir(collection_dir)
     experiment.user_groups_table_to_csv(csv_path)
     return 0


 def main(argv: list[str]) -> int:
     """Parse `argv`, validate the given paths, and run the subcommand.

     Args:
         argv: The command line arguments, without the program name.

     Returns:
         The exit status returned by the selected subcommand handler.
         Invalid arguments end the process through `parser.error()`.
     """
     # The module docstring is None when docstrings are stripped (-OO).
     description = __doc__.splitlines()[0] if __doc__ else None
     parser = argparse.ArgumentParser(description=description)
     subparsers = parser.add_subparsers(
         dest="subcommand", required=True, title="subcommands"
     )

     # Parser for "report" subcommand.
     parser_report = subparsers.add_parser("report", help=cmd_report.__doc__)
     parser_report.set_defaults(func=cmd_report)
     parser_report.add_argument(
         "--user-groups-csv",
         type=pathlib.Path,
         help="Path to the user-group mapping CSV file. "
         "(default: <testcases_decisions_dir>/User_groups.csv).",
     )
     parser_report.add_argument(
         "testcases_decisions_dir",
         type=pathlib.Path,
         help="Directory of directories that holds the matcher decisions for each test case",
     )
     # nargs="?" is what makes the default take effect: argparse ignores
     # `default` on a positional that is required. String defaults still go
     # through `type`, so the attribute is always a pathlib.Path.
     parser_report.add_argument(
         "analysis_dir",
         nargs="?",
         default="analysis",
         type=pathlib.Path,
         help="Directory to output the analysis report "
         "(default: %(default)s)",
     )

     # Parser for "groups-discover" subcommand.
     parser_groups_discover = subparsers.add_parser(
         "groups-discover", help=cmd_groups_discover.__doc__
     )
     parser_groups_discover.set_defaults(func=cmd_groups_discover)
     parser_groups_discover.add_argument(
         "src_collection_dir",
         type=pathlib.Path,
         help="Path to raw collection directory where we will learn the "
         "participant groups from",
     )
     parser_groups_discover.add_argument(
         "user_groups_csv",
         nargs="?",
         type=pathlib.Path,
         default="User_groups.csv",
         help="The path to the User_groups.csv we will write to "
         "(default: %(default)s)",
     )

     args = parser.parse_args(argv)

     # Validate paths up front so that mistakes are reported as usage errors
     # rather than as exceptions from deep inside the subcommand.
     if args.subcommand == "report":
         if args.user_groups_csv and not args.user_groups_csv.is_file():
             parser.error("user-groups-csv must be a CSV file")
         if not args.testcases_decisions_dir.is_dir():
             parser.error("testcases_decisions_dir must be a directory")
         if args.analysis_dir.exists() and not args.analysis_dir.is_dir():
             parser.error("analysis_dir must be a directory")
     elif args.subcommand == "groups-discover":
         if not args.src_collection_dir.is_dir():
             parser.error("src_collection_dir must be a directory")
         # Refuse to overwrite an existing mapping file.
         if args.user_groups_csv.exists():
             parser.error(
                 f"user_groups_csv {args.user_groups_csv} already exists"
             )

     return args.func(args)


 if __name__ == "__main__":
     # Use the subcommand's return value as the process exit status.
     raise SystemExit(main(sys.argv[1:]))