suite_sets: add more complex validation checks

Add checks ensuring  all csuite ids are unique, all sub-csuites exist,
and no cycles exist.

BUG=b:323238878
TEST=ran config presubmits

Change-Id: Ic2207990c5d27139fe84e43feef252e6c3a05e6d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/5315436
Commit-Queue: Jack Gelinas <jackgelinas@google.com>
Reviewed-by: Brett Brotherton <bbrotherton@google.com>
Tested-by: Jack Gelinas <jackgelinas@google.com>
Reviewed-by: Derek Beckett <dbeckett@chromium.org>
diff --git a/src/chromiumos/test/python/src/tools/prep_centralized_suites.py b/src/chromiumos/test/python/src/tools/prep_centralized_suites.py
index 4140813..f52a429 100644
--- a/src/chromiumos/test/python/src/tools/prep_centralized_suites.py
+++ b/src/chromiumos/test/python/src/tools/prep_centralized_suites.py
@@ -15,11 +15,12 @@
 
 
 def main(args):
-    suite_set_utils.prep_suites(
-        args.suites_input, args.suites_output, args.test_metadata_dir
-    )
-    suite_set_utils.prep_suite_sets(
-        args.suite_sets_input, args.suite_sets_output
+    suite_set_utils.prep_centralized_suites(
+        args.suites_input,
+        args.suites_output,
+        args.suite_sets_input,
+        args.suite_sets_output,
+        args.test_metadata_dir,
     )
 
 
diff --git a/src/chromiumos/test/python/src/tools/suite_set_utils.py b/src/chromiumos/test/python/src/tools/suite_set_utils.py
index 6181f4e..a002516 100644
--- a/src/chromiumos/test/python/src/tools/suite_set_utils.py
+++ b/src/chromiumos/test/python/src/tools/suite_set_utils.py
@@ -7,7 +7,7 @@
 import os
 import pathlib
 import sys
-from typing import List, NamedTuple, Optional, Set
+from typing import Dict, List, NamedTuple, Optional, Set, Union
 
 from google.protobuf import json_format  # pylint: disable=import-error
 
@@ -67,19 +67,39 @@
     suites: Set[str]
 
 
-def prep_suites(src: List[str], dest: str, test_metadata_dir: str) -> None:
-    """Aggregates all Suites and writes them to the given file.
+CentralizedSuite = Union[SuiteSet, Suite]
 
-    The Suites are aggregated from protos defined in the config and
-    config-internal repos. The test metadata is used to filter out tests not
-    relevant to the board being built.
+
+def prep_centralized_suites(
+    suites_input: List[str],
+    suites_output: str,
+    suite_sets_input: List[str],
+    suite_sets_output: str,
+    test_metadata_dir: str,
+) -> None:
+    """Aggregates all CSuites and writes them to the given out files.
+
+    The Suites/SuiteSets are aggregated from protos defined in
+    suite_input/suite_set_input files. The test metadata is used to
+    filter out tests not relevant to the board being built.
 
     Args:
-        src: Files to read the jsonpb SuiteList protos from.
-        dest: File path to write the resulting SuiteList proto to.
+        suites_input: Files to read the jsonpb SuiteList protos from.
+        suites_output: File path to write the resulting SuiteList proto to.
+        suite_sets_input: Files to read the jsonpb SuiteSetList protos from.
+        suite_sets_output: File path to write the resulting SuiteSetList
+            proto to.
         test_metadata_dir: Path to directory containing test metadata proto
             files.
     """
+    suites = _aggregate_suites(suites_input, test_metadata_dir)
+    suite_sets = _aggregate_suite_sets(suite_sets_input)
+    _write_suites_to_file(suites, suites_output)
+    _write_suite_sets_to_file(suite_sets, suite_sets_output)
+
+
+def _aggregate_suites(src: List[str], test_metadata_dir: str) -> List[Suite]:
+    """Aggregates all Suites across input files."""
     suites = load_suites(src)
 
     # Merge Suites with the same name. This is needed support Suites that are
@@ -90,32 +110,20 @@
     # metadata generated for that board and filter out tests from the Suites
     # that are not part of the test metadata.
     test_ids = _load_test_ids(test_metadata_dir)
-    suites = _remove_tests_not_in_list(suites, test_ids)
-
-    _write_suites_to_file(suites, dest)
+    return _remove_tests_not_in_list(suites, test_ids)
 
 
-def prep_suite_sets(src: List[str], dest: str):
-    """Aggregates all SuiteSets and writes them to the given file.
-
-    The SuiteSets are aggregated from protos defined in the config and
-    config-internal repos.
-
-    Args:
-        src: Files to read the jsonpb SuiteSetList protos from.
-        dest: File path to write the resulting SuiteSetList proto to.
-    """
+def _aggregate_suite_sets(src: List[str]) -> List[SuiteSet]:
+    """Aggregates all SuiteSets across input files."""
     suite_sets = load_suite_sets(src)
 
     # Merge SuiteSets with the same name. This is needed support SuiteSets that
     # are defined in both config and config-internal as a result of the SuiteSet
     # containing both public and private Suite(Set)s.
-    suite_sets = _union_suite_sets(suite_sets)
-
-    _write_suite_sets_to_file(suite_sets, dest)
+    return _union_suite_sets(suite_sets)
 
 
-def _load_test_ids(test_metadata_dir):
+def _load_test_ids(test_metadata_dir: str) -> Set[str]:
     """Gets a list of test ids from the test metadata files in the given dir."""
     test_metadata_list_protos = proto_utils.load_protos_from_dir(
         test_metadata_dir,
@@ -126,7 +134,14 @@
 
 
 def load_suites(files: List[str]) -> List[Suite]:
-    """Gets a list of Suites from the protos defined in the config repos."""
+    """Gets a list of Suites from the protos defined in the given files.
+
+    Args:
+        files: Files to read the jsonpb SuiteList protos from.
+
+    Returns:
+        A list that is the concatenation of all the suite lists.
+    """
     suite_list_protos = proto_utils.load_protos_from_files(
         files,
         suite_set_pb2.SuiteList,
@@ -136,7 +151,14 @@
 
 
 def load_suite_sets(files: List[str] = None):
-    """Gets a list of SuiteSets from the protos defined in the config repos."""
+    """Gets a list of SuiteSets from the protos defined in the given files.
+
+    Args:
+        files: Files to read the jsonpb SuiteList protos from.
+
+    Returns:
+        A list that is the concatenation of all the suite lists.
+    """
 
     suite_set_list_protos = proto_utils.load_protos_from_files(
         files,
@@ -213,6 +235,7 @@
 
 
 def _validate_suite(suite_proto: suite_set_pb2.Suite) -> Optional[str]:
+    """Validates the Suite is well-formed."""
     suite_str = json_format.MessageToJson(suite_proto)
 
     if not (suite_proto.id and suite_proto.id.value):
@@ -229,6 +252,7 @@
 def _validate_suite_set(
     suite_set_proto: suite_set_pb2.SuiteSet,
 ) -> Optional[str]:
+    """Validates the SuiteSet is well-formed."""
     suite_set_str = json_format.MessageToJson(suite_set_proto)
 
     if not (suite_set_proto.id and suite_set_proto.id.value):
@@ -241,8 +265,21 @@
     if not (suite_set_proto.suite_sets or suite_set_proto.suites):
         return f"SuiteSet must contain a Suite or SuiteSet: {suite_set_str}"
 
+    if suite_set_proto.suite_sets:
+        sub_suite_sets = [s.value for s in suite_set_proto.suite_sets]
+        if _has_dupes(sub_suite_sets):
+            return (
+                f"SuiteSet can't have duplicate sub-SuiteSets: {suite_set_str}"
+            )
+
+    if suite_set_proto.suites:
+        sub_suites = [s.value for s in suite_set_proto.suites]
+        if _has_dupes(sub_suites):
+            return f"SuiteSet can't have duplicate sub-suites: {suite_set_str}"
+
 
 def _validate_metadata(metadata: suite_set_pb2.Metadata) -> Optional[str]:
+    """Validates the Metadata is well-formed."""
     if not metadata.owners:
         return "metadata missing owners"
     if not (metadata.bug_component and metadata.bug_component.value):
@@ -251,6 +288,11 @@
         return "metadata missing criteria"
 
 
+def _has_dupes(l: List) -> bool:
+    """Returns true if the list has duplicate elements, false otherwise."""
+    return len(l) != len(set(l))
+
+
 def _union_suites(suites: List[Suite]) -> List[Suite]:
     """Dedupes Suites with the same id by unioning their test lists."""
     suite_id_to_suite = {}
@@ -288,6 +330,76 @@
     ]
 
 
+def validate_centralized_suites(
+    csuites: List[CentralizedSuite], ensure_descendants_exist: bool = True
+) -> None:
+    """Validates csuite mappings are wellformed.
+
+    Ensures all csuite ids are unique, all sub-csuites exist (if
+    ensure_descendants_exist is True), and no cycles
+    exist with the csuites.
+
+    Args:
+        csuites: The list of centralized suites to validate.
+        ensure_descendants_exist: If an error should be thrown if a csuite
+            descendant does not exist in the mappings.
+    """
+    mappings = {}
+    for csuite in csuites:
+        csuite_id = _get_csuite_id(csuite)
+        if csuite_id in mappings:
+            raise RuntimeError(f"csuite id not is unique: {csuite_id}")
+        mappings[csuite_id] = csuite
+    for csuite_id in mappings:
+        _assert_acyclic_csuite(csuite_id, mappings, ensure_descendants_exist)
+
+
+def _get_csuite_id(csuite: CentralizedSuite) -> str:
+    """Returns the id of the given csuite."""
+    if isinstance(csuite, Suite):
+        return csuite.suite_id
+    elif isinstance(csuite, SuiteSet):
+        return csuite.suite_set_id
+    else:
+        raise ValueError(f"unsupported datatype: {type(csuite)}")
+
+
+def _get_sub_csuite_ids(csuite: CentralizedSuite) -> List[str]:
+    """Returns the csuties contained within the given csuite."""
+    if isinstance(csuite, Suite):
+        return []
+    elif isinstance(csuite, SuiteSet):
+        return list(csuite.suite_sets) + list(csuite.suites)
+    else:
+        raise ValueError(f"unsupported datatype: {type(csuite)}")
+
+
+def _assert_acyclic_csuite(
+    csuite_id: str,
+    mappings: Dict[str, CentralizedSuite],
+    ensure_descendants_exist: bool,
+) -> None:
+    """Validates csuite is acyclic and all sub-csuites."""
+
+    def _assert_acyclic_csuite_inner(csuite_id: str, visted: Set[str]) -> None:
+        if csuite_id in visted:
+            raise RuntimeError(f"cycle detected with csuite: {csuite_id}")
+        if csuite_id not in mappings:
+            if ensure_descendants_exist:
+                raise RuntimeError(f"csuite does not exist: {csuite_id}")
+            else:
+                return
+        # The visited list is copied so that recursive calls don't mutate each
+        # other's visited list.
+        new_visted = visted.copy()
+        new_visted.add(csuite_id)
+        csuite = mappings[csuite_id]
+        for sub_csuite_id in _get_sub_csuite_ids(csuite):
+            _assert_acyclic_csuite_inner(sub_csuite_id, new_visted)
+
+    _assert_acyclic_csuite_inner(csuite_id, set())
+
+
 def _write_suites_to_file(suites: List[Suite], dest: str) -> None:
     """Writes the Suites to the given file as a SuiteList text proto."""
     suite_list_proto = _convert_to_suite_list_proto(suites)