| # -*- coding: utf-8 -*- |
| # Copyright 2020 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Utilities for parsing structured.xml. |
| |
| Functions in this module raise an error if constraints on the format of the |
| structured.xml file are not met. |
| |
| Functions use the concept of 'compound' and 'text' XML nodes. |
| - compound nodes can have attributes and child nodes, but no text |
| - text nodes can have text, but no attributes or child nodes |
| """ |
| |
| import collections |
| import re |
| |
| |
def error(elem, msg):
    """Abort validation with a ValueError that identifies the failing node.

    The message names the node's tag and, when the node has a non-empty
    'name' attribute, that name as well, followed by `msg`.
    """
    node_name = elem.attrib.get("name")
    if node_name:
        label = node_name + " "
    else:
        label = ""
    raise ValueError(
        f"Structured metrics error, {elem.tag} node {label}: {msg}."
    )
| |
| |
def get_attr(elem, tag, regex=None):
    """Return the value of the attribute named `tag` on `elem`.

    Reports an error if the attribute is absent or empty. When `regex` is
    given, also reports an error unless the value matches it (anchored at the
    start of the value, as with `re.match`).
    """
    value = elem.attrib.get(tag)
    if not value:
        error(elem, f"missing attribute '{tag}'")
    if regex:
        matched = re.match(regex, value)
        if matched is None:
            error(
                elem,
                f"has '{tag}' attribute '{value}' which does "
                f"not match regex '{regex}'",
            )
    return value
| |
| |
def get_compound_children(elem, tag):
    """Get all child nodes of `elem` with tag `tag`.

    Args:
        elem: Parent node; searched with `elem.findall(tag)`.
        tag: Tag of the children to return.

    Returns:
        The non-empty list of matching child nodes.

    Raises:
        ValueError: (via `error`) if no child matches, or if a matching child
            contains non-whitespace text and so is not a compound node.
    """
    children = elem.findall(tag)
    if not children:
        error(elem, f"missing node '{tag}'")
    for child in children:
        # `text` can be None rather than "" (ElementTree does this for nodes
        # with no text at all, e.g. <tag/>), so normalise before stripping.
        if (child.text or "").strip():
            error(child, "contains text, but shouldn't")
    return children
| |
| |
def get_compound_child(elem, tag):
    """Get the child of `elem` with tag `tag`.

    Args:
        elem: Parent node; searched with `elem.findall(tag)`.
        tag: Tag of the child to return.

    Returns:
        The single matching child node.

    Raises:
        ValueError: (via `error`) if there isn't exactly one matching child,
            or if that child contains non-whitespace text and so is not a
            compound node.
    """
    children = elem.findall(tag)
    if len(children) != 1:
        error(elem, f"needs exactly one '{tag}' node")
    child = children[0]
    # Enforce the documented "compound" constraint. `text` can be None for a
    # node with no text at all, so normalise before stripping.
    if (child.text or "").strip():
        error(child, "contains text, but shouldn't")
    return child
| |
| |
def get_text_children(elem, tag, regex=None):
    """Get the text of all child nodes of `elem` with tag `tag`.

    Args:
        elem: Parent node; searched with `elem.findall(tag)`.
        tag: Tag of the children whose text is wanted.
        regex: Optional pattern that each child's stripped text must match
            (anchored at the start of the text, as with `re.match`).

    Returns:
        A non-empty list with the whitespace-stripped text of each matching
        child, in the order `findall` returned them.

    Raises:
        ValueError: (via `error` and the `check_*` helpers) if no child
            matches, a child has attributes or child nodes of its own, a
            child has no text, or the text does not match `regex`.
    """
    children = elem.findall(tag)
    if not children:
        error(elem, f"missing node '{tag}'")

    result = []
    for child in children:
        # A text node may carry neither attributes nor nested nodes.
        check_attributes(child, set())
        check_children(child, set())
        # `text` can be None rather than "" for an empty node such as
        # <tag/>; treat that as missing text instead of crashing on None.
        text = (child.text or "").strip()
        if not text:
            error(elem, f"missing text in '{tag}'")
        if regex and not re.match(regex, text):
            error(
                elem,
                f"has '{tag}' node '{text}' which does "
                f"not match regex '{regex}'",
            )
        result.append(text)
    return result
| |
| |
def get_text_child(elem, tag, regex=None):
    """Return the text of the one child of `elem` whose tag is `tag`.

    Validation of each matching child (text-only node, optional `regex`
    match) is delegated to `get_text_children`; this function then reports
    an error unless exactly one text was returned.
    """
    texts = get_text_children(elem, tag, regex)
    if len(texts) != 1:
        error(elem, f"needs exactly one '{tag}' node")
    (only_text,) = texts
    return only_text
| |
| |
def check_attributes(elem, expected_attrs):
    """Ensure `elem` has no attributes except those in `expected_attrs`.

    Args:
        elem: Node whose attribute names are checked.
        expected_attrs: Iterable of permitted attribute names.

    Raises:
        ValueError: (via `error`) listing the unexpected attribute names in
            sorted order.
    """
    # Build a real set first: `keys()` is not guaranteed to be a set-like
    # view (some element implementations return a plain list), and a list
    # does not support `-` with a set.
    actual_attrs = set(elem.attrib.keys())
    unexpected_attrs = actual_attrs - set(expected_attrs)
    if unexpected_attrs:
        # Sort so the message is identical from run to run.
        attrs = " ".join(sorted(unexpected_attrs))
        error(elem, f"has unexpected attributes: {attrs}")
| |
| |
def check_children(elem, expected_children):
    """Ensure `elem` has no children without tags in `expected_children`.

    Args:
        elem: Node whose direct children are checked.
        expected_children: Iterable of permitted child tags.

    Raises:
        ValueError: (via `error`) listing the unexpected child tags in sorted
            order.
    """
    actual_children = {child.tag for child in elem}
    unexpected_children = actual_children - set(expected_children)
    if unexpected_children:
        # Sort so the message is identical from run to run; raw set order
        # is not stable.
        children = " ".join(sorted(unexpected_children))
        error(elem, f"has unexpected nodes: {children}")
| |
| |
def check_child_names_unique(elem, tag):
    """Report an error if two `tag` children of `elem` share a 'name'.

    Only direct children whose tag equals `tag` are considered. A child
    without a 'name' attribute is treated as having the name None, so two
    such children also count as duplicates.
    """
    seen_names = []
    for child in elem:
        if child.tag == tag:
            seen_names.append(child.attrib.get("name"))
    # Collapsing to a set drops repeats, so a size mismatch means at least
    # one name occurred more than once.
    if len(set(seen_names)) != len(seen_names):
        error(elem, f"has {tag} nodes with duplicate names")