apache_log_metrics: Fix endpoint explosion
The file name parsed from a /static/ endpoint path was emitted verbatim
rather than matched against a known pattern, so we risk emitting too many
streams to Monarch. Fix this by providing a canonical name for a fixed set
of filename patterns and emitting filename='' otherwise.
Also, require the milestone to match 'R\d+' so that non-milestone
strings are not reported as milestones.
BUG=chromium:630799, chromium:630797
TEST=unittests pass.
Change-Id: Ia47c29cf1bb92a0f3d9282b0eb9ff9f8fa28f04e
Reviewed-on: https://chromium-review.googlesource.com/362698
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
Tested-by: Paul Hobbs <phobbs@google.com>
diff --git a/apache_log_metrics.py b/apache_log_metrics.py
index e94edd6..0bd1a4d 100755
--- a/apache_log_metrics.py
+++ b/apache_log_metrics.py
@@ -81,6 +81,33 @@
for (base, mask) in LAB_SUBNETS)
+# A milestone is 'R' followed by digits and nothing else. The trailing '$'
+# matters because callers use .match(), which only anchors at the start;
+# without it 'R52<anything>' would pass and be emitted as its own value.
+MILESTONE_PATTERN = re.compile(r'R\d+$')
+
+# Artifact names that are reported as-is in the filename field.
+FILENAME_CONSTANTS = [
+    'stateful.tgz',
+    'client-autotest.tar.bz2',
+    'chromiumos_test_image.bin',
+    'autotest_server_package.tar.bz2',
+]
+
+# (compiled regex, canonical name) pairs, tried in order; the first regex
+# that matches decides the canonical name. The constant names are escaped
+# so that '.' only matches a literal dot.
+FILENAME_PATTERNS = [
+    (re.compile(re.escape(s)), s) for s in FILENAME_CONSTANTS
+] + [
+    (re.compile(r'dep-.*\.bz2'), 'dep-*.bz2'),
+    (re.compile(r'chromeos_.*_delta_test\.bin-.*'),
+     'chromeos_*_delta_test.bin-*'),
+    (re.compile(r'chromeos_.*_full_test\.bin-.*'),
+     'chromeos_*_full_test.bin-*'),
+    (re.compile(r'test-.*\.bz2'), 'test-*.bz2'),
+]
+
+
+def MatchAny(needle, patterns, default=''):
+  """Returns the value paired with the first pattern that matches |needle|.
+
+  Args:
+    needle: The string to test.
+    patterns: An iterable of (pattern, value) pairs, tried in order. Each
+        pattern must provide a .match() method.
+    default: The value to return when no pattern matches.
+
+  Returns:
+    The value from the first pair whose pattern.match(needle) is truthy, or
+    |default| if there is none.
+  """
+  hits = (label for regex, label in patterns if regex.match(needle))
+  return next(hits, default)
+
+
def ParseStaticEndpoint(endpoint):
"""Parses a /static/.* URL path into build_config, milestone, and filename.
@@ -99,8 +126,11 @@
if len(parts) >= 2:
version = parts[1]
milestone = version[:version.index('-')]
+ if not MILESTONE_PATTERN.match(milestone):
+ milestone = ''
if len(parts) >= 3:
- filename = parts[-1]
+ filename = MatchAny(parts[-1], FILENAME_PATTERNS)
+
except IndexError as e:
logging.debug('%s failed to parse. Caught %s' % (endpoint, str(e)))