apache_log_metrics: Fix endpoint explosion

The file name in a /static/ endpoint path is currently passed through
without being matched against any pattern, so we risk emitting too many
streams to Monarch. Fix this by providing a canonical name for a fixed
set of filename patterns and emitting filename='' otherwise.

Also, require the milestone to match 'R\d+' so that non-milestone
strings are not reported as milestones.

BUG=chromium:630799, chromium:630797
TEST=unittests pass.

Change-Id: Ia47c29cf1bb92a0f3d9282b0eb9ff9f8fa28f04e
Reviewed-on: https://chromium-review.googlesource.com/362698
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
Tested-by: Paul Hobbs <phobbs@google.com>
diff --git a/apache_log_metrics.py b/apache_log_metrics.py
index e94edd6..0bd1a4d 100755
--- a/apache_log_metrics.py
+++ b/apache_log_metrics.py
@@ -81,6 +81,33 @@
              for (base, mask) in LAB_SUBNETS)
 
 
+MILESTONE_PATTERN = re.compile(r'R\d+')
+
+FILENAME_CONSTANTS = [
+    'stateful.tgz',
+    'client-autotest.tar.bz2',
+    'chromiumos_test_image.bin',
+    'autotest_server_package.tar.bz2',
+]
+
+FILENAME_PATTERNS = [  # (regex, canonical name); literal names are escaped.
+    (re.compile(re.escape(s)), s) for s in FILENAME_CONSTANTS] + [
+    (re.compile(r'dep-.*\.bz2'), 'dep-*.bz2'),
+    (re.compile(r'chromeos_.*_delta_test\.bin-.*'),
+     'chromeos_*_delta_test.bin-*'),
+    (re.compile(r'chromeos_.*_full_test\.bin-.*'),
+     'chromeos_*_full_test.bin-*'),
+    (re.compile(r'test-.*\.bz2'), 'test-*.bz2'),
+]
+
+
+def MatchAny(needle, patterns, default=''):
+  """Returns the value of the first pattern matching needle, else default."""
+  return next(
+      (label for regex, label in patterns if regex.match(needle)),
+      default)
+
+
 def ParseStaticEndpoint(endpoint):
   """Parses a /static/.* URL path into build_config, milestone, and filename.
 
@@ -99,8 +126,11 @@
     if len(parts) >= 2:
       version = parts[1]
       milestone = version[:version.index('-')]
+      if not MILESTONE_PATTERN.match(milestone):
+        milestone = ''
     if len(parts) >= 3:
-      filename = parts[-1]
+      filename = MatchAny(parts[-1], FILENAME_PATTERNS)
+
   except IndexError as e:
     logging.debug('%s failed to parse. Caught %s' % (endpoint, str(e)))