[devserver] Added an apache_log_metrics.py script

This script uploads metrics to Monarch based on apache's request log.

BUG=chromium:621745
TEST=None

Change-Id: I47bf61913da25d44e3fe23fc9502e9c9caebf8a4
Reviewed-on: https://chromium-review.googlesource.com/356299
Commit-Ready: Paul Hobbs <phobbs@google.com>
Tested-by: Paul Hobbs <phobbs@google.com>
Reviewed-by: Don Garrett <dgarrett@chromium.org>
diff --git a/apache_log_metrics.py b/apache_log_metrics.py
new file mode 100755
index 0000000..bfcc6ae
--- /dev/null
+++ b/apache_log_metrics.py
@@ -0,0 +1,117 @@
+#!/usr/bin/python2
+
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Script to upload metrics from apache logs to Monarch.
+
+We are interested in static file bandwidth, so it parses out GET requests to
+/static and uploads the sizes to a cumulative metric.
+"""
+from __future__ import print_function
+
+import argparse
+import logging
+import re
+import sys
+
+from devserver import MakeLogHandler
+
+from chromite.lib import ts_mon_config
+from chromite.lib import metrics
+from infra_libs import ts_mon
+
+
+STATIC_GET_MATCHER = re.compile(
+    r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '  # Leading dotted-quad IP address.
+    r'.*GET /static/\S*[^"]*" '  # Quoted GET request for a /static/ path.
+    r'200 (?P<size>\S+) .*')  # Status 200 followed by the size field.
+
+STATIC_GET_METRIC_NAME = 'chromeos/devserver/apache/static_response_size'
+
+
+LAB_SUBNETS = (  # (base address, prefix length in bits) pairs; see InLab.
+    ("172.17.40.0", 22),
+    ("100.107.160.0", 19),
+    ("100.115.128.0", 17),
+    ("100.115.254.126", 25),
+    ("100.107.141.128", 25),
+    ("172.27.212.0", 22),
+    ("100.107.156.192", 26),
+    ("172.22.29.0", 25),
+    ("172.22.38.0", 23),
+    ("100.107.224.0", 23),
+    ("100.107.226.0", 25),
+    ("100.107.126.0", 25),
+)
+
+def IPToNum(ip):  # Folds dotted-decimal octets into one integer, MSB first.
+  return reduce(lambda seed, x: seed * 2**8 + int(x), ip.split('.'), 0)
+
+
+def MatchesSubnet(ip, base, mask):  # |mask| is a prefix length in bits.
+  ip_value = IPToNum(ip)
+  base_value = IPToNum(base)
+  mask = (2**mask - 1) << (32 - mask)  # Prefix length -> 32-bit netmask.
+  return (ip_value & mask) == (base_value & mask)
+
+
+def InLab(ip):  # True if |ip| matches any (base, mask) in LAB_SUBNETS.
+  return any(MatchesSubnet(ip, base, mask)
+             for (base, mask) in LAB_SUBNETS)
+
+
+def EmitStaticRequestMetric(m):
+  """Emits a Counter metric for successful GETs to /static endpoints."""
+  ipaddr, size = m.groups()
+  try:
+    size = int(size)
+  except ValueError:  # Non-numeric size, e.g. "-", counts as zero.
+    size = 0
+
+  metrics.Counter(STATIC_GET_METRIC_NAME).increment_by(
+      size, fields={
+          'builder': '',
+          'in_lab': InLab(ipaddr),
+          'endpoint': ''})
+
+
+def RunMatchers(stream, matchers):
+  """Parses lines of |stream| using patterns and emitters from |matchers|."""
+  for line in stream:
+    for matcher, emitter in matchers:  # Every matcher is tried on every line.
+      m = matcher.match(line)
+      if m:
+        emitter(m)
+  # The input might terminate if the log gets rotated. Make sure that Monarch
+  # flushes any pending metrics before quitting.
+  ts_mon.close()
+
+
+# TODO(phobbs) add a matcher for all requests, not just static files.
+MATCHERS = [  # (pattern, emitter) pairs consumed by RunMatchers.
+    (STATIC_GET_MATCHER, EmitStaticRequestMetric),
+]
+
+
+def ParseArgs():
+  """Parses command line arguments."""
+  p = argparse.ArgumentParser(
+      description='Parses apache logs and emits metrics to Monarch')
+  p.add_argument('--logfile')  # Handed to MakeLogHandler by main().
+  return p.parse_args()
+
+
+def main():
+  """Sets up logging and runs matchers against stdin."""
+  args = ParseArgs()
+  root = logging.getLogger()
+  root.addHandler(MakeLogHandler(args.logfile))
+  root.setLevel(logging.DEBUG)
+  ts_mon_config.SetupTsMonGlobalState('devserver_apache_log_metrics')
+  RunMatchers(sys.stdin, MATCHERS)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/apache_log_metrics_unittest.py b/apache_log_metrics_unittest.py
new file mode 100755
index 0000000..8e3f3df
--- /dev/null
+++ b/apache_log_metrics_unittest.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python2
+
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unit tests for apache_log_metrics.py"""
+
+from __future__ import print_function
+
+import unittest
+
+import apache_log_metrics
+
+
+STATIC_REQUEST_LINE = (  # Sample apache log line for a /static GET.
+    '172.24.26.30 - - [30/Jun/2016:15:34:40 -0700] '
+    '"GET /static/veyron_minnie-release/R52-8350.46.0/'
+    'autotest_server_package.tar.bz2'
+    ' HTTP/1.1" 200 13805917 "-" "Wget/1.15    (linux-gnu)'
+)
+
+
+class TestParsers(unittest.TestCase):
+  """Tests the parsing functions in apache_log_metrics."""
+
+  def testParseStaticResponse(self):  # Regex captures and InLab on a sample.
+    match = apache_log_metrics.STATIC_GET_MATCHER.match(
+        STATIC_REQUEST_LINE)
+    self.assertTrue(match)
+
+    ip = match.group('ip_addr')
+    self.assertEqual(ip, '172.24.26.30')
+    self.assertFalse(apache_log_metrics.InLab(ip))  # Not in any lab subnet.
+
+    self.assertEqual(match.group('size'), '13805917')
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/devserver.py b/devserver.py
index a3511a5..936966e 100755
--- a/devserver.py
+++ b/devserver.py
@@ -1441,7 +1441,7 @@
   parser.add_option_group(group)
 
 
-def _MakeLogHandler(logfile):
+def MakeLogHandler(logfile):
   """Create a LogHandler instance used to log all messages."""
   hdlr_cls = handlers.TimedRotatingFileHandler
   hdlr = hdlr_cls(logfile, when=_LOG_ROTATION_TIME,
@@ -1492,7 +1492,7 @@
   else:
     cherrypy.config.update({'log.error_file': '',
                             'log.access_file': ''})
-    hdlr = _MakeLogHandler(options.logfile)
+    hdlr = MakeLogHandler(options.logfile)
     # Pylint can't seem to process these two calls properly
     # pylint: disable=E1101
     cherrypy.log.access_log.addHandler(hdlr)