# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This script generate monarch metrics from Gs Cache server logs.
It parses the Nginx access log file of Gs Cache, e.g.
/var/log/nginx/gs-cache-server.access.log, and generate monarch metrics for Gs
Cache performance.
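
Example invocation (illustrative; the script file name is elided here, and the
log path is the Nginx default mentioned above):
  <script> -i /var/log/nginx/gs-cache-server.access.log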
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import re
import sys
from logging import handlers

from chromite.lib import cros_logging as logging
from chromite.lib import metrics
from chromite.lib import ts_mon_config

_LOG_ROTATION_TIME = 'H'
_LOG_ROTATION_INTERVAL = 24 # hours
_LOG_ROTATION_BACKUP = 14 # Keep for 14 days.
_METRIC_NAME = 'chromeos/gs_cache/nginx/response_metrics'
# An example Nginx access log line is:
# <ip> 2018-07-26T18:13:49-07:00 "GET URL HTTP/1.1" 200 <size> "<agent>" HIT
_SUCCESS_RESPONSE_MATCHER = re.compile(
r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '
r'(?P<timestamp>\d+\-\d+\-\d+T\d+:\d+:\d+[+\-]\d+:\d+) '
r'"(?P<http_method>\S+) (?P<url_path>\S*)[^"]*" '
r'(?P<status_code>\d+) (?P<size>\S+) "[^"]+" (?P<cache_status>\S+)')
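
# A minimal, illustrative match (all field values below are made up, not
# taken from a real log):
#   line = ('100.115.245.193 2018-07-26T18:13:49-07:00 '
#           '"GET /download/my-bucket/my-build/R65-10323.9.0/f.bin HTTP/1.1" '
#           '200 4096 "curl/7.58.0" HIT')
#   m = _SUCCESS_RESPONSE_MATCHER.match(line)
#   # m.group('status_code') == '200', m.group('size') == '4096',
#   # m.group('cache_status') == 'HIT'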
# Define common regexes.
_COMMON_PARTIAL_REGEX = (r'(?P<build>[^/]+)/(?P<milestone>\S\d+)'
r'\-(?P<version>[^/]+)')
_COMMON_FULL_REGEX = (r'^/(?P<action>[^/]+)/(?P<bucket>[^/]+)/%s' %
_COMMON_PARTIAL_REGEX)
# Regex for all accepted actions.
_URL_PATH_MATCHER = {
'decompress': re.compile(r'%s/(?P<filename>[^\?]+)' % _COMMON_FULL_REGEX),
'download': re.compile(r'%s/(?P<filename>[^\?]+)' % _COMMON_FULL_REGEX),
'extract': re.compile(r'%s/(?P<package>[^\?]+)\?file=(?P<filename>[^\?]+)'
% _COMMON_FULL_REGEX),
'list_dir': re.compile(r'%s' % _COMMON_FULL_REGEX),
# TODO(crbug.com/1122319): Remove all fake RPCs once all devserver clients
# have been migrated to TLW/TLS caching API.
'fake_check_health': re.compile(r'/(?P<action>[^/]+)'),
'fake_list_image_dir': re.compile(r'/(?P<action>[^/]+)'),
'fake_is_staged': re.compile(r'/(?P<action>[^/]+)'),
'fake_stage': re.compile(r'/(?P<action>[^/]+)'),
'setup_telemetry': re.compile(r'/(?P<action>[^/]+)\?archive_url=gs://'
r'(?P<bucket>[^/]+)/%s' %
_COMMON_PARTIAL_REGEX),
'static': re.compile(r'/(?P<action>[^/]+)/(?P<filename>[^\?]+)'),
'update': re.compile(r'^/(?P<action>[^/]+)/%s.*' % _COMMON_PARTIAL_REGEX),
}
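# Illustrative example (made-up names): the URL path
#   '/download/my-bucket/my-build/R65-10323.9.0/f.bin'
# matches _URL_PATH_MATCHER['download'] with groups action='download',
# bucket='my-bucket', build='my-build', milestone='R65', version='10323.9.0'
# and filename='f.bin'.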


def emit_successful_response_metric(m):
  """Emit a Counter metric for a successful response.

  We parse the response line and upload the size of the response to a
  cumulative metric. In particular, the metric has a designated field 'cache',
  which indicates the upstream cache status, e.g. HIT/MISS. For the full list
  of cache statuses, see
  http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_cache_status

  Args:
    m: A regex match object or None.
  """
if not m:
return
logging.debug('Emitting successful response metric.')
metric_fields = {
'cache': m.group('cache_status'),
'action': '',
'bucket': '',
'build': '',
'milestone': '',
'endpoint': '',
'status_code': m.group('status_code'),
}
metric_fields.update(match_url_path(m.group('url_path')))
metrics.Counter(_METRIC_NAME).increment_by(int(m.group('size')),
fields=metric_fields)


def match_url_path(url):
  """Extract information from the url by matching it with the appropriate regex.

  Args:
    url: The url to be matched.

  Returns:
    A dictionary containing all matched fields and their respective values.
  """
# The url is either in the format /<action_name>?<param1>=<value1>.. or
# /<action_name>/<arg1>/<arg2>/..?<param1>=<value1>..
# To single out the action name from these urls, first strip the leading '/'
# and then handle the cases where the action name can be followed by a '/'
# or a '?'.
action_name = url.strip('/').split('/')[0].split('?')[0]
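  # For example (values illustrative): '/static/some/file.bin' yields
  # 'static', and '/fake_stage?archive_url=...' yields 'fake_stage'.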
try:
info = _URL_PATH_MATCHER[action_name].match(url)
matched_group_dict = info.groupdict()
return {
'action': matched_group_dict.get('action', ''),
'bucket': matched_group_dict.get('bucket', ''),
'build': matched_group_dict.get('build', ''),
'milestone': matched_group_dict.get('milestone', ''),
'endpoint': matched_group_dict.get('filename', ''),
}
except KeyError:
    logging.warning('The URL: %s did not match any of the regexes. Maybe '
                    'it is new?', url)
except Exception as e:
logging.error('Could not extract fields names for %s due to exception: %s',
url, e)
return {}
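
# Illustrative usage of match_url_path (same made-up URL as above):
#   match_url_path('/download/my-bucket/my-build/R65-10323.9.0/f.bin')
#   # -> {'action': 'download', 'bucket': 'my-bucket', 'build': 'my-build',
#   #     'milestone': 'R65', 'endpoint': 'f.bin'}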


def input_log_file_type(filename):
  """An argparse type function converting input filename to a file object.

  It converts '-' to sys.stdin.
  """
if filename == '-':
return sys.stdin
return argparse.FileType('r')(filename)


def parse_args(argv):
  """Parses command line arguments."""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'-i', '--input-log-file', metavar='NGINX_ACCESS_LOG_FILE',
dest='input_fd', required=True,
type=input_log_file_type,
help=('Nginx log file of Gs Cache '
'(use "-" to indicate reading from sys.stdin).')
)
parser.add_argument(
'-l', '--log-file', default=sys.stdout,
help='Log file of this script (default is sys.stdout).'
)
return parser.parse_args(argv)


def main(argv):
  """Main function."""
args = parse_args(argv)
logger = logging.getLogger()
if args.log_file is sys.stdout:
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
else:
logger.addHandler(handlers.TimedRotatingFileHandler(
args.log_file, when=_LOG_ROTATION_TIME,
interval=_LOG_ROTATION_INTERVAL,
backupCount=_LOG_ROTATION_BACKUP))
logger.setLevel(logging.DEBUG)
with ts_mon_config.SetupTsMonGlobalState('gs_cache_nginx_log_metrics',
indirect=True):
    # args.input_fd is a text-mode file, whose readline() returns '' (not
    # b'') at EOF; use '' as the sentinel so the loop terminates.
    for line in iter(args.input_fd.readline, ''):
logging.debug('Parsing line: %s', line.strip())
emit_successful_response_metric(_SUCCESS_RESPONSE_MATCHER.match(line))


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))