blob: 1faf349f75002bb3a27180dc29b4ab7f05cec2c7 [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script to merge logs together."""
from __future__ import print_function

import collections
import datetime
import os
import re
import sys

import dateutil  # pylint: disable=import-error
import dateutil.parser  # pylint: disable=import-error
import  # pylint: disable=import-error

from chromite.lib import commandline
from chromite.lib import cros_logging as logging
from chromite.lib import gs
# This script leans on f-string-free 3.6+ stdlib behavior (e.g. dict ordering).
assert sys.version_info >= (3, 6), 'This module requires Python 3.6+'

# Default timezone to assume for logs if one is not specified.
DEFAULT_TIMEZONE = 'America/Los_Angeles'

# One parsed log record: the source filename, the parsed (or inherited)
# datetime, and the raw text of the log line.
Log = collections.namedtuple('Log', ('filename', 'date', 'log'))
def GetParser():
  """Builds the command-line argument parser for this script."""
  parser = commandline.ArgumentParser(description=__doc__)
  # Input selection.
  parser.add_argument('files', type=str, nargs='*', action='store',
                      help='Log filenames.')
  parser.add_argument('--filelist', '-f', type=str, action='store',
                      help='File that contains a list of files to view.')
  parser.add_argument('--base', type=str, action='store',
                      help='Base path to pre-pend to all files.')
  # Output formatting.
  parser.add_argument('--html', action='store_true',
                      help='Generate HTML.')
  parser.add_argument('--raw', action='store_true',
                      help='Generate raw output.')
  parser.add_argument('--nosort', action='store_false', dest='sort',
                      help='Do not sort the results.')
  parser.add_argument('--notrim_base', action='store_false', dest='trim_base',
                      help='Do not trim the file path prefixes.')
  parser.add_argument('--trim_path', action='store_true',
                      help='Trim the file path.')
  return parser
def Now():
"""Returns the current datetime.
Added as separate function to allow it to be mockable during tests.
a datetime.
# The following are basic timestamp parsing functions. All are expected
# to return a timezone-aware datetime, with the timezone defaulting to
# DEFAULT_TIMEZONE when the log does not specify one. Some of the functions
# fill in a year (assuming the current year) when one is not parseable.
def ParseDate(timestamp):
"""Basic timestamp to datetime parser.
timestamp: a string.
a datetime.
naive_dt = dateutil.parser.parse(timestamp)
dt = naive_dt.replace(tzinfo=naive_dt.tzinfo or
return dt
def ParseAutoservDate(timestamp):
"""Autoserv log format timestamp to datetime parser.
timestamp: a string.
a datetime.
year = str(Now().year)
naive_dt = dateutil.parser.parse(year + '/' + timestamp)
dt = naive_dt.replace(tzinfo=naive_dt.tzinfo or
return dt
def ParseChromeDate(timestamp):
"""Chrome log format timestamp to datetime parser.
timestamp: a string.
a datetime.
year = str(Now().year)
naive_dt = datetime.datetime.strptime(timestamp + ' ' + year,
'%m%d/%H%M%S.%f %Y')
dt = naive_dt.replace(tzinfo=naive_dt.tzinfo or
return dt
def ParsePowerdDate(timestamp):
"""Powerd log format timestamp to datetime parser.
timestamp: a string.
a datetime.
year = str(Now().year)
naive_dt = datetime.datetime.strptime(timestamp + ' ' + year,
'%m%d/%H%M%S %Y')
dt = naive_dt.replace(tzinfo=naive_dt.tzinfo or
return dt
# The following is all the file/date contents handled. There is an regexp
# pattern which if it matches, specifies a function to call to parse the date
# and a lambda to extract a timestamp via the regexp.
Pattern = collections.namedtuple('Patern', ('regexp', 'func', 'key'))
# pylint: disable=line-too-long
# [0731/] System uptime: 5s
# 2017-07-31 07:05:10.257860139-07:00: Starting arc-removable-media
ARC_DATE_RE = re.compile(
r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d.\d\d\d\d\d\d)\d\d\d([+-]\d\d:\d\d):')
ARCPatterns = [
Pattern(ARC_DATE_RE, ParseDate, lambda m: +
# 07/31 09:17:27.919 DEBUG| utils:0212| Running 'test -d /tmp/sysinfo/autoserv-LOMvzK'
AUTOSERV_DATE_RE = re.compile(r'^(\d\d/\d\d \d\d:\d\d:\d\d.\d\d\d) ')
AutoservPatterns = [
Pattern(AUTOSERV_DATE_RE, ParseAutoservDate, lambda m:
# [8525:8525:0731/] OnPortalDetectionCompleted Online
CHROME_DATE_RE = re.compile(r'^\[\d+:\d+:(\d{4}/\d{6}.\d{6}):')
ChromePatterns = [
Pattern(CHROME_DATE_RE, ParseChromeDate, lambda m:
# 2017/07/31 09:18:08.871 DEBUG| remote_access:0659| The temporary working directory on the device is /mnt/stateful_partition/unencrypted/preserve/cros-update/tmp.S7y5vF3xQE
CROS_DATE_RE = re.compile(r'^(\d\d\d\d/\d\d/\d\d \d\d:\d\d:\d\d.\d\d\d) ')
CrOSPatterns = [Pattern(CROS_DATE_RE, ParseDate, lambda m:]
# [0731/] System uptime: 5s
POWERD_DATE_RE = re.compile(r'^\[(\d{4}/\d{6}):')
PowerdPatterns = [
Pattern(POWERD_DATE_RE, ParsePowerdDate, lambda m:
# 2017-07-31 07:00:46,650 - DEBUG - Running hook: /usr/local/bin/hooks/check_ethernet.hook
RECOVER_DUTS_DATE_RE = re.compile(
r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d) ')
RecoverDutsPatterns = [
ParseDate, lambda m: + '.' +
# START provision_AutoUpdate provision_AutoUpdate timestamp=1501517822 localtime=Jul 31 09:17:02
STATUS_LOG_DATE_RE = re.compile(r'.*localtime=(... \d\d \d\d:\d\d:\d\d)')
StatusLogPatterns = [
Pattern(STATUS_LOG_DATE_RE, ParseDate, lambda m:
# 2017-07-31T09:17:25.907285Z NOTICE ag[9829]: autotest server[stack::get_tmp_dir|run|wrapper] -> ssh_run(mktemp -d /tmp/autoserv-XXXXXX)
SYSINFO_DATE_NEW_RE = re.compile(
r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\dZ) ')
# 2017-07-31T09:17:25.907285-07:00 NOTICE ag[9829]: autotest server[stack::get_tmp_dir|run|wrapper] -> ssh_run(mktemp -d /tmp/autoserv-XXXXXX)
SYSINFO_DATE_OLD_RE = re.compile(
r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\d\d\d\d[+-]\d\d:\d\d) ')
# Syslog format now uses UTC instead of local time zone.
SysinfoPatterns = [
Pattern(SYSINFO_DATE_NEW_RE, ParseDate, lambda m:,
Pattern(SYSINFO_DATE_OLD_RE, ParseDate, lambda m:,
AllPatterns = (ARCPatterns + AutoservPatterns + ChromePatterns +
CrOSPatterns + PowerdPatterns + RecoverDutsPatterns +
StatusLogPatterns + SysinfoPatterns)
class LogParser(object):
"""Line-based log parsing class.
Allows sub-typing via the patterns field to handle specific file types
without looping through all patterns for each file.
def __init__(self, filename, patterns=None):
"""LogParser constructor.
filename: a string of the file to be parsed.
patterns: a list of Pattern namedtuples
self.filename = filename
self.patterns = patterns or AllPatterns
def ParseDate(self, line):
"""Parse the date out of a single log line.
line: a string of the log line.
a datetime if one can be parsed, None otherwise.
for pattern, func, key in self.patterns:
m = pattern.match(line)
if m:
dt = func(key(m))
return dt
return None
def ParseLine(self, line, previous_dt=None):
"""Parse a single log line.
line: a string of the log line.
previous_dt: a datetime of the previous log to use if one cannot be found.
a Log namedtuple.
dt = self.ParseDate(line)
if dt is None:
dt = previous_dt
return Log(filename=self.filename, date=dt, log=line)
# Patterns to match a filename to its date-parsing patterns. Order matters:
# the first matching filename pattern wins.
FILE_PATTERNS = [
    (re.compile(p), dp) for p, dp in [
        (r'(.*/|)arc.*.log', ARCPatterns),
        (r'(.*/|)debug/client.*', AutoservPatterns),
        (r'(.*/|)debug/autoserv.*', AutoservPatterns),
        (r'(.*/|)[^/]+.(DEBUG|INFO|WARNING|ERROR)', AutoservPatterns),
        (r'(.*/|)CrOS_update_[^/]*log', CrOSPatterns),
        (r'(.*/|)powerd', PowerdPatterns),
        (r'(.*/|)recover_duts.log', RecoverDutsPatterns),
        (r'(.*/|)status(.log)?', StatusLogPatterns),
        (r'(.*/|)(messages|secure|tlsdate.log)', SysinfoPatterns),
        # chrome will match directory components, so leave it last.
        (r'(.*/|)(chrome|ui)(\.[^/]*)?$', ChromePatterns),
    ]
]
def FindParser(filename):
"""Select a parser based on filename.
Generates a sub-types parser suitable for handling the dates of a given file.
filename: a string of the log file to be parsed.
LogParser object suitable for parsing that filetype.
for file_pattern, date_patterns in FILE_PATTERNS:
if file_pattern.match(filename):
return LogParser(filename, date_patterns)
logging.warning('Could not find parser for file: %s', filename)
return LogParser(filename)
def ParseFileContents(filename, content):
"""Parse the log contents belonging to a single file.
filename: a string of the file's filename to parse.
content: a list of string of the log lines of the file.
a list of Log namedtuples.
parser = FindParser(filename)
logs = []
first_dt = None
lines = content.splitlines()
previous_dt = None
for line in lines:
log = parser.ParseLine(line.rstrip(), previous_dt)
previous_dt =
# Keep track of the first non-None datetime that is parsed.
if first_dt is None and previous_dt is not None:
first_dt = previous_dt
# Ensure that all log entries have a time, using now if one cannot be
# determined.
if first_dt is None:
first_dt = Now()
for log in logs:
if is None: = first_dt
return logs
GS_RE = re.compile(r'gs://')
def ParseURL(url):
"""Parse the files specified by a URL or filename.
If url is a gs:// URL, globbing is supported.
url: a string of a GS URL or a flat filename.
a list of Log namedtuples.
logs = []
if GS_RE.match(url):
ctx = gs.GSContext()
files = ctx.LS(url)
except gs.GSNoSuchKey:
files = []
for filename in files:
content = ctx.Cat(filename)
logs.extend(ParseFileContents(filename, content))
except gs.GSNoSuchKey:
logging.warning("Couldn't find file %s for url %s.", filename, url)
with open(url) as f:
content =
logs.extend(ParseFileContents(url, content))
return logs
def PrintLog(log):
"""Prints a log to stdout.
log: a Log namedtuple.
print('%s: %s' % (log.filename, log.log))
def TrimLogPrefix(log, base):
"""Removes the prefix |base| from |log|'s filename.
log: a Log namedtuple
base: a string prefix to trim the filenames by.
fname = log.filename
if fname.startswith(base):
fname = fname[len(base):]
return Log(fname,, log.log)
def TrimLogPath(log):
"""Removes the prefix path from |log|'s filename.
log: a Log namedtuple
return Log(os.path.basename(log.filename),, log.log)
def PrintHtmlHeader():
  """Prints an HTML header for log output."""
  # Styles: hide dates by default, keep log text monospace and pre-wrapped.
  header_lines = (
      ' <head>',
      ' <style>',
      ' .line {font-family: monospace;}',
      ' .filename {color: red;}',
      ' .date {color: blue; display: none;}',
      ' .log {white-space: pre;}',
      ' </style>',
      ' </head>',
      ' <body>',
  )
  for header_line in header_lines:
    print(header_line)
def PrintHtmlFooter():
  """Emits the closing HTML for log output."""
  footer = ' </body>'
  print(footer)
def PrintHtml(log):
"""Prints a log as HTML.
log: a Log namedtuple.
def Tag(tag, cls, value):
return '<%s class="%s">%s</%s>' % (tag, cls, value, tag)
classes = ['filename', 'date', 'log']
line = ' '.join([Tag('span', cls, value) for cls, value in zip(classes, log)])
print(Tag('div', 'line', line))
def main(argv):
parser = GetParser()
options = parser.parse_args(argv)
# Determine list of files to show.
files = options.files
if options.filelist:
with open(options.filelist) as f:
found = [l.strip() for l in f.readlines()]
files.extend(x for x in found if x)
if options.base:
files = [os.path.join(options.base, f) for f in files]
# Parse all the files.
logs = []
for filename in files:
if options.sort:
logs.sort(key=lambda log:
if options.trim_base and options.base:
logs = [TrimLogPrefix(log, options.base) for log in logs]
if options.trim_path:
logs = [TrimLogPath(log) for log in logs]
# TODO(davidriley): This should dump JSON as well.
if options.html:
printer = PrintHtml
elif options.raw:
printer = print
printer = PrintLog
for l in logs:
if options.html: