# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import subprocess
import threading
import time

# Path of the log file.
_PATH_LOG_FILE = '/var/log/messages'

# Time margin as a number of seconds; it is used when parsing the log file.
# Sometimes logs are not in strict datetime order. This value defines the
# maximum possible datetime error in the logs sequence.
_TIME_MARGIN = 2

# Max time in seconds to wait for CUPS logs to appear in the log file.
_TIME_MAX_WAIT = 100

# A list of filters that are installed by loading components.
# Currently, they are the Star and Epson drivers.
_COMPONENT_FILTERS = ['rastertostar', 'rastertostarlm', 'epson-escpr-wrapper']
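
# Typical usage of LogReader (a minimal sketch; the queue name and paths
# below are hypothetical placeholders, not values from this module):
#
#   reader = LogReader()  # must be created before the job is printed
#   # ... submit a document to the CUPS queue 'MyPrinter' ...
#   no_errors, logs, script = reader.extract_result(
#           'MyPrinter', path_ppd='/tmp/printer.ppd',
#           path_doc='/tmp/doc.pdf', path_workdir='/tmp/workdir')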
class LogReader():
    """
    An instance of this class is responsible for gathering data about CUPS
    jobs and extracting their results and pipelines by parsing the main log
    file. It can check for internal errors during a job's execution and
    build a bash script simulating an executed pipeline. The CUPS server
    must have its logging level set to 'debug' to make pipeline extraction
    possible.

    """

    def _read_last_lines(self, lines_count):
        """
        Reads the given number of lines from the end of the log file. The
        last line is omitted if it does not end with an EOL character. If
        the whole file was read, the count of returned lines may be smaller
        than the requested number. At least one line is always returned; if
        the file contains no EOL characters, an exception is raised.

        @param lines_count: the number of lines to read

        @returns a list of lines

        """
        assert lines_count > 0
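        # for example (an assumed file state, not from this module): if the
        # log file ends with 'a\nb\nc', where 'c' has no trailing EOL, then
        # _read_last_lines(2) returns ['a', 'b'] and 'c' is ignored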
        argv = ['tail', '-n', '%d' % (lines_count + 1), _PATH_LOG_FILE]
        p1 = subprocess.Popen(argv, stdout=subprocess.PIPE)
        out, err = p1.communicate()
        lines = out.split('\n')
        # drop the last element; it is either an empty string (when the
        # output ends with EOL) or an incomplete line without EOL
        lines.pop()
        # one extra line was requested; drop the oldest line if we got it
        if len(lines) > lines_count:
            lines = lines[1:]
        if len(lines) == 0:
            raise Exception('The file %s is empty' % _PATH_LOG_FILE)
        return lines
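
    # Note on the expected line format (an assumption about the Chrome OS
    # syslog format, not stated in this file): each line is expected to
    # start with an RFC 3339 timestamp such as '2018-05-17T11:22:33.444444Z',
    # whose first 20 characters ('2018-05-17T11:22:33.') are parsed below;
    # the fractional part and the timezone are ignored.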
    def _get_datetime_from_line(self, line):
        """
        Parses the datetime from the given line read from the log file.

        @param line: a single line from the log file

        @returns the datetime as a number of seconds since the epoch (float)

        """
        return time.mktime(time.strptime(line[:20], "%Y-%m-%dT%H:%M:%S."))

    def __init__(self):
        """
        Constructor.

        """
        # the last line read from the log file
        self._last_line_read = self._read_last_lines(1)[0]
        # logs obtained from the log file after preprocessing
        # key = a tuple (CUPS pid, job id)
        # value = a list of log messages
        self._logs = dict()
        # data extracted from the logs
        # key = a tuple (CUPS pid, job id)
        # value = a tuple (result(bool), envp(dict), argv(list), filters(list))
        self._results = dict()
        # mapping from a printer's name to a list of CUPS jobs
        # key = printer name
        # value = a list of tuples (CUPS pid, job id)
        self._printer_name_to_key = dict()
        # a lock for synchronization
        self._critical_section = threading.Lock()

    def _parse_logs(self):
        """
        This method reads and parses new lines from the log file (all
        complete lines after the line saved in self._last_line_read). The
        lines are filtered and saved in self._logs. The dictionaries
        self._results and self._printer_name_to_key are updated accordingly.

        """
        # ==============================================
        # try to read lines from the end of the log file until the line from
        # self._last_line_read is found
        dt_last_line_read = self._get_datetime_from_line(self._last_line_read)
        lines_count = 1024
        pos_last_line_read = None
        while True:
            lines = self._read_last_lines(lines_count)
            # search for self._last_line_read in the lines read from the file
            for index, line in enumerate(lines):
                dt_line = self._get_datetime_from_line(line)
                if dt_line > dt_last_line_read + _TIME_MARGIN:
                    # this line is already too far ahead in time, the
                    # searched line cannot occur later; exit the loop
                    break
                if line == self._last_line_read:
                    # the line was found, save its index and exit the loop
                    pos_last_line_read = index
                    break
            # if self._last_line_read was found, exit the 'while' loop
            if pos_last_line_read is not None:
                break
            # check if we can try to read more lines from the file to repeat
            # the search; if not, throw an exception
            if (len(lines) < lines_count or
                    self._get_datetime_from_line(lines[0]) +
                    _TIME_MARGIN < dt_last_line_read):
                raise Exception('Cannot find the last read line in the log'
                        ' file (maybe it has been erased?)')
            # increase the number of lines to read
            lines_count = lines_count * 2
        # check if there are any new lines, exit if not
        if pos_last_line_read + 1 == len(lines):
            return
        # update the field with the last line read from the log file
        self._last_line_read = lines[-1]
        # ==============================================
        # parse the logs, select the ones coming from CUPS jobs and save
        # them to self._logs
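        # a matching line is expected to look roughly like this hypothetical
        # example (format assumed from the parsing below):
        #   2018-05-17T11:22:33.444444Z INFO cupsd[1234]: [Job 7] message...
        # after line.split(' ', 5): tokens[2] == 'cupsd[1234]:',
        # tokens[3] == '[Job', tokens[4] == '7]', tokens[5] == 'message...'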
        updated_jobs = set()
        for line in lines[pos_last_line_read + 1:]:
            tokens = line.split(' ', 5)
            if (len(tokens) < 6 or tokens[2][:6] != 'cupsd[' or
                    tokens[3] != '[Job'):
                continue
            pid = int(tokens[2][6:-2])
            job_id = int(tokens[4][:-1])
            message = tokens[5]
            key = (pid, job_id)
            if key not in self._logs:
                self._logs[key] = []
            self._logs[key].append(message)
            updated_jobs.add(key)
        # ==============================================
        # try to interpret the logs
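        # the interpreter looks for debug messages of roughly these shapes
        # (hypothetical samples consistent with the regexps below, assuming
        # the CUPS log level is set to 'debug'):
        #   '2 filters for job:'
        #   'rastertopdf (application/pdf to printer/MyPrinter, cost 100)'
        #   'envp[12]="PPD=/var/cache/cups/printer.ppd"'
        #   'argv[1]="7"'
        #   'PID 1234 (/usr/libexec/cups/filter/rastertopdf) exited with no
        #   errors.'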
        for key in updated_jobs:
            number_of_processes_to_check = None
            printer_name = None
            result = True
            envp = dict()
            argv = []
            filters = []
            for msg in self._logs[key]:
                if number_of_processes_to_check is None:
                    # wait for a message announcing the number of filters;
                    # +1, because the backend process is also checked
                    regexp = re.match(r'(\d) filters for job:', msg)
                    if regexp is not None:
                        number_of_processes_to_check = int(regexp.group(1)) + 1
                        assert number_of_processes_to_check > 0
                elif printer_name is None:
                    # parse the filter chain listing to extract the filter
                    # names and the printer's name
                    regexp = re.match(r'(\S+) \(\S+ to (\S+), cost \d+\)', msg)
                    if regexp is not None:
                        fltr = regexp.group(1)
                        if fltr == '-':
                            number_of_processes_to_check -= 1
                        else:
                            filters.append(fltr)
                        trg = regexp.group(2)
                        if trg.startswith('printer/') and '/' not in trg[8:]:
                            printer_name = trg[8:]
                else:
                    regexp = re.match(r'envp\[\d+\]="(\S+)=(\S+)"', msg)
                    if regexp is not None:
                        envp[regexp.group(1)] = regexp.group(2)
                        continue
                    regexp = re.match(r'argv\[(\d+)\]="(.+)"', msg)
                    if regexp is not None:
                        if len(argv) != int(regexp.group(1)):
                            raise Exception('Error when parsing argv for %s'
                                    % printer_name)
                        argv.append(regexp.group(2))
                        continue
                    if number_of_processes_to_check > 0:
                        # check the exit status of a filter or the backend
                        regexp = re.match(r'PID \d+ \(\S+\) ', msg)
                        if regexp is not None:
                            number_of_processes_to_check -= 1
                            if (not msg.endswith(' exited with no errors.')
                                    and 'was terminated normally with signal'
                                    not in msg):
                                result = False
            if number_of_processes_to_check == 0:
                # if it is a new job, update self._printer_name_to_key
                if key not in self._results:
                    if printer_name not in self._printer_name_to_key:
                        self._printer_name_to_key[printer_name] = [key]
                    else:
                        self._printer_name_to_key[printer_name].append(key)
                # update the results
                self._results[key] = (result, envp, argv, filters)
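
    # For two filters, _build_pipeline() below produces a script of roughly
    # this shape (a sketch; the names and paths are placeholders):
    #   #!/bin/bash
    #   set -e
    #   set -o pipefail
    #   export PPD=/path/to/printer.ppd
    #   (exec -a "MyPrinter" /path/to/filter1 "7" ... "/path/to/doc") \
    #           2>1.err | gzip -9 >1.doc.gz
    #   gzip -dc 1.doc.gz | (exec -a "MyPrinter" /path/to/filter2 "7" ...) \
    #           2>2.err | gzip -9 >2.doc.gz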
    def _build_pipeline(
            self, printer_name, envp, argv, filters,
            path_ppd, path_doc, path_workdir):
        """
        This function tries to build a bash script with the pipeline used
        internally by CUPS. The printer's parameters are passed as arguments
        and the resultant script is returned as a string.

        @param printer_name: a printer's name
        @param envp: a dictionary with environment variables for the pipeline
        @param argv: a list with command line parameters for the filters
        @param filters: a list of filters
        @param path_ppd: a path to the PPD file (string)
        @param path_doc: a path to the input document (string)
        @param path_workdir: a path to a directory that should be used as
                the home directory and temporary location by the filters

        @returns the pipeline as a bash script (string)

        """
        # update the paths set in environment variables
        envp['PPD'] = path_ppd
        envp['HOME'] = path_workdir
        envp['TMPDIR'] = path_workdir
        # drop the last argument, it is an input file
        argv.pop()
        # replace the filter name by its full path for the Star and Epson
        # drivers installed by loading components
        if filters[-1] in _COMPONENT_FILTERS:
            find_cmd = ['find', '/run/imageloader/', '-type', 'f', '-name']
            find_cmd.append(filters[-1])
            filters[-1] = subprocess.check_output(find_cmd).rstrip()
        # build and return the script
        script = '#!/bin/bash\nset -e\nset -o pipefail\n'
        for name, value in envp.iteritems():
            script += ('export %s=%s\n' % (name, value))
        for ind, filt in enumerate(filters):
            if ind > 0:
                script += ('gzip -dc %d.doc.gz | ' % ind)
            script += ('(exec -a "%s" %s' % (argv[0], filt))
            for arg in argv[1:]:
                script += (' "%s"' % arg)
            if ind == 0:
                script += (' "%s"' % path_doc)
            script += (') 2>%d.err | gzip -9 >%d.doc.gz\n' % (ind + 1, ind + 1))
        return script

    def extract_result(
            self, printer_name,
            path_ppd=None, path_doc=None, path_workdir=None):
        """
        This function extracts from the log file the CUPS logs related to
        the printer with the given name. The extracted logs are also
        interpreted; the function checks for any failures. If all optional
        parameters are set, the pipeline used by CUPS is extracted from the
        logs. If some of the required results cannot be extracted, an
        exception is raised. This function is thread-safe; it can be called
        simultaneously by many Python threads. However, all simultaneous
        calls must have different values of the printer_name parameter
        (calls with the same printer_name must be made sequentially).

        @param printer_name: a printer's name
        @param path_ppd: a path to the PPD file (string), required for
                pipeline extraction only
        @param path_doc: a path to the input document (string), required for
                pipeline extraction only
        @param path_workdir: a path to a directory that should be used as
                the home directory and temporary location by the filters,
                required for pipeline extraction only

        @returns a tuple of three variables:
                1. True if no errors were detected, False otherwise
                2. the logs extracted from the log file (string)
                3. the pipeline as a bash script (string), None if not
                   requested

        @raises Exception in case of any errors

        """
        # the time when we started our efforts to get the logs
        time_start = time.time()
        # all three parameters must be set to extract a pipeline
        pipeline_required = (path_ppd is not None and path_doc is not None
                and path_workdir is not None)
        try:
            # try to get and parse the logs
            while True:
                with self._critical_section:
                    # leave the loop if we already have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                    # save the current time and try to parse the log file
                    time_last_read = time.time()
                    self._parse_logs()
                    # leave the loop if we have everything now
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                # raise an exception if we have been waiting too long
                if time_last_read - time_start > _TIME_MAX_WAIT:
                    raise Exception('Cannot parse logs for %s or they did'
                            ' not appear in %s within %d seconds from'
                            ' printing the document' % (printer_name,
                            _PATH_LOG_FILE, _TIME_MAX_WAIT))
                # wait 1 second and try again to acquire and parse the logs
                time.sleep(1)
            # retrieve the required data
            with self._critical_section:
                if pipeline_required:
                    # key, envp and argv are already set in the loop above
                    filters = self._results[key][3]
                else:
                    key = self._printer_name_to_key[printer_name][0]
                no_errors = self._results[key][0]
                logs = '\n'.join(self._logs[key])
            # build a pipeline script if required
            if pipeline_required:
                pipeline = self._build_pipeline(printer_name, envp, argv,
                        filters, path_ppd, path_doc, path_workdir)
            else:
                pipeline = None
            # return the results
            return no_errors, logs, pipeline
        finally:
            # cleanup: remove the job's data from the internal structures
            if printer_name in self._printer_name_to_key:
                with self._critical_section:
                    key = self._printer_name_to_key[printer_name].pop(0)
                    if len(self._printer_name_to_key[printer_name]) == 0:
                        self._printer_name_to_key.pop(printer_name)
                    self._logs.pop(key)
                    self._results.pop(key)