qmi2cpp/ebnf_cc/parser_generator.py - mirrors/cros/chromiumos/platform2 - Git at Google

 # Copyright 2018 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Module providing functionality for parser generation."""

 import inspect
 import logging
 import os.path
 import re
 import shutil

 from ebnf_cc import ebnf_parser
 from ebnf_cc import intermediate_language
 from ebnf_cc import symbol
 from ebnf_cc import token_stream


 class ParserGenerator(object):
     """Class responsible for the generation of parser given an EBNF file.

     The generated source is accumulated line by line in self._output_str,
     with self._depth holding the indentation level of the next line.
     """
     # Indentation unit of the generated code, repeated once per level.
     TAB = '    '

     # Values of the optional_mode argument taken by the _add_* methods. A
     # falsy mode (_OPTIONAL_OFF or None) emits a mandatory statement; the
     # other two make the emitted construct open an if or an elif branch.
     _OPTIONAL_OFF = 0
     _OPTIONAL_IF = 1
     _OPTIONAL_ELIF = 2

     def __init__(self, ebnf_filename):
         """Creates a ParserGenerator object.

         Args:
           ebnf_filename: str name of the EBNF file containing the
               language specification.
         """
         self._ebnf_filename = ebnf_filename
         self._reset()

     def _reset(self):
         """Clears the token stream, indentation depth and output buffer."""
         self._token_stream = None
         self._depth = 0
         self._output_str = ""

     def produce_parser(self, output_filename):
         """Top-level parser generation method.

         Parses the provided EBNF file and generates a recursive-descent parser
         matching the specified language. The token_stream and symbol modules
         imported by the generated parser are copied into the current working
         directory, with an underscore prefixed to their file names.

         Args:
           output_filename: str name of the file to output the parser to.
         """
         self._reset()
         with open(self._ebnf_filename) as f:
             # Split text into tokens, where ' and " should be their own tokens.
             token_list = re.sub(r'([\"\'])', ' \\1 ', f.read()).split()
         self._token_stream = token_stream.TokenStream(token_list)

         # Parse EBNF file into Grammar object (which is the top-level
         # non-terminal in the EBNF specification of EBNF).
         ebnf_parser.Grammar.set_token_stream(self._token_stream)
         grammar = ebnf_parser.Grammar()
         logging.debug(grammar)

         # Add standard generated warning at the top. The first line is
         # assigned directly because _add_line() starts every line it emits
         # with a newline.
         self._output_str = (
             '# NOTE: THIS FILE IS AUTOMATICALLY GENERATED BY ebnf_cc')
         self._add_line('# ONLY CHANGE THIS FILE IF YOU DO NOT MIND '
                        'LOSING ALL YOUR CHANGES')

         self._add_description()

         # Import files
         self._add_line('import _symbol')
         self._add_line('import _token_stream')

         # Create parser of language by traversing the Grammar object
         self._add_grammar(grammar)

         # Import custom definitions
         self._add_line('# Import any user-provided parser classes')
         self._add_line('# Necessary if the EBNF specification is incomplete')
         self._add_line('try:')
         self._push_depth()
         self._add_line('from _custom_definitions import *')
         self._pop_depth()
         self._add_line('except ImportError:')
         self._push_depth()
         self._add_line('pass')
         self._pop_depth()
         self._add_line('')

         with open(output_filename, 'w') as f:
             f.write(self._output_str)

         # Copy token_stream and symbol modules over
         for module in (token_stream, symbol):
             src_name = inspect.getfile(module)
             # getfile() may name the compiled file; copy the source instead.
             if src_name.endswith(('.pyc', '.pyo')):
                 src_name = src_name[:-1]
             # NOTE(review): the destination is relative to the current
             # working directory rather than to output_filename's directory;
             # confirm that this is what callers expect.
             shutil.copyfile(src_name, '_%s' % os.path.basename(src_name))

     def _add_description(self):
         """Add description to top of the file."""
         # TODO(akhouderchah) add full description + usage
         for line in ('', '# Description', '# ###########', ''):
             self._add_line(line)

     def _add_grammar(self, grammar):
         """Add parser for the specified EBNF grammar.

         Args:
           grammar: parsed grammar object; one parser class is generated
               for every entry of its _children.
         """
         for rule in grammar._children:
             self._add_rule(rule)

     def _add_rule(self, rule):
         """Add parser class for a single EBNF rule.

         The emitted class derives from _symbol.BaseSymbol and does all of
         its parsing in __init__.

         Args:
           rule: parsed EBNF rule; the result of its first get_next() call
               is used as the name of the generated class.

         Raises:
           ValueError: if the rule does not contain an ebnf_parser.Rhs.
         """
         rule_name = rule.get_next()
         self._add_line('class %s(_symbol.BaseSymbol):' % rule_name)
         self._push_depth()
         self._add_line('def __init__(self):')
         self._push_depth()
         self._add_line('super(%s, self).__init__()' % rule_name)

         rhs = rule.get_next(ebnf_parser.Rhs)
         if rhs is None:
             raise ValueError('Expected RHS in rule:\n%s' % rule)
         intermediate_repr = intermediate_language.ILGenerator().generate(rhs)
         self._add_intermediate(intermediate_repr)

         # Leave the __init__ body and then the class body.
         self._pop_depth()
         self._pop_depth()
         self._add_line('')
         self._add_line('')

     def _add_intermediate(self, intermediate, optional_mode=None):
         """Generates parser class body of a rule given its IL representation.

         Args:
           intermediate: ebnf_parser.Terminal, ebnf_parser.Identifier or
               intermediate_language Choice / Sequence / Repetition /
               Optional object to generate parsing code for.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement.

         Raises:
           ValueError: if intermediate is of none of the handled types.
         """
         # Checked in order, so the first matching type wins.
         handlers = (
             (ebnf_parser.Terminal, self._add_terminal),
             (ebnf_parser.Identifier, self._add_identifier),
             (intermediate_language.Choice, self._add_choice),
             (intermediate_language.Sequence, self._add_sequence),
             (intermediate_language.Repetition, self._add_repetition),
             (intermediate_language.Optional, self._add_optional),
         )
         for il_type, handler in handlers:
             if isinstance(intermediate, il_type):
                 handler(intermediate, optional_mode)
                 return
         raise ValueError('Unexpected object type in IL representation')

     def _add_symbol_call(self, required_call, optional_call, optional_mode):
         """Emit a parse call as a statement or as an if/elif header.

         Args:
           required_call: str expression emitted as a statement when
               optional_mode is falsy.
           optional_call: str expression used as the condition of the
               emitted if/elif otherwise.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement.
         """
         if not optional_mode:
             self._add_line(required_call)
         elif optional_mode == self._OPTIONAL_IF:
             self._add_line('if %s:' % optional_call)
             # The branch body is filled in (and closed) by the caller.
             self._push_depth()
         elif optional_mode == self._OPTIONAL_ELIF:
             self._add_line('elif %s:' % optional_call)
             self._push_depth()

     def _add_terminal(self, terminal, optional_mode):
         """Generate code to parse the given ebnf_parser.Terminal object.

         Args:
           terminal: ebnf_parser.Terminal representing the EBNF terminal.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement.
         """
         terminal.get_next()  # Skip over ' token
         # repr() picks the quoting and escaping, so terminals that are a
         # quote character or contain a backslash still yield valid code.
         literal = repr(str(terminal.get_next()))
         self._add_symbol_call('self.add_token(%s)' % literal,
                               'self.add_token_optional(%s)' % literal,
                               optional_mode)

     def _add_identifier(self, identifier, optional_mode):
         """Generate code to parse the given ebnf_parser.Identifier object.

         Args:
           identifier: ebnf_parser.Identifier representing the EBNF identifier.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement.
         """
         name = identifier.get_next()
         self._add_symbol_call('self.add(%s)' % name,
                               'self.add_optional(%s)' % name,
                               optional_mode)

     def _add_choice(self, il_choice, optional_mode):
         """Generate code to parse the given Choice object.

         The alternatives are emitted as an if/elif chain. A mandatory
         choice gets a final else branch that raises
         _symbol.SymbolException; an optional one gets no else branch.

         Args:
           il_choice: intermediate_language.Choice representing the
               EBNF rule choice.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement.
         """
         alternatives = il_choice._objs
         # Index eagerly so that a choice without alternatives fails here.
         branches = [(self._OPTIONAL_IF, alternatives[0])]
         branches += [(self._OPTIONAL_ELIF, alt) for alt in alternatives[1:]]
         for mode, alternative in branches:
             self._add_intermediate(alternative, mode)
             self._add_line('pass')
             self._pop_depth()

         if optional_mode:
             # Callers that pass an if/elif mode pop one level afterwards;
             # push here so that their pop leaves the depth balanced.
             self._push_depth()
         else:
             self._add_line('else:')
             self._push_depth()
             # TODO(akhouderchah) add error message
             self._add_line("raise _symbol.SymbolException('')")
             self._pop_depth()

     def _add_sequence(self, il_sequence, optional_mode):
         """Generate code to parse the given Sequence object.

         Only the first element can open an if/elif branch; the remaining
         elements are emitted as mandatory statements after it.

         Args:
           il_sequence: intermediate_language.Sequence representing the
               EBNF rule sequence.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement.
         """
         elements = il_sequence._objs
         self._add_intermediate(elements[0], optional_mode)
         for element in elements[1:]:
             self._add_intermediate(element)

     def _add_repetition(self, il_repetition, optional_mode):
         """Generate code to parse the given Repetition object.

         Emits a `while True:` loop that parses the repeated element as an
         optional branch and breaks out once it no longer matches.

         Args:
           il_repetition: intermediate_language.Repetition representing the
               repeating EBNF rule sequence.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement. Not read
               by this method.
         """
         self._add_line('while True:')
         self._push_depth()
         self._add_intermediate(il_repetition._objs[0], self._OPTIONAL_IF)
         self._add_line('pass')
         self._pop_depth()
         self._add_line('else:')
         self._push_depth()
         self._add_line('break')
         self._pop_depth()
         self._pop_depth()

     def _add_optional(self, il_optional, optional_mode):
         """Generate code to parse the given Optional object.

         Args:
           il_optional: intermediate_language.Optional representing the
               optional EBNF rule sequence.
           optional_mode: an _OPTIONAL_* value representing if the generated
               code currently resides in an if or elif statement. Not read
               by this method.
         """
         inner = il_optional._objs[0]
         self._add_intermediate(inner, self._OPTIONAL_IF)
         # Special casing optional terminals: anything that is not an
         # _ILObject only opened the if, so the branch still needs a body.
         if not isinstance(inner, intermediate_language._ILObject):
             self._add_line('pass')
         self._pop_depth()

     def _add_line(self, line):
         """Appends line to the output, indented to the current depth."""
         indent = self.TAB * self._depth
         self._output_str += '\n%s%s' % (indent, line)

     def _push_depth(self):
         """Increments indentation level for subsequent _add_line calls."""
         self._depth += 1

     def _pop_depth(self):
         """Decrements indentation level for subsequent _add_line calls."""
         self._depth -= 1
	# Copyright 2018 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Module providing functionality for parser generation."""

	import inspect
	import logging
	import os.path
	import re
	import shutil

	from ebnf_cc import ebnf_parser
	from ebnf_cc import intermediate_language
	from ebnf_cc import symbol
	from ebnf_cc import token_stream


	class ParserGenerator(object):
	    """Class responsible for the generation of parser given an EBNF file.

	    The generated source is accumulated line by line in self._output_str,
	    with self._depth holding the indentation level of the next line.
	    """
	    # Indentation unit of the generated code, repeated once per level.
	    TAB = '    '

	    # Values of the optional_mode argument taken by the _add_* methods. A
	    # falsy mode (_OPTIONAL_OFF or None) emits a mandatory statement; the
	    # other two make the emitted construct open an if or an elif branch.
	    _OPTIONAL_OFF = 0
	    _OPTIONAL_IF = 1
	    _OPTIONAL_ELIF = 2

	    def __init__(self, ebnf_filename):
	        """Creates a ParserGenerator object.

	        Args:
	          ebnf_filename: str name of the EBNF file containing the
	              language specification.
	        """
	        self._ebnf_filename = ebnf_filename
	        self._reset()

	    def _reset(self):
	        """Clears the token stream, indentation depth and output buffer."""
	        self._token_stream = None
	        self._depth = 0
	        self._output_str = ""

	    def produce_parser(self, output_filename):
	        """Top-level parser generation method.

	        Parses the provided EBNF file and generates a recursive-descent parser
	        matching the specified language. The token_stream and symbol modules
	        imported by the generated parser are copied into the current working
	        directory, with an underscore prefixed to their file names.

	        Args:
	          output_filename: str name of the file to output the parser to.
	        """
	        self._reset()
	        with open(self._ebnf_filename) as f:
	            # Split text into tokens, where ' and " should be their own tokens.
	            token_list = re.sub(r'([\"\'])', ' \\1 ', f.read()).split()
	        self._token_stream = token_stream.TokenStream(token_list)

	        # Parse EBNF file into Grammar object (which is the top-level
	        # non-terminal in the EBNF specification of EBNF).
	        ebnf_parser.Grammar.set_token_stream(self._token_stream)
	        grammar = ebnf_parser.Grammar()
	        logging.debug(grammar)

	        # Add standard generated warning at the top. The first line is
	        # assigned directly because _add_line() starts every line it emits
	        # with a newline.
	        self._output_str = (
	            '# NOTE: THIS FILE IS AUTOMATICALLY GENERATED BY ebnf_cc')
	        self._add_line('# ONLY CHANGE THIS FILE IF YOU DO NOT MIND '
	                       'LOSING ALL YOUR CHANGES')

	        self._add_description()

	        # Import files
	        self._add_line('import _symbol')
	        self._add_line('import _token_stream')

	        # Create parser of language by traversing the Grammar object
	        self._add_grammar(grammar)

	        # Import custom definitions
	        self._add_line('# Import any user-provided parser classes')
	        self._add_line('# Necessary if the EBNF specification is incomplete')
	        self._add_line('try:')
	        self._push_depth()
	        self._add_line('from _custom_definitions import *')
	        self._pop_depth()
	        self._add_line('except ImportError:')
	        self._push_depth()
	        self._add_line('pass')
	        self._pop_depth()
	        self._add_line('')

	        with open(output_filename, 'w') as f:
	            f.write(self._output_str)

	        # Copy token_stream and symbol modules over
	        for module in (token_stream, symbol):
	            src_name = inspect.getfile(module)
	            # getfile() may name the compiled file; copy the source instead.
	            if src_name.endswith(('.pyc', '.pyo')):
	                src_name = src_name[:-1]
	            # NOTE(review): the destination is relative to the current
	            # working directory rather than to output_filename's directory;
	            # confirm that this is what callers expect.
	            shutil.copyfile(src_name, '_%s' % os.path.basename(src_name))

	    def _add_description(self):
	        """Add description to top of the file."""
	        # TODO(akhouderchah) add full description + usage
	        for line in ('', '# Description', '# ###########', ''):
	            self._add_line(line)

	    def _add_grammar(self, grammar):
	        """Add parser for the specified EBNF grammar.

	        Args:
	          grammar: parsed grammar object; one parser class is generated
	              for every entry of its _children.
	        """
	        for rule in grammar._children:
	            self._add_rule(rule)

	    def _add_rule(self, rule):
	        """Add parser class for a single EBNF rule.

	        The emitted class derives from _symbol.BaseSymbol and does all of
	        its parsing in __init__.

	        Args:
	          rule: parsed EBNF rule; the result of its first get_next() call
	              is used as the name of the generated class.

	        Raises:
	          ValueError: if the rule does not contain an ebnf_parser.Rhs.
	        """
	        rule_name = rule.get_next()
	        self._add_line('class %s(_symbol.BaseSymbol):' % rule_name)
	        self._push_depth()
	        self._add_line('def __init__(self):')
	        self._push_depth()
	        self._add_line('super(%s, self).__init__()' % rule_name)

	        rhs = rule.get_next(ebnf_parser.Rhs)
	        if rhs is None:
	            raise ValueError('Expected RHS in rule:\n%s' % rule)
	        intermediate_repr = intermediate_language.ILGenerator().generate(rhs)
	        self._add_intermediate(intermediate_repr)

	        # Leave the __init__ body and then the class body.
	        self._pop_depth()
	        self._pop_depth()
	        self._add_line('')
	        self._add_line('')

	    def _add_intermediate(self, intermediate, optional_mode=None):
	        """Generates parser class body of a rule given its IL representation.

	        Args:
	          intermediate: ebnf_parser.Terminal, ebnf_parser.Identifier or
	              intermediate_language Choice / Sequence / Repetition /
	              Optional object to generate parsing code for.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement.

	        Raises:
	          ValueError: if intermediate is of none of the handled types.
	        """
	        # Checked in order, so the first matching type wins.
	        handlers = (
	            (ebnf_parser.Terminal, self._add_terminal),
	            (ebnf_parser.Identifier, self._add_identifier),
	            (intermediate_language.Choice, self._add_choice),
	            (intermediate_language.Sequence, self._add_sequence),
	            (intermediate_language.Repetition, self._add_repetition),
	            (intermediate_language.Optional, self._add_optional),
	        )
	        for il_type, handler in handlers:
	            if isinstance(intermediate, il_type):
	                handler(intermediate, optional_mode)
	                return
	        raise ValueError('Unexpected object type in IL representation')

	    def _add_symbol_call(self, required_call, optional_call, optional_mode):
	        """Emit a parse call as a statement or as an if/elif header.

	        Args:
	          required_call: str expression emitted as a statement when
	              optional_mode is falsy.
	          optional_call: str expression used as the condition of the
	              emitted if/elif otherwise.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement.
	        """
	        if not optional_mode:
	            self._add_line(required_call)
	        elif optional_mode == self._OPTIONAL_IF:
	            self._add_line('if %s:' % optional_call)
	            # The branch body is filled in (and closed) by the caller.
	            self._push_depth()
	        elif optional_mode == self._OPTIONAL_ELIF:
	            self._add_line('elif %s:' % optional_call)
	            self._push_depth()

	    def _add_terminal(self, terminal, optional_mode):
	        """Generate code to parse the given ebnf_parser.Terminal object.

	        Args:
	          terminal: ebnf_parser.Terminal representing the EBNF terminal.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement.
	        """
	        terminal.get_next()  # Skip over ' token
	        # repr() picks the quoting and escaping, so terminals that are a
	        # quote character or contain a backslash still yield valid code.
	        literal = repr(str(terminal.get_next()))
	        self._add_symbol_call('self.add_token(%s)' % literal,
	                              'self.add_token_optional(%s)' % literal,
	                              optional_mode)

	    def _add_identifier(self, identifier, optional_mode):
	        """Generate code to parse the given ebnf_parser.Identifier object.

	        Args:
	          identifier: ebnf_parser.Identifier representing the EBNF identifier.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement.
	        """
	        name = identifier.get_next()
	        self._add_symbol_call('self.add(%s)' % name,
	                              'self.add_optional(%s)' % name,
	                              optional_mode)

	    def _add_choice(self, il_choice, optional_mode):
	        """Generate code to parse the given Choice object.

	        The alternatives are emitted as an if/elif chain. A mandatory
	        choice gets a final else branch that raises
	        _symbol.SymbolException; an optional one gets no else branch.

	        Args:
	          il_choice: intermediate_language.Choice representing the
	              EBNF rule choice.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement.
	        """
	        alternatives = il_choice._objs
	        # Index eagerly so that a choice without alternatives fails here.
	        branches = [(self._OPTIONAL_IF, alternatives[0])]
	        branches += [(self._OPTIONAL_ELIF, alt) for alt in alternatives[1:]]
	        for mode, alternative in branches:
	            self._add_intermediate(alternative, mode)
	            self._add_line('pass')
	            self._pop_depth()

	        if optional_mode:
	            # Callers that pass an if/elif mode pop one level afterwards;
	            # push here so that their pop leaves the depth balanced.
	            self._push_depth()
	        else:
	            self._add_line('else:')
	            self._push_depth()
	            # TODO(akhouderchah) add error message
	            self._add_line("raise _symbol.SymbolException('')")
	            self._pop_depth()

	    def _add_sequence(self, il_sequence, optional_mode):
	        """Generate code to parse the given Sequence object.

	        Only the first element can open an if/elif branch; the remaining
	        elements are emitted as mandatory statements after it.

	        Args:
	          il_sequence: intermediate_language.Sequence representing the
	              EBNF rule sequence.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement.
	        """
	        elements = il_sequence._objs
	        self._add_intermediate(elements[0], optional_mode)
	        for element in elements[1:]:
	            self._add_intermediate(element)

	    def _add_repetition(self, il_repetition, optional_mode):
	        """Generate code to parse the given Repetition object.

	        Emits a `while True:` loop that parses the repeated element as an
	        optional branch and breaks out once it no longer matches.

	        Args:
	          il_repetition: intermediate_language.Repetition representing the
	              repeating EBNF rule sequence.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement. Not read
	              by this method.
	        """
	        self._add_line('while True:')
	        self._push_depth()
	        self._add_intermediate(il_repetition._objs[0], self._OPTIONAL_IF)
	        self._add_line('pass')
	        self._pop_depth()
	        self._add_line('else:')
	        self._push_depth()
	        self._add_line('break')
	        self._pop_depth()
	        self._pop_depth()

	    def _add_optional(self, il_optional, optional_mode):
	        """Generate code to parse the given Optional object.

	        Args:
	          il_optional: intermediate_language.Optional representing the
	              optional EBNF rule sequence.
	          optional_mode: an _OPTIONAL_* value representing if the generated
	              code currently resides in an if or elif statement. Not read
	              by this method.
	        """
	        inner = il_optional._objs[0]
	        self._add_intermediate(inner, self._OPTIONAL_IF)
	        # Special casing optional terminals: anything that is not an
	        # _ILObject only opened the if, so the branch still needs a body.
	        if not isinstance(inner, intermediate_language._ILObject):
	            self._add_line('pass')
	        self._pop_depth()

	    def _add_line(self, line):
	        """Appends line to the output, indented to the current depth."""
	        indent = self.TAB * self._depth
	        self._output_str += '\n%s%s' % (indent, line)

	    def _push_depth(self):
	        """Increments indentation level for subsequent _add_line calls."""
	        self._depth += 1

	    def _pop_depth(self):
	        """Decrements indentation level for subsequent _add_line calls."""
	        self._depth -= 1