# blob: e2dbbdbae4c9e412c2110ed0f4888c776c25d832 [file] [log] [blame]
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Module providing functionality for parser generation."""
import inspect
import logging
import os.path
import re
import shutil
from ebnf_cc import ebnf_parser
from ebnf_cc import intermediate_language
from ebnf_cc import symbol
from ebnf_cc import token_stream
class ParserGenerator(object):
    """Generates the source of a recursive-descent parser from an EBNF file.

    The EBNF file is tokenized and parsed through ebnf_parser.Grammar; every
    rule of the resulting grammar is lowered to its intermediate_language
    representation and emitted as one Python class. The generated text is
    accumulated in memory and written out by produce_parser().
    """

    # Indentation unit emitted once per nesting level of generated code.
    TAB = ' '

    # Values for the optional_mode argument of the _add_* methods. A falsy
    # mode emits a mandatory parse call; the other two emit the head of an
    # 'if' / 'elif' statement and leave the indentation depth raised by one.
    _OPTIONAL_OFF = 0
    _OPTIONAL_IF = 1
    _OPTIONAL_ELIF = 2

    # Keyword that opens the conditional for each non-off optional mode.
    _OPTIONAL_KEYWORDS = {_OPTIONAL_IF: 'if', _OPTIONAL_ELIF: 'elif'}

    def __init__(self, ebnf_filename):
        """Creates a ParserGenerator object.

        Args:
            ebnf_filename: str name of the EBNF file containing the
                language specification.
        """
        self._ebnf_filename = ebnf_filename
        self._reset()

    def _reset(self):
        """Clears the token stream, indentation depth and output buffer."""
        self._token_stream = None
        self._depth = 0
        self._output_str = ""

    def produce_parser(self, output_filename):
        """Top-level parser generation method.

        Parses the provided EBNF file and generates a recursive-descent parser
        matching the specified language. Besides writing output_filename, the
        token_stream and symbol modules are copied into the current working
        directory under an underscore-prefixed name.

        Args:
            output_filename: str name of the file to output the parser to.
        """
        self._reset()

        with open(self._ebnf_filename) as f:
            token_list = self._tokenize(f.read())
        self._token_stream = token_stream.TokenStream(token_list)

        # Parse EBNF file into Grammar object (which is the top-level
        # non-terminal in the EBNF specification of EBNF).
        ebnf_parser.Grammar.set_token_stream(self._token_stream)
        grammar = ebnf_parser.Grammar()
        logging.debug(grammar)

        # The warning banner is assigned directly (not via _add_line) so the
        # output does not begin with a newline.
        self._output_str = (
            '# NOTE: THIS FILE IS AUTOMATICALLY GENERATED BY ebnf_cc')
        self._add_line('# ONLY CHANGE THIS FILE IF YOU DO NOT MIND '
                       'LOSING ALL YOUR CHANGES')
        self._add_description()

        # Imports needed by the generated classes.
        self._add_line('import _symbol')
        self._add_line('import _token_stream')

        # Create parser of language by traversing the Grammar object.
        self._add_grammar(grammar)
        self._add_custom_import()

        with open(output_filename, 'w') as f:
            f.write(self._output_str)

        self._copy_support_modules()

    @staticmethod
    def _tokenize(text):
        """Splits EBNF text into whitespace-separated tokens.

        Single and double quote characters are padded with spaces first so
        that each quote becomes a token of its own.

        Args:
            text: str contents of the EBNF file.

        Returns:
            list of str tokens.
        """
        return re.sub(r'([\"\'])', ' \\1 ', text).split()

    def _add_custom_import(self):
        """Emits the guarded import of user-provided parser classes."""
        self._add_line('# Import any user-provided parser classes')
        self._add_line('# Necessary if the EBNF specification is incomplete')
        self._add_line('try:')
        self._push_depth()
        self._add_line('from _custom_definitions import *')
        self._pop_depth()
        self._add_line('except ImportError:')
        self._push_depth()
        self._add_line('pass')
        self._pop_depth()
        self._add_line('')

    def _copy_support_modules(self):
        """Copies the token_stream and symbol modules next to the caller.

        The generated parser imports them as _token_stream and _symbol, hence
        the underscore prefix on the copied file names.
        """
        # NOTE(review): the copies land in the current working directory, not
        # in the directory of the generated parser -- confirm this is intended
        # when output_filename points elsewhere.
        for module in (token_stream, symbol):
            src_name = inspect.getfile(module)
            # Copy the source file rather than compiled bytecode.
            if src_name.endswith(('.pyc', '.pyo')):
                src_name = src_name[:-1]
            shutil.copyfile(src_name, '_%s' % os.path.basename(src_name))

    def _add_description(self):
        """Add description to top of the file."""
        self._add_line('')
        self._add_line('# Description')
        self._add_line('# ###########')
        # TODO(akhouderchah) add full description + usage
        self._add_line('')

    def _add_grammar(self, grammar):
        """Add parser for the specified EBNF grammar.

        Args:
            grammar: ebnf_parser.Grammar whose _children are the rules.
        """
        for rule in grammar._children:
            self._add_rule(rule)

    def _add_rule(self, rule):
        """Add parser class for a single EBNF rule.

        Emits a class named after the rule whose __init__ contains the
        parsing code generated from the rule's right-hand side.

        Args:
            rule: parsed rule object; its first get_next() result is used as
                the class name.

        Raises:
            ValueError: if the rule has no ebnf_parser.Rhs child.
        """
        rule_name = rule.get_next()
        self._add_line('class %s(_symbol.BaseSymbol):' % rule_name)
        self._push_depth()
        self._add_line('def __init__(self):')
        self._push_depth()
        self._add_line('super(%s, self).__init__()' % rule_name)

        rhs = rule.get_next(ebnf_parser.Rhs)
        if rhs is None:
            raise ValueError('Expected RHS in rule:\n%s' % rule)
        intermediate_repr = intermediate_language.ILGenerator().generate(rhs)
        self._add_intermediate(intermediate_repr)

        # Leave the __init__ body, then the class body.
        self._pop_depth()
        self._pop_depth()
        self._add_line('')
        self._add_line('')

    def _add_intermediate(self, intermediate, optional_mode=None):
        """Generates parser class body of a rule given its IL representation.

        Args:
            intermediate: intermediate_language._ILObject representing the
                rule to generate a parser class for.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.

        Raises:
            ValueError: if intermediate is of none of the supported types.
        """
        # Checked in order; the first matching type wins.
        handlers = (
            (ebnf_parser.Terminal, self._add_terminal),
            (ebnf_parser.Identifier, self._add_identifier),
            (intermediate_language.Choice, self._add_choice),
            (intermediate_language.Sequence, self._add_sequence),
            (intermediate_language.Repetition, self._add_repetition),
            (intermediate_language.Optional, self._add_optional),
        )
        for il_type, handler in handlers:
            if isinstance(intermediate, il_type):
                handler(intermediate, optional_mode)
                return
        raise ValueError('Unexpected object type in IL representation')

    def _add_match(self, required_call, optional_call, optional_mode):
        """Emits a mandatory parse call or the head of an if/elif statement.

        When a conditional head is emitted the indentation depth is left
        raised by one; the caller is responsible for popping it.

        Args:
            required_call: str statement emitted when optional_mode is falsy.
            optional_call: str expression used as the if/elif condition.
            optional_mode: an _OPTIONAL_* value.

        Raises:
            ValueError: if optional_mode is truthy but not a known mode.
        """
        if not optional_mode:
            self._add_line(required_call)
            return
        keyword = self._OPTIONAL_KEYWORDS.get(optional_mode)
        if keyword is None:
            raise ValueError('Unexpected optional mode: %r' % (optional_mode,))
        self._add_line('%s %s:' % (keyword, optional_call))
        self._push_depth()

    def _add_terminal(self, terminal, optional_mode):
        """Generate code to parse the given ebnf_parser.Terminal object.

        Args:
            terminal: ebnf_parser.Terminal representing the EBNF terminal.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        terminal.get_next()  # Skip over ' token
        term = terminal.get_next()
        self._add_match("self.add_token('%s')" % term,
                        "self.add_token_optional('%s')" % term,
                        optional_mode)

    def _add_identifier(self, identifier, optional_mode):
        """Generate code to parse the given ebnf_parser.Identifier object.

        Args:
            identifier: ebnf_parser.Identifier representing the EBNF
                identifier.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        term = identifier.get_next()
        self._add_match('self.add(%s)' % term,
                        'self.add_optional(%s)' % term,
                        optional_mode)

    def _add_choice(self, il_choice, optional_mode):
        """Generate code to parse the given Choice object.

        The alternatives become an if/elif chain. Outside of optional mode a
        final else branch raising _symbol.SymbolException is appended.

        Args:
            il_choice: intermediate_language.Choice representing the
                EBNF rule choice.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        alternatives = il_choice._objs
        self._add_intermediate(alternatives[0], self._OPTIONAL_IF)
        self._add_line('pass')
        self._pop_depth()
        for alternative in alternatives[1:]:
            self._add_intermediate(alternative, self._OPTIONAL_ELIF)
            self._add_line('pass')
            self._pop_depth()

        if optional_mode:
            # The caller pops one level once it is done with this object;
            # push here so that pop is balanced.
            self._push_depth()
        else:
            self._add_line('else:')
            self._push_depth()
            # TODO(akhouderchah) add error message
            self._add_line('raise _symbol.SymbolException(\'\')')
            self._pop_depth()

    def _add_sequence(self, il_sequence, optional_mode):
        """Generate code to parse the given Sequence object.

        Only the first element is emitted in optional_mode; the remaining
        elements are always emitted as mandatory.

        Args:
            il_sequence: intermediate_language.Sequence representing the
                EBNF rule sequence.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        elements = il_sequence._objs
        self._add_intermediate(elements[0], optional_mode)
        for element in elements[1:]:
            self._add_intermediate(element)

    def _add_repetition(self, il_repetition, optional_mode):
        """Generate code to parse the given Repetition object.

        Emits a 'while True:' loop that breaks as soon as the repeated
        element fails to match.

        Args:
            il_repetition: intermediate_language.Repetition representing the
                repeating EBNF rule sequence.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        # NOTE(review): optional_mode is not consulted here -- confirm that a
        # repetition never appears where the caller expects an if/elif head.
        self._add_line('while True:')
        self._push_depth()
        self._add_intermediate(il_repetition._objs[0], self._OPTIONAL_IF)
        self._add_line('pass')
        self._pop_depth()
        self._add_line('else:')
        self._push_depth()
        self._add_line('break')
        self._pop_depth()
        # Leave the while body.
        self._pop_depth()

    def _add_optional(self, il_optional, optional_mode):
        """Generate code to parse the given Optional object.

        Args:
            il_optional: intermediate_language.Optional representing the
                optional EBNF rule sequence.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        # NOTE(review): optional_mode is not consulted here; the content is
        # always emitted with an 'if' head -- confirm this is intended.
        content = il_optional._objs[0]
        self._add_intermediate(content, self._OPTIONAL_IF)
        # Special casing optional terminals: a bare if-head needs a body.
        if not isinstance(content, intermediate_language._ILObject):
            self._add_line('pass')
        self._pop_depth()

    def _add_line(self, line):
        """Appends line to the output on a new line at the current depth.

        Args:
            line: str text of the line, without indentation or newline.
        """
        self._output_str += '\n%s%s' % (self.TAB * self._depth, line)

    def _push_depth(self):
        """Increments indentation level for subsequent _add_line calls."""
        self._depth += 1

    def _pop_depth(self):
        """Decrements indentation level for subsequent _add_line calls."""
        self._depth -= 1