| # Copyright 2018 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Module providing functionality for parser generation.""" |
| |
| import inspect |
| import logging |
| import os.path |
| import re |
| import shutil |
| |
| from ebnf_cc import ebnf_parser |
| from ebnf_cc import intermediate_language |
| from ebnf_cc import symbol |
| from ebnf_cc import token_stream |
| |
| |
class ParserGenerator(object):
    """Generates a recursive-descent parser module from an EBNF file.

    The generated source is accumulated line by line in ``self._output_str``
    (see ``_add_line``); ``self._depth`` holds the indentation level applied
    to the next emitted line.
    """
    # Indentation unit repeated once per nesting level in the generated code.
    TAB = ' '

    # Values accepted for the ``optional_mode`` argument of the ``_add_*``
    # methods. Any falsy value (including None) is treated like _OPTIONAL_OFF.
    _OPTIONAL_OFF = 0   # emit a plain, mandatory statement
    _OPTIONAL_IF = 1    # emit the element as the condition of an ``if``
    _OPTIONAL_ELIF = 2  # emit the element as the condition of an ``elif``

    def __init__(self, ebnf_filename):
        """Creates a ParserGenerator object.

        Args:
            ebnf_filename: str name of the EBNF file containing the
                language specification.
        """
        self._ebnf_filename = ebnf_filename
        self._reset()

    def _reset(self):
        """Clears all per-run state so that a new parser can be generated."""
        self._token_stream = None
        self._depth = 0
        self._output_str = ""

    def produce_parser(self, output_filename):
        """Top-level parser generation method.

        Parses the provided EBNF file and generates a recursive-descent parser
        matching the specified language. The generated source is written to
        output_filename, and the token_stream and symbol modules are copied
        (with a leading underscore in their names) so that the generated
        'import _symbol' / 'import _token_stream' lines can be satisfied.

        Args:
            output_filename: str name of the file to output the parser to.
        """
        self._reset()
        with open(self._ebnf_filename) as f:
            # Split text into tokens, where ' and " should be their own tokens.
            token_list = re.sub(r'([\"\'])', ' \\1 ', f.read()).split()
        self._token_stream = token_stream.TokenStream(token_list)

        # Parse EBNF file into Grammar object (which is the top-level
        # non-terminal in the EBNF specification of EBNF).
        ebnf_parser.Grammar.set_token_stream(self._token_stream)
        grammar = ebnf_parser.Grammar()
        logging.debug(grammar)

        # Add standard generated warning at the top. The first line is
        # assigned directly because _add_line always prepends a newline.
        self._output_str = (
            '# NOTE: THIS FILE IS AUTOMATICALLY GENERATED BY ebnf_cc')
        self._add_line('# ONLY CHANGE THIS FILE IF YOU DO NOT MIND '
                       'LOSING ALL YOUR CHANGES')

        self._add_description()

        # Import files
        self._add_line('import _symbol')
        self._add_line('import _token_stream')

        # Create parser of language by traversing the Grammar object
        self._add_grammar(grammar)

        self._add_custom_import()

        with open(output_filename, 'w') as f:
            f.write(self._output_str)

        self._copy_support_modules()

    def _add_custom_import(self):
        """Adds the guarded star-import of user-provided parser classes."""
        self._add_line('# Import any user-provided parser classes')
        self._add_line('# Necessary if the EBNF specification is incomplete')
        self._add_line('try:')
        self._push_depth()
        self._add_line('from _custom_definitions import *')
        self._pop_depth()
        self._add_line('except ImportError:')
        self._push_depth()
        self._add_line('pass')
        self._pop_depth()
        self._add_line('')

    def _copy_support_modules(self):
        """Copies the token_stream and symbol modules as '_<basename>'."""
        # NOTE(review): the copies land in the current working directory, not
        # next to the generated parser -- confirm this is intended.
        for module in (token_stream, symbol):
            src_name = inspect.getfile(module)
            # inspect.getfile may report the compiled file; copy the source.
            if src_name.endswith(('.pyc', '.pyo')):
                src_name = src_name[:-1]
            shutil.copyfile(src_name, '_%s' % os.path.basename(src_name))

    def _add_description(self):
        """Add description to top of the file."""
        self._add_line('')
        self._add_line('# Description')
        self._add_line('# ###########')
        # TODO(akhouderchah) add full description + usage
        self._add_line('')

    def _add_grammar(self, grammar):
        """Add parser for the specified EBNF grammar.

        Args:
            grammar: ebnf_parser.Grammar whose children are the rules to
                generate parser classes for.
        """
        for rule in grammar._children:
            self._add_rule(rule)

    def _add_rule(self, rule):
        """Add parser class for a single EBNF rule.

        Emits a '_symbol.BaseSymbol' subclass named after the rule whose
        __init__ contains the parsing code for the rule's right-hand side.

        Args:
            rule: parsed EBNF rule; its first child is used as the class name.

        Raises:
            Exception: if the rule has no ebnf_parser.Rhs child.
        """
        rule_name = rule.get_next()
        self._add_line('class %s(_symbol.BaseSymbol):' % rule_name)
        self._push_depth()
        self._add_line('def __init__(self):')
        self._push_depth()
        self._add_line('super(%s, self).__init__()' % rule_name)

        rhs = rule.get_next(ebnf_parser.Rhs)
        if rhs is None:
            raise Exception('Expected RHS in rule:\n%s' % rule)
        intermediate_repr = intermediate_language.ILGenerator().generate(rhs)
        self._add_intermediate(intermediate_repr)

        # Close the __init__ body and the class body, then leave two blank
        # lines before whatever is emitted next.
        self._pop_depth()
        self._pop_depth()
        self._add_line('')
        self._add_line('')

    def _add_intermediate(self, intermediate, optional_mode=None):
        """Generates parser class body of a rule given its IL representation.

        Dispatches on the type of the given object to the matching _add_*
        method.

        Args:
            intermediate: intermediate_language._ILObject representing the
                rule to generate a parser class for.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.

        Raises:
            ValueError: if the object is of none of the handled types.
        """
        if isinstance(intermediate, ebnf_parser.Terminal):
            self._add_terminal(intermediate, optional_mode)
        elif isinstance(intermediate, ebnf_parser.Identifier):
            self._add_identifier(intermediate, optional_mode)
        elif isinstance(intermediate, intermediate_language.Choice):
            self._add_choice(intermediate, optional_mode)
        elif isinstance(intermediate, intermediate_language.Sequence):
            self._add_sequence(intermediate, optional_mode)
        elif isinstance(intermediate, intermediate_language.Repetition):
            self._add_repetition(intermediate, optional_mode)
        elif isinstance(intermediate, intermediate_language.Optional):
            self._add_optional(intermediate, optional_mode)
        else:
            raise ValueError('Unexpected object type in IL representation')

    def _add_symbol_call(self, required_call, optional_call, optional_mode):
        """Emits a call either as a statement or as an if/elif condition.

        In if/elif mode the indentation level is left one deeper so that the
        caller can emit the branch body and pop the depth afterwards.

        Args:
            required_call: str statement emitted when optional_mode is falsy.
            optional_call: str expression used as the if/elif condition.
            optional_mode: an _OPTIONAL_* value (or None).

        Raises:
            ValueError: if optional_mode is truthy but not a known mode.
        """
        if not optional_mode:
            self._add_line(required_call)
            return

        if optional_mode == self._OPTIONAL_IF:
            keyword = 'if'
        elif optional_mode == self._OPTIONAL_ELIF:
            keyword = 'elif'
        else:
            raise ValueError('Unexpected optional_mode: %r' % (optional_mode,))
        self._add_line('%s %s:' % (keyword, optional_call))
        self._push_depth()

    def _add_terminal(self, terminal, optional_mode):
        """Generate code to parse the given ebnf_parser.Terminal object.

        Args:
            terminal: ebnf_parser.Terminal representing the EBNF terminal.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.

        Raises:
            ValueError: if optional_mode is not a recognised mode.
        """
        terminal.get_next()  # Skip over ' token
        term = terminal.get_next()
        # NOTE(review): the term is pasted verbatim between single quotes, so
        # a term containing ' or a trailing backslash would yield invalid
        # generated code -- confirm the grammar front end rules this out.
        self._add_symbol_call("self.add_token('%s')" % term,
                              "self.add_token_optional('%s')" % term,
                              optional_mode)

    def _add_identifier(self, identifier, optional_mode):
        """Generate code to parse the given ebnf_parser.Identifier object.

        Args:
            identifier: ebnf_parser.Identifier representing the EBNF
                identifier.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.

        Raises:
            ValueError: if optional_mode is not a recognised mode.
        """
        term = identifier.get_next()
        self._add_symbol_call('self.add(%s)' % term,
                              'self.add_optional(%s)' % term,
                              optional_mode)

    def _add_choice(self, il_choice, optional_mode):
        """Generate code to parse the given Choice object.

        The first alternative is emitted in 'if' mode and every further one
        in 'elif' mode; each branch is terminated with 'pass'.

        Args:
            il_choice: intermediate_language.Choice representing the
                EBNF rule choice.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        alternatives = il_choice._objs
        self._add_intermediate(alternatives[0], self._OPTIONAL_IF)
        self._add_line('pass')
        self._pop_depth()
        for alternative in alternatives[1:]:
            self._add_intermediate(alternative, self._OPTIONAL_ELIF)
            self._add_line('pass')
            self._pop_depth()

        if optional_mode:
            # No 'else' branch is emitted here; the depth is left one deeper,
            # mirroring what a terminal/identifier does in if/elif mode.
            self._push_depth()
        else:
            # A mandatory choice fails when no alternative matched.
            self._add_line('else:')
            self._push_depth()
            # TODO(akhouderchah) add error message
            self._add_line('raise _symbol.SymbolException(\'\')')
            self._pop_depth()

    def _add_sequence(self, il_sequence, optional_mode):
        """Generate code to parse the given Sequence object.

        Only the first element inherits optional_mode; all following elements
        are emitted with the default (mandatory) mode.

        Args:
            il_sequence: intermediate_language.Sequence representing the
                EBNF rule sequence.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement.
        """
        elements = il_sequence._objs
        self._add_intermediate(elements[0], optional_mode)
        for element in elements[1:]:
            self._add_intermediate(element)

    def _add_repetition(self, il_repetition, optional_mode):
        """Generate code to parse the given Repetition object.

        Emits a 'while True:' loop that tries the repeated element in 'if'
        mode and breaks out of the loop in the 'else' branch.

        Args:
            il_repetition: intermediate_language.Repetition representing the
                repeating EBNF rule sequence.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement. It is not
                consulted by this method.
        """
        # NOTE(review): optional_mode is ignored, yet _add_choice pops the
        # depth after each alternative -- confirm the IL never places a
        # Repetition where an if/elif header is expected.
        self._add_line('while True:')
        self._push_depth()
        self._add_intermediate(il_repetition._objs[0], self._OPTIONAL_IF)
        self._add_line('pass')
        self._pop_depth()
        self._add_line('else:')
        self._push_depth()
        self._add_line('break')
        self._pop_depth()
        self._pop_depth()

    def _add_optional(self, il_optional, optional_mode):
        """Generate code to parse the given Optional object.

        The wrapped element is emitted in 'if' mode and the branch is closed
        again afterwards.

        Args:
            il_optional: intermediate_language.Optional representing the
                optional EBNF rule sequence.
            optional_mode: an _OPTIONAL_* value representing if the generated
                code currently resides in an if or elif statement. It is not
                consulted by this method.
        """
        # NOTE(review): optional_mode is ignored here as well -- see the
        # depth handling in _add_choice and confirm this nesting cannot occur.
        wrapped = il_optional._objs[0]
        self._add_intermediate(wrapped, self._OPTIONAL_IF)
        # Special casing optional terminals: objects that are not _ILObjects
        # only produce the 'if' header, so the branch needs an explicit body.
        if not isinstance(wrapped, intermediate_language._ILObject):
            self._add_line('pass')
        self._pop_depth()

    def _add_line(self, line):
        """Appends a newline plus the given line at the current indentation.

        Args:
            line: str text of the line, without indentation or newline.
        """
        self._output_str += '\n%s%s' % (self.TAB * self._depth, line)

    def _push_depth(self):
        """Increments indentation level for subsequent _add_line calls."""
        self._depth += 1

    def _pop_depth(self):
        """Decrements indentation level for subsequent _add_line calls."""
        self._depth -= 1