blob: 069be6c2aa95a0a81a16dfa0f8f70cacc9a6efe1 [file] [log] [blame]
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Module containing functionality to convert an EBNF parse tree into an
intermediate language representation.
Converting the parse tree into an intermediate language representation allows
for proper handling of operator precedence, serves to insulate the parser
generator code from changes to the EBNF language or parse tree structure used.
"""
import logging
from ebnf_cc import ebnf_parser
class _ILObject(object):
"""Base class for IL objects representing EBNF operations."""
def __init__(self, a, b, is_idempotent):
"""Creates an _ILObject instance with operands a and b.
Params:
a: ebnf_parser.BaseSymbol or _ILObject, representing a parse tree
node or an EBNF operation, respectively. May also be None.
b: ebnf_parser.BaseSymbol or _ILObject, representing a parse tree
node or an EBNF operation, respectively. May also be None.
is_idempotent: whether the specified operation is idempotent (e.g. a
Choice of a Choice is still just a Choice). This value is set
only by child classes, not by users of these classes.
"""
self._objs = []
if is_idempotent:
if isinstance(a, self.__class__):
self._objs = a._objs
else:
self._objs.append(a)
if isinstance(b, self.__class__):
self._objs.extend(b._objs)
else:
self._objs.append(b)
else:
if a is not None:
self._objs.append(a)
if b is not None:
self._objs.append(b)
def __repr__(self):
s = '%s(' % type(self).__name__
for obj in self._objs:
if isinstance(obj, _ILObject):
s += '%s, ' % obj
elif isinstance(obj, ebnf_parser.Terminal):
s += '%s, ' % obj._children[1]._children[0]._str
else:
s += '%s, ' % obj._children[0]._str
return s[:-2] + ')'
class Choice(_ILObject):
"""Choice between different terminals, non-terminals, and _ILObjects.
The IL representation of the EBNF '|' operator.
"""
def __init__(self, a, b):
super(Choice, self).__init__(a, b, True)
class Sequence(_ILObject):
"""Sequence of terminals, non-terminals, and _ILObjects.
The IL representation of the EBNF ',' operator.
"""
def __init__(self, a, b):
super(Sequence, self).__init__(a, b, True)
class Repetition(_ILObject):
"""Repetitions of a sequence of terminals, non-terminals, and _ILObjects.
The IL representation of an EBNF sequence surrounded with '{' and '}'.
"""
def __init__(self, a, b):
super(Repetition, self).__init__(a, b, False)
class Optional(_ILObject):
"""Optional sequence of terminals, non-terminals, and _ILObjects.
The IL representation of an EBNF sequence surrounded with '[' and ']'.
"""
def __init__(self, a, b):
super(Optional, self).__init__(a, b, False)
class ILGenerator(object):
"""Class to take an EBNF parse tree and generate an intermediate language
representation.
This implementation currently only accepts subtrees from Rhs or below, as
there doesn't seem to be much benefit to converting the entire grammar into
an IL.
The conversion from parse tree to IL results in the following levels of
operator precedence from highest to lowest:
{}[]()
|
,
The current implementation uses the Shunting-Yard algorithm for parse tree
evaluation, but the interface does not preclude the use of a different
solution in the future.
"""
def generate(self, parse_tree):
"""Top-level method to generate intermediate language representation.
Params:
parse_tree: parser.BaseSymbol representing the parse subtree
to convert into the IL.
"""
self._output_stack = []
self._operator_stack = []
# Add symbols to their appropriate stack, performing any necessary
# processing in order to maintain the documented operator precedence
while True:
next_symbol = parse_tree.get_next(ebnf_parser.Elementary_Symbol)
if next_symbol is None:
break
if isinstance(next_symbol, ebnf_parser.Output):
self._output_stack.append(next_symbol)
elif isinstance(next_symbol, ebnf_parser.Operator):
self._add_operator(next_symbol)
else:
raise ValueError('Found symbol that is neither an Output nor '
'Operator.')
# Flush the output and operator stacks to return a single expression
while self._pop_operator():
pass
if len(self._output_stack) > 1:
logging.warning('Output stack has more than a single element when '
'being returned!')
return self._output_stack[0]
def _pop_operator(self):
"""Pops the top operator and pushes the result to the output stack.
Returns:
The popped operator if there were operators left on the stack. Else
None.
"""
if not self._operator_stack:
return False
op = self._operator_stack.pop()
if (isinstance(op, ebnf_parser.EndOperator) or
isinstance(op, ebnf_parser.StartOperator)):
pass
elif isinstance(op, ebnf_parser.Or):
b = self._output_stack.pop()
a = self._output_stack.pop()
self._output_stack.append(Choice(a, b))
elif isinstance(op, ebnf_parser.Concat):
b = self._output_stack.pop()
a = self._output_stack.pop()
self._output_stack.append(Sequence(a, b))
else:
raise ValueError('Unrecognized Output type')
return op
def _add_operator(self, op):
"""Adds operator to the operator stack."""
if isinstance(op, ebnf_parser.EndOperator):
while not isinstance(self._pop_operator(),
ebnf_parser.StartOperator):
pass
if isinstance(op, ebnf_parser.Option_End):
a = self._output_stack.pop()
self._output_stack.append(Optional(a, None))
elif isinstance(op, ebnf_parser.Repeat_End):
a = self._output_stack.pop()
self._output_stack.append(Repetition(a, None))
elif isinstance(op, ebnf_parser.StartOperator):
self._operator_stack.append(op)
elif not self._operator_stack:
self._operator_stack.append(op)
else:
while self._operator_stack:
prev_op = self._operator_stack[-1]
# Or is left associative and of greater precedence than Concat,
# and so will always be popped before another Or or Concat.
if isinstance(prev_op, ebnf_parser.Or):
self._pop_operator()
else:
break
self._operator_stack.append(op)