diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index bc858860..2153a0ce 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -15,7 +15,7 @@ from ..lexer import Token from ..tree import Tree from ..exceptions import UnexpectedEOF, UnexpectedToken -from ..utils import logger, OrderedSet +from ..utils import logger, OrderedSet, dedup_list from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item @@ -169,6 +169,7 @@ def predict_and_complete(self, i, to_scan, columns, transitives): items.append(new_item) def _parse(self, lexer, columns, to_scan, start_symbol=None): + def is_quasi_complete(item): if item.is_complete: return True @@ -281,7 +282,7 @@ def parse(self, lexer, start): # If the parse was successful, the start # symbol should have been completed in the last step of the Earley cycle, and will be in # this column. Find the item for the start_symbol, which is the root of the SPPF tree. - solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] + solutions = dedup_list(n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0) if not solutions: expected_terminals = [t.expect.name for t in to_scan] raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan)) @@ -293,16 +294,21 @@ def parse(self, lexer, start): except ImportError: logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image") else: - debug_walker.visit(solutions[0], "sppf.png") - + for i, s in enumerate(solutions): + debug_walker.visit(s, f"sppf{i}.png") - if len(solutions) > 1: - assert False, 'Earley should not generate multiple start symbol items!' if self.Tree is not None: # Perform our SPPF -> AST conversion transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity) - return transformer.transform(solutions[0]) + solutions = [transformer.transform(s) for s in solutions] + + if len(solutions) > 1: + t: Tree = self.Tree('_ambig', solutions) + t.expand_kids_by_data('_ambig') # solutions may themselves be _ambig nodes + return t + return solutions[0] # return the root of the SPPF + # TODO return a list of solutions, or join them together somehow return solutions[0] diff --git a/lark/parsers/earley_common.py b/lark/parsers/earley_common.py index 46e242b4..0ea2d4fa 100644 --- a/lark/parsers/earley_common.py +++ b/lark/parsers/earley_common.py @@ -20,13 +20,13 @@ def __init__(self, rule, ptr, start): self.s = (rule, ptr) self.expect = rule.expansion[ptr] self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None - self._hash = hash((self.s, self.start)) + self._hash = hash((self.s, self.start, self.rule)) def advance(self): return Item(self.rule, self.ptr + 1, self.start) def __eq__(self, other): - return self is other or (self.s == other.s and self.start == other.start) + return self is other or (self.s == other.s and self.start == other.start and self.rule == other.rule) def __hash__(self): return self._hash diff --git a/tests/test_parser.py b/tests/test_parser.py index 74985015..1946be33 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -828,6 +828,22 @@ def test_term_ambig_resolve(self): tree = parser.parse(text) self.assertEqual(tree.children, ['foo', 'bar']) + def test_multiple_start_solutions(self): + grammar = r""" + !start: a | A + !a: A + A: "x" + """ + + l = Lark(grammar, ambiguity='explicit', lexer=LEXER) + tree = l.parse('x') + + expected = Tree('_ambig', [ + Tree('start', ['x']), + Tree('start', [Tree('a', ['x'])])] + ) + self.assertEqual(tree, expected) + def test_cycle(self): grammar = """ start: start? @@ -843,16 +859,24 @@ def test_cycle(self): def test_cycle2(self): grammar = """ - start: _operation - _operation: value - value: "b" - | "a" value - | _operation + start: _recurse + _recurse: v + v: "b" + | "a" v + | _recurse """ l = Lark(grammar, ambiguity="explicit", lexer=LEXER) tree = l.parse("ab") - self.assertEqual(tree, Tree('start', [Tree('value', [Tree('value', [])])])) + expected = ( + Tree('start', [ + Tree('_ambig', [ + Tree('v', [Tree('v', [])]), + Tree('v', [Tree('v', [Tree('v', [])])]) + ]) + ]) + ) + self.assertEqual(tree, expected) def test_cycles(self): grammar = """