Skip to content

Commit

Permalink
Merge pull request #1427 from lark-parser/dev
Browse files Browse the repository at this point in the history
BUGFIX Earley: Now yielding a previously suppressed ambiguity
  • Loading branch information
erezsh authored Jun 22, 2024
2 parents 13a97aa + efeb846 commit c1dbe0c
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 15 deletions.
20 changes: 13 additions & 7 deletions lark/parsers/earley.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ..lexer import Token
from ..tree import Tree
from ..exceptions import UnexpectedEOF, UnexpectedToken
from ..utils import logger, OrderedSet
from ..utils import logger, OrderedSet, dedup_list
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item
Expand Down Expand Up @@ -169,6 +169,7 @@ def predict_and_complete(self, i, to_scan, columns, transitives):
items.append(new_item)

def _parse(self, lexer, columns, to_scan, start_symbol=None):

def is_quasi_complete(item):
if item.is_complete:
return True
Expand Down Expand Up @@ -281,7 +282,7 @@ def parse(self, lexer, start):
# If the parse was successful, the start
# symbol should have been completed in the last step of the Earley cycle, and will be in
# this column. Find the item for the start_symbol, which is the root of the SPPF tree.
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
solutions = dedup_list(n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0)
if not solutions:
expected_terminals = [t.expect.name for t in to_scan]
raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
Expand All @@ -293,16 +294,21 @@ def parse(self, lexer, start):
except ImportError:
logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
else:
debug_walker.visit(solutions[0], "sppf.png")

for i, s in enumerate(solutions):
debug_walker.visit(s, f"sppf{i}.png")

if len(solutions) > 1:
assert False, 'Earley should not generate multiple start symbol items!'

if self.Tree is not None:
# Perform our SPPF -> AST conversion
transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
return transformer.transform(solutions[0])
solutions = [transformer.transform(s) for s in solutions]

if len(solutions) > 1:
t: Tree = self.Tree('_ambig', solutions)
t.expand_kids_by_data('_ambig') # solutions may themselves be _ambig nodes
return t
return solutions[0]

# return the root of the SPPF
# TODO return a list of solutions, or join them together somehow
return solutions[0]
4 changes: 2 additions & 2 deletions lark/parsers/earley_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ def __init__(self, rule, ptr, start):
self.s = (rule, ptr)
self.expect = rule.expansion[ptr]
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
self._hash = hash((self.s, self.start))
self._hash = hash((self.s, self.start, self.rule))

def advance(self):
    """Return a new Item for the same rule and start, with ptr moved forward by one."""
    next_ptr = self.ptr + 1
    return Item(self.rule, next_ptr, self.start)

def __eq__(self, other):
    """Return whether ``other`` denotes the same item as ``self``.

    Two items are equal when they are the same object, or when their
    ``s``, ``start`` and ``rule`` attributes all compare equal. These are
    the same three attributes that are combined into ``_hash``, so equal
    items always share a hash value.
    """
    # Identity short-circuit: avoids comparing attributes for the common case.
    if self is other:
        return True
    # A single comparison only; a second, unreachable ``return`` after it
    # would be dead code and must not be reintroduced.
    return self.s == other.s and self.start == other.start and self.rule == other.rule

def __hash__(self):
    """Return the hash value stored in ``self._hash``."""
    return self._hash
Expand Down
36 changes: 30 additions & 6 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,22 @@ def test_term_ambig_resolve(self):
tree = parser.parse(text)
self.assertEqual(tree.children, ['foo', 'bar'])

def test_multiple_start_solutions(self):
    """Input 'x' matches both alternatives of ``start``; the parse result must be an ``_ambig`` node holding both trees."""
    grammar = r"""
!start: a | A
!a: A
A: "x"
"""

    parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse('x')

    # One derivation goes straight through the terminal A, the other through rule ``a``.
    via_terminal = Tree('start', ['x'])
    via_rule = Tree('start', [Tree('a', ['x'])])
    self.assertEqual(parsed, Tree('_ambig', [via_terminal, via_rule]))

def test_cycle(self):
grammar = """
start: start?
Expand All @@ -840,16 +856,24 @@ def test_cycle(self):

def test_cycle2(self):
    """Parse "ab" with a grammar in which ``v`` can derive itself via ``_recurse``.

    With ``ambiguity="explicit"`` the result is expected to be a ``start``
    tree whose only child is an ``_ambig`` node with two ``v`` alternatives.
    """
    # Only one rule set is defined: declaring ``start`` a second time in the
    # same grammar string would be a duplicate definition.
    grammar = """
start: _recurse
_recurse: v
v: "b"
| "a" v
| _recurse
"""

    parser = Lark(grammar, ambiguity="explicit", lexer=LEXER)
    tree = parser.parse("ab")
    expected = (
        Tree('start', [
            Tree('_ambig', [
                Tree('v', [Tree('v', [])]),
                Tree('v', [Tree('v', [Tree('v', [])])])
            ])
        ])
    )
    # Single assertion against the ambiguous result; an additional assertion
    # against a non-ambiguous tree would contradict this one.
    self.assertEqual(tree, expected)

def test_cycles(self):
grammar = """
Expand Down

0 comments on commit c1dbe0c

Please sign in to comment.