Skip to content

Commit

Permalink
Hide EOS used by parser from users (#19)
Browse files (browse the repository at this point in the history)
- Hide EOS from user
- Change EOF to EOS
- Update tests
henrylee97 authored Jul 16, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent: 2dc9a8f; commit: fdc1363
Showing 4 changed files with 20 additions and 20 deletions.
10 changes: 7 additions & 3 deletions plare/lexer.py
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
from typing import Callable, Generator

from plare.exception import LexingError
from plare.parser import EOF
from plare.token import Token
from plare.utils import logger

@@ -36,8 +35,12 @@ def lex(self, var: str, src: str) -> Generator[Token]:
state = self.state_factory()
lineno = 1
offset = 0
ended = False

while not ended:
if len(src) == 0:
ended = True

while len(src) > 0:
patterns = self.patterns[var]

for regex, pattern in patterns:
@@ -73,5 +76,6 @@ def lex(self, var: str, src: str) -> Generator[Token]:
offset = len(matched) - matched.rfind("\n")
break
else:
if len(src) == 0:
continue
raise LexingError(f"Unexpected character: {src[0]}", lineno, offset)
yield EOF(src, lineno=lineno, offset=offset)
13 changes: 7 additions & 6 deletions plare/parser.py
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
from __future__ import annotations

from itertools import chain
from typing import Any, Iterable, Protocol

from plare.exception import ParserError, ParsingError
from plare.token import Token
from plare.utils import logger


class EOF(Token):
class EOS(Token):
pass


@@ -347,7 +348,7 @@ def calc_follow(self, rules: dict[str, Rule[T]]) -> set[type[Token]]:

self.follow = set()
if isinstance(self.left, StartVariable):
self.follow.add(EOF)
self.follow.add(EOS)

else:
for rule in rules.values():
@@ -490,7 +491,7 @@ def __init__(
for item in state.items:
if item.next is None:
if item.left in start_variables:
self.table[state.id, EOF] = Accept(item.left.orig)
self.table[state.id, EOS] = Accept(item.left.orig)
else:
for symbol in rules[item.left].follow:
reduce_action = Reduce(
@@ -530,7 +531,7 @@ def __init__(
logger.info("Parser created")

def parse(self, var: str, lexbuf: Iterable[Token]) -> T | Token:
lexbuf = iter(lexbuf)
lexbuf = chain(iter(lexbuf), [EOS("", lineno=0, offset=0)])

state = self.entry_state[var]
stack = [state]
@@ -542,14 +543,14 @@ def parse(self, var: str, lexbuf: Iterable[Token]) -> T | Token:
if token is None:
token = next(lexbuf, None)
if token is None:
raise ParsingError("Unexpected end of token stream")
raise ParsingError("Unexpected end of input")
if key is None:
key = type(token)

try:
action = self.table[state, key]
except KeyError:
raise ParsingError(f"Unexpected symbol: {key}")
raise ParsingError(f"Unexpected symbol: {key}") from None
logger.debug("State: %d, Symbol: %s, Action: %s", state, key, action)
key = None
match action:
8 changes: 3 additions & 5 deletions tests/test_lexer.py
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@

from plare.exception import LexingError
from plare.lexer import Lexer
from plare.parser import EOF, Token
from plare.token import Token


class PLUS(Token):
@@ -73,15 +73,14 @@ def make_positive_integer_lexer():
def test_lex_positive_integer():
lexer = make_positive_integer_lexer()
tokens = list(lexer.lex("start", "+123"))
assert len(tokens) == 3
assert len(tokens) == 2
assert isinstance(tokens[0], PLUS)
assert tokens[0].lineno == 1
assert tokens[0].offset == 0
assert isinstance(tokens[1], NUM)
assert tokens[1].value == 123
assert tokens[1].lineno == 1
assert tokens[1].offset == 1
assert isinstance(tokens[2], EOF)


def test_lex_positive_integer_fail_on_tailing_plus():
@@ -109,7 +108,7 @@ def test_lex_multiple_tokens_for_single_match():
}
)
tokens = list(lexer.lex("start", " \t\n"))
assert len(tokens) == 4
assert len(tokens) == 3
assert isinstance(tokens[0], SPACE)
assert tokens[0].lineno == 1
assert tokens[0].offset == 0
@@ -119,4 +118,3 @@ def test_lex_multiple_tokens_for_single_match():
assert isinstance(tokens[2], SPACE)
assert tokens[2].lineno == 1
assert tokens[2].offset == 2
assert isinstance(tokens[3], EOF)
9 changes: 3 additions & 6 deletions tests/test_parser.py
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@

from typing import Any

from plare.parser import EOF, Parser
from plare.parser import Parser
from plare.token import Token


@@ -56,16 +56,14 @@ def make_positive_integer_parser() -> Parser[Tree]:

def test_parse_positive_integer_without_add():
parser = make_positive_integer_parser()
tree = parser.parse(
"pgm", [NUM("1", lineno=1, offset=0), EOF("", lineno=1, offset=1)]
)
tree = parser.parse("pgm", [NUM("1", lineno=1, offset=0)])
assert isinstance(tree, Num)
assert tree.value == 1


def test_minimal_empty_rule_parser():
parser = Parser({"pgm": [([], list[int], [])]})
parsed = parser.parse("pgm", [EOF("", lineno=1, offset=0)])
parsed = parser.parse("pgm", [])
assert isinstance(parsed, list)
assert len(parsed) == 0

@@ -115,7 +113,6 @@ def test_parse_empty_intlist():
[
LBRACKET("[", lineno=1, offset=0),
RBRACKET("]", lineno=1, offset=1),
EOF("", lineno=1, offset=2),
],
)
assert isinstance(tree, IntList)

0 comments on commit fdc1363

Please sign in to comment.