-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.py
96 lines (65 loc) · 2.12 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""A useful ply lexer class."""
from pathlib import Path
from typing import *
from ply import lex
from ply.lex import LexToken
class LexingError(Exception):
    """An exception raised when an error is encountered during lexing.

    The offending token is available through the ``token`` property and is
    also forwarded to ``Exception.__init__`` so that ``args`` is populated.
    """

    def __init__(self, token: LexToken):
        """Create the error for the token that could not be lexed.

        Args:
            token: The token which caused the error.
        """
        # Forward the token to the base class. ``copy`` and ``pickle``
        # rebuild exceptions by calling ``cls(*self.args)``; with an empty
        # ``args`` that call would fail because ``token`` is required.
        super().__init__(token)
        self._token = token

    @property
    def token(self) -> LexToken:
        """The token which caused the error."""
        return self._token

    def __str__(self) -> str:
        """Describe the error using the token's value, position and line.

        Only the first element of ``token.value`` is reported as the
        unrecognised character.
        """
        tok = self.token
        return f'Unrecognised character {tok.value[0]} encountered at position {tok.lexpos} on line {tok.lineno}.'
class Lexer:
    """A useful ply lexer class.

    Wraps a lexer built by ``lex.lex`` with this instance as its rule
    module, and exposes helpers to lex strings and files as token streams.
    Presumably subclasses supply the actual ply token rules; none are
    defined here apart from the error handler.
    """

    def __init__(self, **kwargs):
        """Initialize the lexer.

        All arguments are passed as is to lex.lex.
        """
        self._lexer = lex.lex(module=self, **kwargs)

    def t_ANY_error(self, t: LexToken):
        """General error handling."""
        # Turn a lexing failure into an exception carrying the bad token.
        raise LexingError(t)

    def set_input(self, input: str):
        """Sets the lexer input.

        Args:
            input: The input to the lexer.
        """
        self._lexer.input(input)

    def lex_tokens(self) -> Iterator[LexToken]:
        """A generator that yields tokens parsed from the input.

        Iteration stops at the first falsy value returned by the
        underlying lexer's ``token()`` call.

        Yields:
            The next token lexed from the input.
        """
        tok = self._lexer.token()
        while tok:
            yield tok
            tok = self._lexer.token()

    def lex(self, input: str) -> Iterator[LexToken]:
        """Lex an input string.

        The input is handed to the lexer immediately; the tokens themselves
        are produced lazily as the returned generator is consumed.

        Args:
            input: A string to lex.

        Returns:
            A generator of lexed tokens.
        """
        self.set_input(input)
        return self.lex_tokens()

    def lex_file(self, path: Path, encoding: Optional[str] = None) -> Iterator[LexToken]:
        """Lex the contents of a file.

        The whole file is read and closed before lexing starts.

        Args:
            path: The path of the file to lex.
            encoding: The text encoding used to decode the file. ``None``
                (the default) keeps ``open``'s platform-dependent default,
                matching the previous behaviour.

        Returns:
            A generator of the tokens lexed from the file.
        """
        with open(path, 'rt', encoding=encoding) as f:
            text = f.read()
        return self.lex(text)