# s1_lexer.py -- regex-based lexer: defines the Token and Lexer classes.
import re
class Token:
    """A lexical token: a kind tag plus the source text that produced it.

    Attributes:
        type: Token kind, e.g. ``'ID'``, ``'LBR'`` or ``'EOF'``.
        value: The matched text (``'EOF'`` for the end-of-input token).
    """

    def __init__(self, type, value):
        # The parameter keeps the name `type` (shadowing the builtin inside
        # this method only) so existing keyword callers keep working.
        self.type = type
        self.value = value

    def __str__(self):
        # An f-string formats any kind object; the previous string
        # concatenation raised TypeError when `type` was not a str.
        return f'Token({self.type}, {self.value!r})'

    def __repr__(self):
        return self.__str__()


class Lexer:
    """Regex-driven tokenizer that hands out one Token per call.

    Usage: ``lexer = Lexer(text)`` and then call ``get_next_token()``
    repeatedly; once the input is exhausted every further call returns
    ``Token('EOF', 'EOF')``.

    Attributes:
        text: The source string being tokenized.
        pos: Offset in ``text`` just past the last consumed match; the
            next token is searched for starting here.
    """

    # A keyword only counts when it is not the prefix of a longer
    # identifier: without this lookahead "ifex" lexed as IFE + ID('x').
    _KEYWORD_END = r'(?![0-9A-Za-z])'

    # Order matters: alternatives are tried left to right, so keywords come
    # before ID, and the comment delimiters before the single-character
    # operators that share a character with them.
    # NOTE: the former ('EMP', r'') entry was dropped. A zero-width pattern
    # always matched before MISMATCH, so unexpected characters produced an
    # endless stream of empty EMP tokens instead of an error.
    _TOKEN_SPECIFICATION = (
        ('IFE', r'ife' + _KEYWORD_END),
        ('IFNE', r'ifne' + _KEYWORD_END),
        ('WHIE', r'whilee' + _KEYWORD_END),
        ('WHINE', r'whilene' + _KEYWORD_END),
        ('RET', r'return' + _KEYWORD_END),
        ('ID', r'[0-9A-Za-z]+'),
        ('LBR', r'\('),
        ('RBR', r'\)'),
        ('LCBR', r'\{'),
        ('RCBR', r'\}'),
        ('LCOMM', r'/\*'),
        ('RCOMM', r'\*/'),
        ('LSBR', r'\['),
        ('RSBR', r'\]'),
        ('COMMA', r','),
        ('SEMICOL', r';'),
        ('ASSIGN', r'='),
        ('UNION', r'\+'),
        ('INTER', r'\*'),
        ('DIFF', r'-'),
        ('SYMDIFF', r'~'),
        ('SPACE', r'\s+'),
        ('MISMATCH', r'.'),
    )

    # Compiled once for the class instead of on every instantiation.
    _TOKEN_RE = re.compile(
        '|'.join('(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION)
    )

    def __init__(self, text):
        """Prepare to tokenize ``text``; no scanning happens until asked."""
        self.text = text
        self.pos = 0

    @property
    def mos(self):
        """All raw regex matches over ``text``, whitespace included.

        Kept so code that read the old ``mos`` attribute still works;
        ``get_next_token`` no longer depends on it.
        """
        return list(self._TOKEN_RE.finditer(self.text))

    def get_next_token(self):
        """Return the next non-whitespace token and advance ``pos``.

        Returns:
            The next ``Token``, or ``Token('EOF', 'EOF')`` once only
            whitespace (or nothing) remains.

        Raises:
            RuntimeError: If the next character belongs to no token; the
                message carries the character and its offset in ``text``.
        """
        text = self.text
        end = len(text)
        # Matching directly at `pos` keeps each call proportional to the
        # token length rather than rescanning every earlier match.
        while self.pos < end:
            # Every character matches SPACE or MISMATCH at the latest
            # ('.' rejects only '\n', which '\s' accepts), so `mo` is
            # never None here.
            mo = self._TOKEN_RE.match(text, self.pos)
            kind = mo.lastgroup
            value = mo.group()
            if kind == 'MISMATCH':
                # Report where the offending character is, not where the
                # previous token ended.
                raise RuntimeError(f'{value!r} unexpected at {mo.start()}')
            self.pos = mo.end()
            if kind != 'SPACE':
                return Token(kind, value)
        # Reached only when the whole input is consumed, so the final
        # character is tokenized like any other.
        return Token('EOF', 'EOF')