# scanner.py
# Globals
tokens_file = open('tokens.txt', 'w')
err_file = open('lexical_errors.txt', 'w')
table_file = open('symbol_table.txt', 'w')  # opened here; never written to in this module
file = open('input.txt', 'r')
lastReadChar = None  # to push back a lookahead, set this to the character just read
reserved_keywords = ["if", "else", "void", "int", "while", "break", "switch", "default", "case", "return", "for"]
lineNo = 1
onNewLine = True
errorOnNewLine = True
firstTokenLine = True
firstErrorLine = True
tokenString = ""
tokenStringStartLineNo = 0
tokens = []
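
# Output convention used throughout: tokens.txt and lexical_errors.txt group
# entries by source line, each output line prefixed with "<lineNo>.\t". For
# example, scanning "int x;" as line 2 writes:
# 2.\t(KEYWORD, int) (ID, x) (SYMBOL, ;)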

class PanicException(Exception):
    # Defined for panic-mode recovery; note it is never raised in this module.
    def __init__(self, message):
        self.message = message

class NotRegex:
    # Tiny character matcher used on DFA edges. Each pattern is either a
    # literal character or one of the classes r"\d" (digit), r"\w" (letter
    # or digit), r"\sym" (language symbol), r"\s" (whitespace).
    @staticmethod
    def detect(txt, *regex):
        res = False
        for r in regex:
            if r == r"\d":
                res = res or txt.isdigit()
            elif r == r"\w":
                res = res or txt.isdigit() or txt.isalpha()
            elif r == r"\sym":
                res = res or (txt in [':', ',', ';', '[', ']', '(', ')', '{', '}', '+', '-', '=', '*', '<'])
            elif r == r"\s":
                res = res or (txt in ["\n", "\t", " ", "\f", "\v", "\r"])
            else:
                res = res or txt == r
            if res:
                return res
        return res
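
# Quick illustration of the matcher (illustrative calls, not used below):
# NotRegex.detect("a", r"\w") -> True, NotRegex.detect("+", r"\sym") -> True,
# NotRegex.detect("+", r"\d") -> False.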

class Token:
    def __init__(self, tokenType, value):
        self.tokenType = tokenType  # ID / KEYWORD / NUM / SYMBOL / $
        self.value = value

    @staticmethod
    def create_token(tokenString, stateNumber):
        global reserved_keywords
        if stateNumber == 2:
            if tokenString in reserved_keywords:
                return Token("KEYWORD", tokenString)
            return Token("ID", tokenString)
        elif stateNumber == 4:
            return Token("NUM", tokenString)
        elif stateNumber in (5, 7, 9):
            return Token("SYMBOL", tokenString)
        # states 12 (comment) and 15 (whitespace) produce no token
        return None

    def __str__(self):
        if self.tokenType == '$':
            return '$'
        return f"({self.tokenType}, {self.value}) "

class Node:
    # A DFA state: `number` identifies it, `isFinal` marks accepting states,
    # `edges` holds outgoing Edge objects.
    def __init__(self, number, isFinal=False):
        self.number = number
        self.isFinal = isFinal
        self.edges = []

    def addEdge(self, *edges):
        for e in edges:
            self.edges.append(e)

    # Returns the state reached on `character`, or None if no edge matches;
    # the caller decides whether that means an accepted token or a panic.
    def getNextState(self, character):
        dest = None
        for e in self.edges:
            if e.matches(character):
                dest = e.destinationNode
        return dest

class Edge:
    # A labelled transition: `regexes` are the patterns NotRegex.detect
    # understands, `destinationNode` is the state the edge leads to.
    def __init__(self, number, destination, *regexes):
        self.number = number
        self.regexes = regexes
        self.destinationNode = destination

    def matches(self, character):
        return NotRegex.detect(character, *(self.regexes))
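
# E.g. Edge(0, dest, r"\d", "+").matches(c) is True exactly when c is a digit
# or the literal "+" (dest standing in for any Node; illustrative only).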

# Reports a lexical error for the rejected tokenString, grouping entries in
# lexical_errors.txt by source line, then resets tokenString.
def panic(panicNodeNumber):
    global lineNo
    global errorOnNewLine
    global firstErrorLine
    global tokenString
    global tokenStringStartLineNo
    sp = " "
    if errorOnNewLine:
        sp = ""
        nl = "\n"
        if firstErrorLine:
            firstErrorLine = False
            nl = ""
        err_file.write(f"{nl}{tokenStringStartLineNo}.\t")
        errorOnNewLine = False
    tokenString = tokenString.replace("\n", "")
    if panicNodeNumber in (0, 1, 10):
        err_file.write(f"{sp}({tokenString}, Invalid input)")
    elif panicNodeNumber == 3:
        err_file.write(f"{sp}({tokenString}, Invalid number)")
    elif panicNodeNumber == 8:
        err_file.write(f"{sp}({tokenString}, Unmatched comment)")
    elif panicNodeNumber == 13:
        msg = tokenString if len(tokenString) < 8 else f"{tokenString[0:7]}..."
        err_file.write(f"{sp}({msg}, Unclosed comment)")
    tokenString = ""

# Creates the scanner DFA and returns the start node.
def createDFA():
    s0 = Node(0, isFinal=False)
    s1 = Node(1, isFinal=False)
    s2 = Node(2, isFinal=True)
    s3 = Node(3, isFinal=False)
    s4 = Node(4, isFinal=True)
    s5 = Node(5, isFinal=True)
    s6 = Node(6, isFinal=False)
    s7 = Node(7, isFinal=True)
    s8 = Node(8, isFinal=False)
    s9 = Node(9, isFinal=True)
    s10 = Node(10, isFinal=False)
    s11 = Node(11, isFinal=False)
    s12 = Node(12, isFinal=True)
    s13 = Node(13, isFinal=False)
    s14 = Node(14, isFinal=False)
    s15 = Node(15, isFinal=True)
    # ID / Keyword
    s0.addEdge(Edge(1, s1, r"\w"))
    s1.addEdge(Edge(1, s1, r"\w"), Edge(0, s2, r"\s", r"\sym", "/"))
    # Number
    s0.addEdge(Edge(2, s3, r"\d"))
    s3.addEdge(Edge(1, s3, r"\d"), Edge(0, s4, r"\s", r"\sym"))
    # Symbol
    s0.addEdge(Edge(3, s5, ":", ";", ",", "[", "]", "(", ")", "{", "}", "+", "-", "<"), Edge(4, s6, "="), Edge(5, s8, "*"))
    s6.addEdge(Edge(1, s7, "="), Edge(0, s9, r"\d", r"\w", r"\s", ":", ";", ",", "[", "]", "(", ")", "{", "}", "+", "-", "*", "<"))
    s8.addEdge(Edge(0, s9, r"\d", r"\w", r"\sym", r"\s"))
    # Comment
    s0.addEdge(Edge(6, s10, "/"))
    s10.addEdge(Edge(1, s11, "/"), Edge(2, s13, "*"))
    s11.addEdge(Edge(1, s12, "\n"), Edge(0, s11, r"\sym", r"\w", r"\d", "\t", " ", "\f"))
    s13.addEdge(Edge(1, s14, "*"), Edge(0, s13, r"\s", r"\w", r"\d", ":", ";", ",", "[", "]", "(", ")", "{", "}", "+", "-", "=", "<"))
    s14.addEdge(Edge(1, s14, "*"), Edge(2, s12, "/"), Edge(0, s13, r"\w", r"\d", r"\s", ":", ";", ",", "[", "]", "(", ")", "{", "}", "+", "-", "=", "<"))
    # Whitespace
    s0.addEdge(Edge(7, s15, r"\s", "\v"))
    return s0
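
# State map, read off the edges above: s2 accepts an ID/keyword and s4 a NUM
# (both via one lookahead character that gets pushed back), s5 a single-char
# SYMBOL, s7 "==", s9 "=" or "*" plus a pushed-back lookahead, s12 the end of
# a // or /* */ comment, s15 whitespace. Dead ends are reported via panic().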
startNode = createDFA()

def close_files():
    global firstErrorLine
    file.close()
    table_file.close()
    tokens_file.write("\n")
    tokens_file.close()
    if firstErrorLine:
        err_file.write("There is no lexical error.")
    err_file.close()

# Scans input.txt from the current position and returns the next Token.
# Returns None for spans that produce no token (whitespace, comments, or a
# lexical error) and Token('$', '$') once the input is exhausted.
def get_next_token():
    hasEnded = False
    global lastReadChar
    global lineNo
    global onNewLine
    global tokenString
    global firstTokenLine
    global errorOnNewLine
    global startNode
    global tokenStringStartLineNo
    # start making the token
    currentNode = startNode
    tokenStringStartLineNo = lineNo
    tokenString = ""
    token = None
    while True:
        char = lastReadChar if (lastReadChar != None) else file.read(1)
        lastReadChar = None
        if not char:
            # EOF: anything still buffered is reported as an unclosed comment
            if len(tokenString) > 0:
                panic(13)
            close_files()
            hasEnded = True
            break
        alreadyOnNewLine = onNewLine
        if char == "\n":
            lineNo = lineNo + 1
            onNewLine = True
            errorOnNewLine = True
        tokenString = tokenString + char
        if currentNode != None:
            nextNode = currentNode.getNextState(char)
            panicNode = currentNode
            currentNode = nextNode
            if nextNode != None and nextNode.isFinal:
                if currentNode.number in [2, 4, 9]:
                    # these states accept via lookahead: push the extra
                    # character back and drop it from the lexeme
                    lastReadChar = char
                    tokenString = tokenString[0:-1]
                token = Token.create_token(tokenString, currentNode.number)
                if token != None:
                    tokens.append(token)
                    if char == "\n":
                        # the newline was pushed back, so undo its bookkeeping
                        lineNo = lineNo - 1
                        onNewLine = alreadyOnNewLine
                    sp = " "
                    if onNewLine:
                        sp = ""
                        nl = "\n"
                        if firstTokenLine:
                            firstTokenLine = False
                            nl = ""
                        tokens_file.write(f"{nl}{lineNo}.\t")
                        onNewLine = False
                    tokens_file.write(f"{sp}({token.tokenType}, {token.value})")
                    if char == "\n":
                        onNewLine = True
                break
            elif nextNode != None:
                currentNode = nextNode
            else:
                panic(panicNode.number)
                currentNode = startNode
    # token is made
    if hasEnded:
        return Token('$', '$')
    return token
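
# Minimal driver sketch (illustrative; not part of the original interface).
# It assumes input.txt sits next to this file and drains the scanner until
# the end marker; None results (whitespace, comments, lexical errors) are
# skipped.
if __name__ == "__main__":
    while True:
        tok = get_next_token()
        if tok is not None and tok.tokenType == '$':
            break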