-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lexer.py
180 lines (158 loc) · 7.6 KB
/
Lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import re
import os
import io
from typing import List, Tuple, Callable, Union, TypeVar
from Enums import Error, Errornr, TokenValues
class Token():
    """One lexical token: its kind (a TokenValues member), the source word, and the line number."""
    def __init__(self, instance : Tuple[TokenValues, str, int]): # e.g. (TokenValues.PLUS, "+", 1)
        # Unpack the (kind, source-text, line-number) triple into attributes.
        kind, text, line = instance[0], instance[1], instance[2]
        self.instance = kind
        self.type = text
        self.linenr = line
    def __str__(self) -> str:
        return "{0} -> {1}".format(str(self.instance.value), self.type)
    def __repr__(self) -> str:
        return str(self)
# returnTupleFromString :: str -> int -> Tuple[TokenValues, str, int]
def returnTupleFromString(stringToParse : str, linenr : int) -> (Tuple[TokenValues, str, int]):
    """Map a single word to its (token-kind, word, line-number) triple.

    Fixed keywords are looked up in a table; numeric literals (optionally
    with a leading '-') become NUMBER; words starting with '@' become VAR;
    anything else yields an ERROR token so the caller can report it.
    """
    # Keyword/symbol table of the language; replaces the original 20-way if-chain.
    keywords = {
        "mas": TokenValues.PLUS,
        "eksi": TokenValues.MIN,
        "vezes": TokenValues.MULTIPLY,
        "dela": TokenValues.DIVIDED_BY,
        "ef": TokenValues.IF,
        "annars": TokenValues.ELSE,
        "er": TokenValues.ASSIGN,
        "aika": TokenValues.WHILE,
        "lig": TokenValues.EQUAL,
        "unterschiedlich": TokenValues.NOTEQUAL,
        ">=": TokenValues.GE,
        "<=": TokenValues.SE,
        ">": TokenValues.GREATER,
        "<": TokenValues.SMALLER,
        "fin": TokenValues.SEMICOLON,
        "haakje_begin": TokenValues.LPAREN,
        "haakje_eind": TokenValues.RPAREN,
        "fa_inizio": TokenValues.LBRACE,
        "fa_fine": TokenValues.RBRACE,
        "taispeain": TokenValues.PRINT,
        "#": TokenValues.PRINT_END,
    }
    if stringToParse in keywords:
        return ((keywords[stringToParse], stringToParse, linenr))
    # Guard the [0]/[1:] indexing below: the original raised IndexError on "".
    if stringToParse == "":
        return ((TokenValues.ERROR, stringToParse, linenr))
    # Number literal, optionally negative ("-" alone is NOT numeric).
    if (stringToParse.isnumeric() or (stringToParse[0] == "-" and stringToParse[1:].isnumeric())):
        return ((TokenValues.NUMBER, stringToParse, linenr))
    # Variable names: '@' followed by word characters ('^' was redundant with fullmatch).
    if (re.fullmatch(r"[@][a-zA-Z0-9_]*", stringToParse)):
        return ((TokenValues.VAR, stringToParse, linenr))
    return ((TokenValues.ERROR, stringToParse, linenr))
# fileToWordlist :: str -> List[str]
def fileToWordlist(string_file : str) -> List[str]:
    """Split one line of source text into words, dropping the empty strings
    produced by consecutive delimiters (' ', '\\t', '\\n', '\\r')."""
    return [word for word in createWordlist(string_file) if word != '']
# createWordlist :: str -> List[str]
def createWordlist(string_file : str) -> List[str]:
    """Split *string_file* on every single occurrence of ' ', '\\t', '\\n' or '\\r'.

    Consecutive delimiters therefore yield empty strings in the result (the
    caller filters them out), and an empty input yields [""], exactly like
    the original per-character recursion. Implemented with re.split because
    the recursive version hit Python's recursion limit on inputs longer
    than ~1000 characters.
    """
    return re.split(r"[ \t\n\r]", string_file)
A = TypeVar('A')
B = TypeVar('B')
C = TypeVar('C')
# foldl :: (A, B, C) -> B -> List[A] -> List[C]
def foldl(f: Callable[[A, B], C], base : B, list : List[A]) -> List[C]:
    """Fold *list* into a single value with *f*, starting from *base*.

    NOTE(review): despite the name this is a *right* fold — elements combine
    as f(x0, f(x1, ... f(xn, base))), exactly like the original recursive
    version. Rewritten iteratively (walking from the right) so long lists no
    longer blow Python's recursion limit. Returns *base* unchanged for [].
    """
    result = base
    for element in reversed(list):
        result = f(element, result)
    return result
A = TypeVar('A')
B = TypeVar('B')
# wordlistToTokens (A -> B -> Token) -> List[str] -> int -> List[Token]
def wordlistToTokens(f : Callable [[A, B], Token], wordlist : List[str], linenr : int) -> (List[Token]):
    """Pair every word with its line number and fold the pairs into a Token list with *f*."""
    string_and_line = [(word, linenr) for word in wordlist]
    return foldl(f, [], string_and_line)
# read_rec :: io.TextIOWrapper -> List[str]
def read_rec(f : io.TextIOWrapper) -> List[str]:
    """Read *f* line by line into a list, ending with the "" that readline
    returns at EOF (the sentinel is kept in the list, as before).

    Comment lines (starting with '$') are replaced by "" so later line
    numbers stay correct. Rewritten iteratively because the recursive
    version hit Python's recursion limit on files with many lines.
    """
    lines = []
    while True:
        read = (f.readline())
        if (read == ""):
            lines.append(read)  # keep the EOF sentinel, matching the old base case
            return lines
        # check if line is comment
        if(read[0] == "$"):
            read = ""  # empty placeholder, so the linenumbers will stay correct
        lines.append(read)
# readFromFile :: str -> List[str] | None
def readFromFile(filename : str) -> Union[List[str], None]:
    """Open *filename* and return its content as a list of lines (see read_rec),
    or None when the file cannot be opened for reading.

    Uses a `with` block and EAFP instead of the original os.access() pre-check,
    which was racy (the file could vanish between check and open) and leaked
    the handle if read_rec raised between open() and close().
    """
    try:
        with open(filename, "r") as f:
            return read_rec(f)
    except OSError:
        return None
# function_TupleToToken :: Tuple[str,int] -> List[Token] -> List[Token]
def function_TupleToToken(x : Tuple[str,int], tail : List[Token]) -> List[Token]:
    """Turn a (word, linenumber) pair into a Token and prepend it to *tail*."""
    word, line = x[0], x[1]
    return [Token(returnTupleFromString(word, line))] + tail
# lex_func :: str -> int -> List[Token]
def lex_func(file_line : str, linenr : int) -> List[Token]:
    """Tokenize one line of source code: split it into words, then map each word to a Token."""
    return wordlistToTokens(function_TupleToToken, fileToWordlist(file_line), linenr)
# lex_rec :: (str -> int -> List[Token]) -> List[str] -> List[Token]
def lex_rec(f : 'Callable[[str,int], List[Token]]', fileContainer : List[str]) -> 'List[Token]':
    """Apply *f* to every line of *fileContainer* (1-based line numbers) and
    concatenate the results in file order.

    Iterative replacement for the original right-recursion, which re-sliced
    the list on every call (O(n^2)) and hit Python's recursion limit on long
    files. Annotations are strings so the signature needs no eager lookups.
    """
    tokenlist = []
    for linenr, line in enumerate(fileContainer, start=1):
        tokenlist += f(line, linenr)
    return tokenlist
# lexer :: str -> Tuple[List[Token], Error] | Tuple[None, Error]
def lexer(filename : str) -> Union[Tuple[List[Token], Error], Tuple[None, Error]]:
    """Tokenize the file *filename*.

    Returns (tokenlist, error): the error is NO_ERROR on success,
    SYNTAX_ERROR (pointing at the first unparsable word) when any word
    produced an ERROR token, and (None, Error) when the file could not be
    opened. All user-visible message strings are unchanged.
    """
    fileContainer = readFromFile(filename)
    # Guard clause; `is None` is the idiomatic identity check (was `!= None`).
    if (fileContainer is None):
        return None, Error(Errornr.FileNotFoundError, "Cannot open " + filename)
    tokenlist = lex_rec(lex_func, fileContainer)
    errorlist = [token for token in tokenlist if token.instance == TokenValues.ERROR]
    error = Error(Errornr.NO_ERROR)
    if errorlist:
        # Only the first offending word is reported, as before.
        error = (Error(Errornr.SYNTAX_ERROR, "On line " + str(errorlist[0].linenr) + ", cannot define " + " \"" + errorlist[0].type + "\" " + " "))
    return tokenlist, error