diff --git a/Specification/Project_C.pdf b/Specification/Project_C.pdf new file mode 100644 index 0000000..530c871 Binary files /dev/null and b/Specification/Project_C.pdf differ diff --git a/input files/input.txt b/input files/input.txt new file mode 100644 index 0000000..245b3d7 --- /dev/null +++ b/input files/input.txt @@ -0,0 +1,27 @@ +proc start { +variable [4] := -5; +variable [int]:=0; +variable[ccc] := ccc; +variable[ccc] := input(cdc); +variable[ccc] := add(ccc,4); +varible[cdc] := add(4,4); +variable[cdc] := sub(5,2); +variable[3] := mult(10,2); +variable[cdc] := mult(mult(12,16),add(3,33)); +variable[cdc] := add(mult(4,-34),add(-10,200)); +variable[cdc] := sub(mult(3,3),add(2,3)); +output := "POES"; +fuck := false; +if(false)then{} +else +{}; +while(true)do{}; + +do{variable[cdc] := sub(10,10);} until(true); + +return; + +arr num[2] variable; num int; num ccc; num cdc; +arr num[2] varible; bool fuck; +},main +{call start; halt;} \ No newline at end of file diff --git a/input files/input2.txt b/input files/input2.txt new file mode 100644 index 0000000..455d6b8 --- /dev/null +++ b/input files/input2.txt @@ -0,0 +1,13 @@ +proc strings{ + call other; + return; + }, + proc other{ + you := "K 09"; + return; + } + ,main { + if(true) then { + output := this[you]; + x := -45; }; + halt;} \ No newline at end of file diff --git a/input files/input3.txt b/input files/input3.txt new file mode 100644 index 0000000..5801d44 --- /dev/null +++ b/input files/input3.txt @@ -0,0 +1,48 @@ +proc proc1{ + a := 400; + b := -1000; + word := "NOT END"; + do { + output := a; + output := word; + a := mult(a,-2); + a := sub(a,1); + } + until (not(larger(a,b))); + + word := "IS END"; + + output := word; + + convoluted := not(not(and(and(larger(90,20),eq(3,4)),and(or(not(false),true),true)))); + + if (convoluted) then { + word:= "TRUE"; + } + else{ + word:= "FALSE"; + }; + + output:= word; + + return; + num a; + string word; + num b; + bool convoluted; +}, +main{ + a := false; + b := 
false; + + a := and(not(a),not(b)); + if(not(not(a)))then{ + call proc1; + }else{ + output := "H2"; + }; + + halt; + bool a; + bool b; +} \ No newline at end of file diff --git a/input files/input4.txt b/input files/input4.txt new file mode 100644 index 0000000..1c8a0e7 --- /dev/null +++ b/input files/input4.txt @@ -0,0 +1 @@ +; \ No newline at end of file diff --git a/input files/spl.txt b/input files/spl.txt new file mode 100644 index 0000000..874aaee --- /dev/null +++ b/input files/spl.txt @@ -0,0 +1,26 @@ +main { + abc1234 := 233; + b := 5; + c := "THIS IS 341"; + d := -1; + + if (larger(mult(abc1234,b),d)) then { + do { + b := add(b,1); + output := b; + } until (eq(abc1234,b)); + + } + else { + c := "W BABY"; + }; + + output := add(abc1234,b); + output := c; + + halt; + num abc1234; + num b; + string c; + num d; +} \ No newline at end of file diff --git a/src/Lexer.py b/src/Lexer.py new file mode 100644 index 0000000..a41b845 --- /dev/null +++ b/src/Lexer.py @@ -0,0 +1,245 @@ +# Author: 0xL0RD + +class Lexer: + Tokens = { + "main": "Main", + "proc": "Proc", + "halt": "Halt", + "return": "Return", + "if": "If", + "then": "Then", + "else": "Else", + "do": "Do", + "while": "While", + "until": "Until", + "output": "Output", + "call": "Call", + "true": "TRUE", + "false": "FALSE", + "input": "Input", + "not": "Not", + "and": "And", + "or": "Or", + "eq": "Eq", + "larger": "Larger", + "add": "Add", + "sub": "Sub", + "mult": "Mult", + "arr": "Arr", + "num": "Num", + "bool": "Bool", + "string": "String", + ":=": "AssignmentToken", + "{": "CurlyOpen", + "}": "CurlyClose", + "(": "RoundOpen", + ")": "RoundClose", + "[": "SquareOpen", + "]": "SquareClose", + ";": "InstrEnd", + ",": "OpDelimeter", + "#SHORTSTRING" : "ShortString", + "#NUMBER": "Number", + "#USERDEFINEDNAME" : "UserDefinedName" + } + + lexerLines = [] + shortStrings:list = [] + userDefinedNames:list = [] + numbers:list = [] + processedTokens:list = [] + delimeters:list = 
["{","}","[","]","(",")",";",",",":="] + targetFileName:str = "" + + def __init__(self, sourceFile): + self.targetFileName = sourceFile + + def __enforceStructure(self, fileData): + lineIndex = 0 + temp : str + for line in fileData: + temp = line + for i in range(len(temp)): + temp = temp.replace(" "*(i+2), "") + temp = temp.replace("\t", "") + for terminal in self.delimeters: + if terminal in temp: + temp = temp.replace(terminal, f" {terminal} ") + temp = temp.replace(" ", " ") + temp = temp.strip() + fileData[lineIndex] = temp + lineIndex += 1 + return fileData + + def __reportLexicalError(self, lineNbr, line, correctionalMessage): + print(f"[-] Lexical Error\n") + print(f"\tLine number: {lineNbr}\n\tIn: {line}\n\tError: {correctionalMessage}") + + def __isValidShortString(self, line:str) -> bool: + validChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 " + if len( line ) > 17: + return False + for char in line[1:len(line)-1:]: + if char not in validChars: + return False + return True + + def __getShortStrings(self, sourceFileData:list): + lineNumber = 1 + for line in sourceFileData: + temp = line + while "\"" in temp: + open = sourceFileData[lineNumber-1].find("\"") + + close = open + sourceFileData[lineNumber-1][open+1::].find("\"") + 1 + + if open == close: + self.__reportLexicalError(lineNumber, line, "Short strings must be enclosed by \"") + return [] + + shortString = sourceFileData[lineNumber-1][open:close+1] + + if not self.__isValidShortString(shortString): + self.__reportLexicalError(lineNumber, line, "Short strings may only feature chars in range A-Z0-9 " + "and spaces (MAX-LENGTH=15)") + return [] + + self.shortStrings.append(shortString) + sourceFileData[lineNumber-1] = sourceFileData[lineNumber-1][:open:]+"#SHORTSTRING"+sourceFileData[lineNumber-1][close+1::] + temp = sourceFileData[lineNumber-1][close+1::] + + lineNumber += 1 + return sourceFileData + + def __isvalidUserDefinedName(self, line:str) -> bool: + validChars = 
"abcdefghijklmnopqrstuvwxyz1234567890" + if line[0] not in validChars[:26]: + return False + + if len(line) > 1: + for char in line[1::]: + if char not in validChars: + return False + return True + + def __getUserDefinedNames(self, sourceFileData:list): + numbers = "-1234567890" + lineNumber = 1 + for line in sourceFileData: + elements:list = line.split(" ") + for element in elements: + if len(element) > 0: + if element not in self.Tokens.keys(): + if element[0] not in numbers: + if not self.__isvalidUserDefinedName(element): + self.__reportLexicalError(lineNumber, line, f"User defined name \"{element}\" does " + f"not conform to regex [a-z].([a-z0-9])*") + return [] + else: + elements[elements.index(element)] = "#USERDEFINEDNAME" + self.userDefinedNames.append(element) + + sourceFileData[lineNumber-1] = " ".join(elements) + + lineNumber += 1 + return sourceFileData + + def __isValidNumber(self, element:str): + numbers = "1234567890" + + if len(element) > 1: + if element[0] == "-": + if element[1] not in numbers[:9:]: + return False + else: + if len(element) > 2: + for char in element[1::]: + if char not in numbers: + return False + return True + elif element[0] in numbers[:9:]: + for char in element[1::]: + if char not in numbers: + return False + return True + else: + return False + else: + if element not in numbers: + return False + return True + + def __getNumbers(self, sourceFileData:list): + lineNumber = 1 + for line in sourceFileData: + elements = line.split(" ") + for element in elements: + if len(element) > 0: + if element not in self.Tokens.keys(): + if not self.__isValidNumber(element): + self.__reportLexicalError(lineNumber, line, f"\"{element}\" is not a valid number in SPL.") + return [] + else: + elements[elements.index(element)] = "#NUMBER" + self.numbers.append(element) + sourceFileData[lineNumber-1] = " ".join(elements) + + lineNumber += 1 + return sourceFileData + + def __generateTokens(self, sourceFileData): + lineNumber = 1 + TokenId = 0 + for 
line in sourceFileData: + elements = line.split(" ") + for element in elements: + if element != "\n": + if element in self.Tokens.keys(): + if element == "#SHORTSTRING": + shortString = self.shortStrings[0] + if len(self.shortStrings) > 1: + self.shortStrings = self.shortStrings[1::] + self.processedTokens.append(f"[{TokenId}: {self.Tokens[element]} -> {shortString}] {lineNumber}") + elif element == "#USERDEFINEDNAME": + userDefinedName = self.userDefinedNames[0] + if len(self.userDefinedNames) > 1: + self.userDefinedNames = self.userDefinedNames[1::] + self.processedTokens.append(f"[{TokenId}: {self.Tokens[element]} -> {userDefinedName}] {lineNumber}") + elif element == "#NUMBER": + number = self.numbers[0] + if len(self.numbers) > 1: + self.numbers = self.numbers[1::] + self.processedTokens.append(f"[{TokenId}: {self.Tokens[element]} -> {number}] {lineNumber}") + else: + self.processedTokens.append(f"[{TokenId}: {self.Tokens[element]} -> {element}] {lineNumber}") + + TokenId += 1 + lineNumber += 1 + + def Scan(self): + with open(self.targetFileName, "r") as f: + self.lexerLines = f.readlines() + + self.lexerLines = self.__getShortStrings(self.lexerLines) + self.lexerLines = self.__enforceStructure(self.lexerLines) + self.lexerLines = self.__getUserDefinedNames(self.lexerLines) + self.lexerLines = self.__getNumbers(self.lexerLines) + + if self.lexerLines == []: + return [], False + + self.__generateTokens(self.lexerLines) + + return self.processedTokens, True + + def printTokens(self): + print("Tokens:") + for token in self.processedTokens: + print(token) + print("="*50) + + def printCompilerLines(self): + print("Compiler lines: ") + for line in self.lexerLines: + print(line) + print("="*50) diff --git a/src/Parser.py b/src/Parser.py new file mode 100644 index 0000000..73c0e1d --- /dev/null +++ b/src/Parser.py @@ -0,0 +1,390 @@ +# Author: 0xL0RD + +from xml.etree import ElementTree as eTree + + +class Node: + value: str + children: list + parent: any + subValue: 
str + lineNumber: str + ID: str + scopeID: str + parent_scope_id: str + semantic_id: str + data_type: str + closer = None + + def __init__(self, value, children=None): + if children is None: + children = [] + self.value = value + self.children = children + self.parent = None + self.lineNumber = "" + self.scopeID = "" + self.closer = None + self.semantic_id = "" + self.data_type = "U" + + +class SyntaxTree: + root: Node + terminalTokens: list + ID = 0 + fileName: str + + specialCases = { + ":=": "AssignmentOperator", + "{": "OpenBraceCurly", + "}": "CloseBraceCurly", + "(": "OpenBraceRound", + ")": "CloseBraceRound", + "[": "OpenBraceRect", + "]": "CloseBraceRect", + ";": "InstrEnd", + ",": "DelimeterComma" + } + + def __init__(self, rootNode: Node, terminals): + self.root = rootNode + self.terminalTokens = terminals + + def createXMLTree(self, parent, node: Node): + if parent == None: + nodeElement = eTree.Element(node.value) + else: + if node.value in self.terminalTokens: + if node.value in self.specialCases.keys(): + nodeElement = eTree.SubElement(parent, self.specialCases[node.value]) + else: + nodeElement = eTree.SubElement(parent, node.value) + + if (node.value == "Number" or node.value == "ShortString" or node.value == "UserDefinedName"): + nodeElement.text = f"{node.subValue}" + else: + nodeElement.text = node.value + + if (node.lineNumber != ""): + nodeElement.set("lineNumber", node.lineNumber) + else: + elementName = node.value + if elementName == "ε": + elementName = "EPSILON" + elif elementName == "VAR'": + elementName = "FIELDPRIME" + elif elementName == "VAR": + elementName = "FIELD" + + nodeElement = eTree.SubElement(parent, elementName) + + nodeElement.set("id", str(self.ID)) + node.ID = self.ID + + self.ID += 1 + + for child in node.children: + self.createXMLTree(nodeElement, child) + + if parent == None: + data = eTree.tostring(nodeElement) + + with open(self.fileName, "wb+") as f: + f.write(data) + + +class Parser: + Table: dict = {} + Tree: 
SyntaxTree + Stack = list + srcFile: str + + def __init__(self, sourcefile): + self.srcFile = sourcefile + defualtObj = { + "SPL": "", + "SPLProgr": "", + "ProcDefs": "", + "PD": "", + "Algorithm": "", + "Instr": "", + "Assign": "", + "Branch": "", + "Alternat": "", + "Loop": "", + "LHS": "", + "Expr": "", + "VAR": "", + "VAR'": "", + "PCall": "", + "Const": "", + "UnOp": "", + "BinOp": "", + "VarDecl": "", + "Dec": "", + "TYP": "", + } + + self.Table["main"] = defualtObj.copy() + self.Table["main"]["SPL"] = "SPLProgr" + self.Table["main"]["SPLProgr"] = "ProcDefs main { Algorithm halt ; VarDecl }" + self.Table["main"]["ProcDefs"] = "ε" + + self.Table["{"] = defualtObj.copy() + + self.Table["halt"] = defualtObj.copy() + self.Table["halt"]["Algorithm"] = "ε" + + self.Table[";"] = defualtObj.copy() + self.Table[";"]["Alternat"] = "ε" + self.Table[";"]["VAR"] = "ε" + + self.Table["}"] = defualtObj.copy() + self.Table["}"]["Algorithm"] = "ε" + self.Table["}"]["VarDecl"] = "ε" + + self.Table[","] = defualtObj.copy() + self.Table[","]["VAR"] = "ε" + self.Table[","]["ProcDefs"] = "ε" + + self.Table["proc"] = defualtObj.copy() + self.Table["proc"]["SPL"] = "SPLProgr" + self.Table["proc"]["SPLProgr"] = "ProcDefs main { Algorithm halt ; VarDecl }" + self.Table["proc"]["ProcDefs"] = "PD , ProcDefs" + self.Table["proc"]["PD"] = "proc UserDefinedName { ProcDefs Algorithm return ; VarDecl }" + + self.Table["UserDefinedName"] = defualtObj.copy() + self.Table["UserDefinedName"]["ProcDefs"] = "ε" + self.Table["UserDefinedName"]["Algorithm"] = "Instr ; Algorithm" + self.Table["UserDefinedName"]["Instr"] = "Assign" + self.Table["UserDefinedName"]["Assign"] = "LHS := Expr" + self.Table["UserDefinedName"]["LHS"] = "UserDefinedName VAR" + self.Table["UserDefinedName"]["Expr"] = "UserDefinedName VAR" + self.Table["UserDefinedName"]["VAR'"] = "UserDefinedName ]" + + self.Table["return"] = defualtObj.copy() + self.Table["return"]["Algorithm"] = "ε" + self.Table["return"]["ProcDefs"] = "ε" + 
+ self.Table[":="] = defualtObj.copy() + self.Table[":="]["VAR"] = "ε" + + self.Table["if"] = defualtObj.copy() + self.Table["if"]["ProcDefs"] = "ε" + self.Table["if"]["Algorithm"] = "Instr ; Algorithm" + self.Table["if"]["Instr"] = "Branch" + self.Table["if"]["Branch"] = "if ( Expr ) then { Algorithm } Alternat" + + self.Table["("] = defualtObj.copy() + + self.Table[")"] = defualtObj.copy() + self.Table[")"]["VAR"] = "ε" + + self.Table["then"] = defualtObj.copy() + + self.Table["else"] = defualtObj.copy() + self.Table["else"]["Alternat"] = "else { Algorithm }" + + self.Table["do"] = defualtObj.copy() + self.Table["do"]["ProcDefs"] = "ε" + self.Table["do"]["Algorithm"] = "Instr ; Algorithm" + self.Table["do"]["Instr"] = "Loop" + self.Table["do"]["Loop"] = "do { Algorithm } until ( Expr )" + + self.Table["until"] = defualtObj.copy() + + self.Table["while"] = defualtObj.copy() + self.Table["while"]["ProcDefs"] = "ε" + self.Table["while"]["Algorithm"] = "Instr ; Algorithm" + self.Table["while"]["Instr"] = "Loop" + self.Table["while"]["Loop"] = "while ( Expr ) do { Algorithm }" + + self.Table["output"] = defualtObj.copy() + self.Table["output"]["ProcDefs"] = "ε" + self.Table["output"]["Algorithm"] = "Instr ; Algorithm" + self.Table["output"]["Instr"] = "Assign" + self.Table["output"]["Expr"] = "LHS := Expr" + self.Table["output"]["Loop"] = "while ( Expr ) do { Algorithm }" + self.Table["output"]["LHS"] = "output" + self.Table["output"]["Assign"] = "LHS := Expr" + + self.Table["["] = defualtObj.copy() + self.Table["["]["VAR"] = "[ VAR'" + + self.Table["]"] = defualtObj.copy() + + self.Table["call"] = defualtObj.copy() + self.Table["call"]["ProcDefs"] = "ε" + self.Table["call"]["Algorithm"] = "Instr ; Algorithm" + self.Table["call"]["Instr"] = "PCall" + self.Table["call"]["PCall"] = "call UserDefinedName" + + self.Table["ShortString"] = defualtObj.copy() + self.Table["ShortString"]["Expr"] = "Const" + self.Table["ShortString"]["VAR'"] = "Const ]" + 
self.Table["ShortString"]["Const"] = "ShortString" + + self.Table["Number"] = defualtObj.copy() + self.Table["Number"]["Expr"] = "Const" + self.Table["Number"]["VAR'"] = "Const ]" + self.Table["Number"]["Const"] = "Number" + + self.Table["true"] = defualtObj.copy() + self.Table["true"]["Expr"] = "Const" + self.Table["true"]["VAR'"] = "Const ]" + self.Table["true"]["Const"] = "true" + + self.Table["false"] = defualtObj.copy() + self.Table["false"]["Expr"] = "Const" + self.Table["false"]["VAR'"] = "Const ]" + self.Table["false"]["Const"] = "false" + + self.Table["input"] = defualtObj.copy() + self.Table["input"]["Expr"] = "UnOp" + self.Table["input"]["UnOp"] = "input ( UserDefinedName )" + + self.Table["not"] = defualtObj.copy() + self.Table["not"]["Expr"] = "UnOp" + self.Table["not"]["UnOp"] = "not ( Expr )" + + self.Table["and"] = defualtObj.copy() + self.Table["and"]["Expr"] = "BinOp" + self.Table["and"]["BinOp"] = "and ( Expr , Expr )" + + self.Table["or"] = defualtObj.copy() + self.Table["or"]["Expr"] = "BinOp" + self.Table["or"]["BinOp"] = "or ( Expr , Expr )" + + self.Table["eq"] = defualtObj.copy() + self.Table["eq"]["Expr"] = "BinOp" + self.Table["eq"]["BinOp"] = "eq ( Expr , Expr )" + + self.Table["larger"] = defualtObj.copy() + self.Table["larger"]["Expr"] = "BinOp" + self.Table["larger"]["BinOp"] = "larger ( Expr , Expr )" + + self.Table["add"] = defualtObj.copy() + self.Table["add"]["Expr"] = "BinOp" + self.Table["add"]["BinOp"] = "add ( Expr , Expr )" + + self.Table["sub"] = defualtObj.copy() + self.Table["sub"]["Expr"] = "BinOp" + self.Table["sub"]["BinOp"] = "sub ( Expr , Expr )" + + self.Table["mult"] = defualtObj.copy() + self.Table["mult"]["Expr"] = "BinOp" + self.Table["mult"]["BinOp"] = "mult ( Expr , Expr )" + + self.Table["arr"] = defualtObj.copy() + self.Table["arr"]["VarDecl"] = "Dec ; VarDecl" + self.Table["arr"]["Dec"] = "arr TYP [ Const ] UserDefinedName" + + self.Table["num"] = defualtObj.copy() + self.Table["num"]["VarDecl"] = "Dec ; 
VarDecl" + self.Table["num"]["Dec"] = "TYP UserDefinedName" + self.Table["num"]["TYP"] = "num" + + self.Table["bool"] = defualtObj.copy() + self.Table["bool"]["VarDecl"] = "Dec ; VarDecl" + self.Table["bool"]["Dec"] = "TYP UserDefinedName" + self.Table["bool"]["TYP"] = "bool" + + self.Table["string"] = defualtObj.copy() + self.Table["string"]["VarDecl"] = "Dec ; VarDecl" + self.Table["string"]["Dec"] = "TYP UserDefinedName" + self.Table["string"]["TYP"] = "string" + + def __reportError(self, input, expected): + print(f"\t*************[ Syntax Error ]*************") + FOUND = input["type"] + if FOUND == "": + FOUND = input["value"] + + line: str + + with open(self.srcFile, "r") as f: + lines = f.readlines() + line = lines[int(input['line']) - 1] + + print( + f"\t\tFound: {FOUND}\n\t\tIn sequence: \'{line.strip()}\' on line {input['line']}\n\t\tExpected: {expected.value}\n") + + def __createNodes(self, elements: list) -> list: + nodes: list = [] + + for element in elements: + nodes.append(Node(element)) + + return nodes + + def __addChildren(self, parent: Node, children: list): + for child in children: + child.parent = parent + + parent.children = children + + def parse(self, tokens) -> bool: + input = [] + + for token in tokens: + Type = token.split(" -> ")[0][token.find(": ") + 2::] + lineNumber = token.split(" -> ")[1].split("] ")[1].strip("\n") + rToken = token.split(" -> ")[1].split("] ")[0] + + if Type == "Number" or Type == "UserDefinedName" or Type == "ShortString": + input.append({ + "type": Type, + "line": lineNumber, + "value": f"{rToken}" + }) + + else: + input.append({ + "type": rToken, + "line": lineNumber, + "value": "" + }) + + input = list(reversed(input)) + first = Node("SPL") + self.Tree = SyntaxTree(first, self.Table.keys()) + self.Stack = [first] + + while len(self.Stack) > 0 and len(input) > 0: + top = len(self.Stack) - 1 + iTop = len(input) - 1 + + if (self.Stack[top].value in self.Table.keys()): + if (self.Stack[top].value == 
input[iTop]["type"]): + if (input[iTop]["type"] == "Number" or input[iTop]["type"] == "ShortString" or input[iTop][ + "type"] == "UserDefinedName"): + self.Stack[top].subValue = input[iTop]["value"] + self.Stack[top].lineNumber = input[iTop]["line"] + input.pop() + self.Stack.pop() + else: + self.__reportError(input.pop(), self.Stack.pop()) + return False + elif (self.Table[(input[iTop]["type"])][self.Stack[top].value] == ""): + self.__reportError(input[iTop], self.Stack[top]) + return False + else: + nonTerminal = self.Stack.pop() + MatchedValueAsStackElements = list( + reversed(self.Table[input[iTop]["type"]][nonTerminal.value].split(" "))) + children = self.__createNodes(MatchedValueAsStackElements) + self.__addChildren(nonTerminal, list(reversed(children))) + if children[0].value != "ε": + self.Stack.extend(children) + + if len(self.Stack) != len(input): + with open(self.srcFile, "r") as f: + allLines = f.readlines() + self.__reportError({"type": "EOF", "line": str(len(allLines))}, self.Stack.pop()) + return False + return True + + def saveSyntaxTreeXML(self, filename): + self.Tree.fileName = filename + self.Tree.createXMLTree(None, self.Tree.root) diff --git a/src/SemanticAnalyzer.py b/src/SemanticAnalyzer.py new file mode 100644 index 0000000..3e79e41 --- /dev/null +++ b/src/SemanticAnalyzer.py @@ -0,0 +1,279 @@ +# Author: 0xL0RD + +from Parser import Node, SyntaxTree +from SemanticTable import SemanticTable +from SymanticRuleInspector import SemanticRuleEnforcer +from TypeChecker import TypeChecker + + +class SemanticAnalyzer: + initialTree: SyntaxTree + ScopeTree: SyntaxTree + ScopeID: int + ProcID: int + VarID: int + SemanticID: int + discoveredIds: dict = {} + semantic_table: SemanticTable + source_file_name: str + rule_enforcer: SemanticRuleEnforcer + type_checker: TypeChecker + + def __init__(self, initialSyntaxTree: SyntaxTree, sourceFile): + self.initialTree = initialSyntaxTree + self.semantic_table = SemanticTable() + self.ScopeID = 0 + self.VarID = 
0 + self.ProcID = 0 + self.SemanticID = 0 + self.source_file_name = sourceFile + + def __discoveredIDsContains(self, Scope: Node) -> bool: + for Ids in self.discoveredIds.keys(): + start, end = Ids + if Scope.ID == start and Scope.closer.ID == end: + return True + return False + + def __assignParent(self, newScope: Node): + n_scope_start = newScope.ID + n_scope_end = newScope.closer.ID + match = False + bestMatch = None + bestDifference = None + if not self.__discoveredIDsContains(newScope): + for Ids in self.discoveredIds.keys(): + start, end = Ids + n_scope_start = newScope.ID + n_scope_end = newScope.closer.ID + if int(start) < int(n_scope_start) and int(end) > int(n_scope_end): + difference = (int(n_scope_start) - int(start)) + (int(end) - int(n_scope_end)) + if bestMatch is None: + bestMatch = Ids + bestDifference = difference + match = True + else: + if difference < bestDifference: + bestMatch = Ids + bestDifference = difference + if not match: + self.ScopeTree.root.children.append(newScope) + newScope.parent = self.ScopeTree.root + newScope.parent_scope_id = "0" + else: + parent = self.discoveredIds[bestMatch] + newScope.parent_scope_id = self.discoveredIds[bestMatch].scopeID + parent.children.append(newScope) + newScope.parent = parent + newScope.scopeID = str(self.ScopeID) + self.semantic_table.add_scope(newScope.value, newScope.ID, newScope.scopeID, newScope.parent_scope_id) + self.ScopeID += 1 + self.discoveredIds.update({(n_scope_start, n_scope_end): newScope}) + + def __searchForScopeIdentifiers(self, current: Node): + todo = [current] + done = [] + while len(todo) > 0: + todo.remove(current) + done.append(current) + for child in current.children: + if child.value == "{" and child.parent.value == "PD": + prev = current.children.index(child) - 1 + if prev > -1: + try: + newScopeNode = Node(current.children[prev].subValue, []) + except: + newScopeNode = Node(current.children[prev].value, []) + else: + newScopeNode = Node("ArbitraryScope", []) + + 
newScopeNode.ID = current.children[current.children.index(child) - 1].ID + newStart = current.children.index(child) + 1 + for sChild in current.children[newStart::]: + if sChild.value == "}": + closer = Node(sChild.value, []) + closer.ID = sChild.ID + newScopeNode.closer = closer + break + self.__assignParent(newScopeNode) + current.children[prev].scopeID = newScopeNode.scopeID + if child not in done: + todo.append(child) + + if len(todo) > 0: + current = todo[0] + + def __recursiveMainScopeSearch(self, current: Node): + mainFound = False + for child in current.children: + newStart = current.children.index(child) + 1 + if child.value == "main" and current.children[newStart].value == "{": + newRootNode = Node(child.value, []) + for cChild in current.children[newStart + 1::]: + if cChild.value == "}": + newlyDiscoveredScope = (current.children[newStart].ID, cChild.ID) + newRootNode.closer = cChild + newRootNode.ID = child.ID + newRootNode.scopeID = str(self.ScopeID) + self.semantic_table.add_scope(newRootNode.value, newRootNode.ID, newRootNode.scopeID, "-") + self.ScopeID += 1 + self.discoveredIds.update({newlyDiscoveredScope: newRootNode}) + self.ScopeTree = SyntaxTree(newRootNode, self.initialTree.terminalTokens) + return newRootNode + + if not mainFound: + for child in current.children: + result = self.__recursiveMainScopeSearch(child) + if result: + return result + + def __findScopes(self): + self.__recursiveMainScopeSearch(self.initialTree.root) + self.__searchForScopeIdentifiers(self.initialTree.root) + + @staticmethod + def __check_parent_for_user_defined_name(c_node: Node): + target_parent_found = False + parent = c_node.parent + while not target_parent_found: + if parent is not None: + if parent.value == "PD" and parent != c_node.parent: + target_parent_found = True + else: + parent = parent.parent + else: + return False + if parent.children[1].value == "UserDefinedName" and parent.children[1].subValue == c_node.subValue: + return True + return False + + def 
__getNodeScope(self, current: Node, is_proc=False): + bestMatch = None + bestDifference = None + for Ids in self.discoveredIds.keys(): + start, end = Ids + if int(start) < int(current.ID) < int(end): + difference = (int(current.ID) - int(start)) + (int(end) - int(current.ID)) + + if current.value != "main": + if not is_proc: + existing_entry_of_this_scope = self.semantic_table.get_variable_with(current.subValue, + self.discoveredIds[ + Ids].scopeID) + if current.parent.value != "Decl": + if existing_entry_of_this_scope is not None and current.semantic_id == "": + current.semantic_id = existing_entry_of_this_scope["SemanticID"] + + if bestMatch is None: + bestMatch = Ids + bestDifference = difference + else: + if difference < bestDifference: + bestMatch = Ids + bestDifference = difference + if bestMatch is None: + if current.scopeID == "": + current.scopeID = "0" + else: + if current.scopeID == "": + current.scopeID = self.discoveredIds[bestMatch].scopeID + if is_proc: + # Inner scope + existing_entry_of_this_scope = self.semantic_table.get_procedure_with(current.subValue, + self.discoveredIds[ + bestMatch].scopeID) + if existing_entry_of_this_scope is not None: + self.__report_semantic_error(current, + f"Redefinition of existing procedure within the same scope.\n\t\tProc: {existing_entry_of_this_scope['Token']}") + + # Parent scope. 
+ if self.__check_parent_for_user_defined_name(current): + self.__report_semantic_error(current, + f"Redefinition of parent procedure.\n\t\tProc: {current.subValue}") + + if not is_proc: + existing_entry_of_this_scope = self.semantic_table.get_variable_with(current.subValue, + self.discoveredIds[bestMatch].scopeID) + if existing_entry_of_this_scope is not None: + self.__report_semantic_error(current, + f"Redeclaration of variable '{current.subValue}' within same scope.") + + if current.semantic_id == "": + current.semantic_id = str(self.SemanticID) + self.SemanticID += 1 + + @staticmethod + def __is_array(current: Node): + if current.parent.children[0].value == "arr": + return True + return False + + @staticmethod + def __getType(current: Node): + parent = current.parent + + for child in parent.children: + if child.value == "TYP": + return child.children[0].value + + raise "Critical error - type not found. Possible hacking detected." + + def __recursiveTableBuild(self, current: Node): + if current.value == "UserDefinedName": + if current.parent.value == "PD": + self.__getNodeScope(current, True) + self.semantic_table.add_procedure_entry(current.subValue, current.scopeID, current.ID, + current.semantic_id) + else: + self.__getNodeScope(current, False) + if current.parent.value == "Dec": + type_ = self.__getType(current) + if self.__is_array(current): + size_node = current.parent.children[3] + if size_node.value == "Const": + size_ = size_node.children[0].subValue + else: + size_ = size_node.subValue + self.semantic_table.add_variable_entry(current.subValue + "[]", current.scopeID, current.ID, + current.semantic_id, type_, size_) + else: + self.semantic_table.add_variable_entry(current.subValue, current.scopeID, current.ID, + current.semantic_id, type_) + elif current.value == "main": + self.__getNodeScope(current, True) + self.semantic_table.add_procedure_entry(current.value, current.scopeID, current.ID, current.semantic_id) + + for child in current.children: + 
self.__recursiveTableBuild(child) + + def __report_semantic_error(self, erroneous_node: Node, message: str): + lines: list + with open(self.source_file_name, "r") as f: + lines = f.readlines() + + raise Exception( + f"{message}\n\t\tLine {erroneous_node.lineNumber}: {lines[int(erroneous_node.lineNumber) - 1].strip()}") + + def saveScopeTreeXML(self, fileName: str): + self.ScopeTree.fileName = fileName + self.ScopeTree.createXMLTree(None, self.ScopeTree.root) + + def Scan(self): + self.__findScopes() + self.__recursiveTableBuild(self.initialTree.root) + self.rule_enforcer = SemanticRuleEnforcer(self.semantic_table, self.initialTree) + rule_inspection = self.rule_enforcer.procedure_rule_inspection() + if rule_inspection is not None: + self.__report_semantic_error(rule_inspection, + f"APPL-DECL Error - call made to out-of-scope or non-existing procedure '{rule_inspection.subValue}'") + rule_inspection = self.rule_enforcer.variable_rule_inspection() + if rule_inspection is not None: + self.__report_semantic_error(rule_inspection, + f"APPL-DECL Error - variable '{rule_inspection.subValue}' is referenced but is never declared.") + + self.type_checker = TypeChecker(self.semantic_table, self.initialTree, self.source_file_name) + self.type_checker.check() + return True + + def print_symbol_table(self): + self.semantic_table.print_table() diff --git a/src/SemanticTable.py b/src/SemanticTable.py new file mode 100644 index 0000000..8c435c8 --- /dev/null +++ b/src/SemanticTable.py @@ -0,0 +1,207 @@ +# Author: 0xL0RD + +class SemanticTable: + Table = { + "Variables": [], + "Procedures": [], + "Scopes": [], + } + + def add_variable_entry(self, user_defined_name: str, scope_id: str, variable_id: str, semantic_id: str, + var_type: str, size="TBA"): + if not self.__variable_contains(user_defined_name, scope_id): + self.Table["Variables"].append({ + "Token": user_defined_name, + "VariableID": variable_id, + "Type": var_type, + "ScopeID": scope_id, + "SemanticID": semantic_id, + 
"Size": size + }) + + def add_procedure_entry(self, user_defined_name: str, scope_id: str, proc_id: str, semantic_id: str): + if not self.__procedure_contains(user_defined_name, scope_id): + self.Table["Procedures"].append({ + "Token": user_defined_name, + "ProcID": proc_id, + "ScopeID": scope_id, + "SemanticID": semantic_id + }) + + def add_scope(self, token: str, token_id: str, scope_id: str, parent_s: str): + self.Table["Scopes"].append({ + "Token": token, + "TokenID": token_id, + "ScopeID": scope_id, + "ParentScopeID": parent_s + }) + + def get_ancestor_scopes(self, scope_id: str): + target_scope_id = scope_id + ancestor_scopes = [target_scope_id] + while target_scope_id != "-": + for scope in self.Table["Scopes"]: + if scope["ScopeID"] == target_scope_id: + target_scope_id = scope["ParentScopeID"] + ancestor_scopes.append(target_scope_id) + break + return ancestor_scopes + + def get_nearest_ancestor_scope(self, scope_id: str): + scopes = self.get_ancestor_scopes(scope_id) + scopes.remove("-") + int_scopes = [int(scope) for scope in scopes] + int_scopes.sort() + return str(int_scopes.pop()) + + def get_successor_scopes(self, scope_id: str): + target_scope_id = scope_id + successor_scopes = [target_scope_id] + child_scopes = [] + target_scope = scope_id + while target_scope is not None: + for scope in self.Table["Scopes"]: + if scope["ParentScopeID"] == target_scope_id and scope["ScopeID"] not in successor_scopes: + successor_scopes.append(scope["ScopeID"]) + child_scopes.append(scope["ScopeID"]) + target_scope_id = scope["ParentScopeID"] + if len(child_scopes) < 1: + target_scope = None + else: + target_scope = child_scopes[0] + child_scopes = child_scopes[1::] + return successor_scopes + + def get_callable_procs_from(self, scope_id: str): + callable_funcs = [] + parent_scope_id = None + + for scope in self.Table["Scopes"]: + if scope["Token"] not in callable_funcs: + if scope["ParentScopeID"] == scope_id or scope["ScopeID"] == scope_id: + 
def get_nearest_variable(self, udn: str, scope_id: str):
    """Return the declaration of ``udn`` whose scope id is numerically closest.

    "Closest" is the smallest ``abs(int(entry["ScopeID"]) - int(scope_id))``
    among the entries of ``self.Table["Variables"]`` whose ``"Token"`` equals
    ``udn``; on a tie the entry that appears first in the table wins.

    Only entries that actually match ``udn`` have their scope id converted,
    so an entry for a different name can never abort the lookup.

    :param udn: user-defined variable name to look for.
    :param scope_id: numeric scope id (as a string) to measure distance from.
    :return: the matching table entry, or ``None`` when ``udn`` is unknown.
    """
    candidates = [entry for entry in self.Table["Variables"] if entry["Token"] == udn]
    if not candidates:
        return None

    origin = int(scope_id)
    # min() keeps the first of several equally near entries.
    return min(candidates, key=lambda entry: abs(int(entry["ScopeID"]) - origin))
def get_procedure_with(self, user_defined_name: str, scope_id: str) -> "dict | None":
    """Return the procedure entry named ``user_defined_name`` in ``scope_id``.

    ``self.Table["Procedures"]`` is scanned once; the first entry whose
    ``"Token"`` and ``"ScopeID"`` both match is returned.

    :param user_defined_name: procedure name as written in the source.
    :param scope_id: id of the scope the procedure must be recorded under.
    :return: the matching entry, or ``None`` when there is none.
    """
    for entry in self.Table["Procedures"]:
        if entry["Token"] == user_defined_name and entry["ScopeID"] == scope_id:
            return entry
    return None
def __appl_decl_var_check(self, n: Node):
    """Find the first variable reference that has no visible declaration.

    The tree below ``n`` is walked depth-first.  A ``UserDefinedName`` node
    counts as a variable reference unless its parent is a ``PD``, ``Dec`` or
    ``PCall`` node.  Array references are looked up under the name with a
    ``"[]"`` suffix, which is how array declarations are stored in the
    semantic table.

    A reference is accepted when *any* declaration with that name lives in
    the referencing node's own scope or in one of its ancestor scopes.  The
    scope list is taken from the node that *uses* the variable; comparing a
    declaration's scope with its own ancestor list would always succeed and
    would let out-of-scope references through.

    :param n: root of the subtree to inspect.
    :return: the offending ``UserDefinedName`` node, or ``None`` if every
        reference under ``n`` is declared in a visible scope.
    """
    if n.value == "UserDefinedName" and n.parent.value not in ("PD", "Dec", "PCall"):
        target_var = n.subValue
        if self.__is_array(n):
            target_var += "[]"

        visible_scopes = self.sem_table.get_ancestor_scopes(n.scopeID)
        declared = any(
            entry["Token"] == target_var and entry["ScopeID"] in visible_scopes
            for entry in self.sem_table.Table["Variables"]
        )
        if not declared:
            return n

    for child in n.children:
        lower_check = self.__appl_decl_var_check(child)
        if lower_check is not None:
            return lower_check

    return None
def __matching_proc_call_exists(self, n: Node, proc: str, scope_ids: list):
    """Report whether the subtree under ``n`` contains a call to ``proc``.

    A ``PCall`` node matches when the name node at ``children[1]`` carries
    ``proc`` as its ``subValue`` and its ``scopeID`` is one of ``scope_ids``.

    :param n: root of the subtree to search.
    :param proc: procedure name to look for.
    :param scope_ids: collection of scope ids a call may originate from.
        It must be a list/tuple/set of ids: with a plain string the ``in``
        test would degrade to substring matching.
    :return: ``True`` if a matching call exists, otherwise ``False``.
    """
    if n.value == "PCall":
        callee = n.children[1]
        if callee.subValue == proc and callee.scopeID in scope_ids:
            return True

    # any() stops at the first subtree that contains a matching call.
    return any(self.__matching_proc_call_exists(child, proc, scope_ids) for child in n.children)
def __get_label(self):
    """Return the next unused symbolic label (two letters plus one digit).

    Labels are produced odometer-style from ``var_naming_cipher_one`` (the
    letters) and ``var_naming_cipher_two`` (the digits): ``AA0`` ... ``AA9``,
    ``AB0`` ... ``AZ9``, ``BA0`` ... ``ZZ9``.  The position of the next label
    is kept in ``first_char_pos``, ``second_char_pos`` and ``num_char_pos``.

    Every digit and every letter is used (the rollover happens at the length
    of each alphabet, not one before it), and running out of labels raises
    the documented exception instead of an ``IndexError``.

    :return: the label as a three-character string.
    :raises Exception: "Too many variables to compile." once all
        ``len(letters) ** 2 * len(digits)`` labels have been handed out.
    """
    letters = self.var_naming_cipher_one
    digits = self.var_naming_cipher_two

    if self.first_char_pos >= len(letters):
        raise Exception("Too many variables to compile.")

    var_name = letters[self.first_char_pos] + letters[self.second_char_pos]
    var_name += digits[self.num_char_pos]

    # Advance the odometer: digit first, then second letter, then first letter.
    self.num_char_pos += 1
    if self.num_char_pos == len(digits):
        self.num_char_pos = 0
        self.second_char_pos += 1
        if self.second_char_pos == len(letters):
            self.second_char_pos = 0
            self.first_char_pos += 1

    return var_name
def __user_defined_name(self, n: Node, scope_id):
    """Return the BASIC symbol that stands for the variable referenced by ``n``.

    The name is first looked up as written; if that finds nothing it is
    looked up again with a ``"[]"`` suffix, the form under which array
    declarations are stored.  When a symbol was found and the node's right
    sibling carries more than one child, the translated subscript is
    appended as ``"(<subscript>)"``.

    :param n: ``UserDefinedName`` node being translated.
    :param scope_id: scope from which the variable is referenced.
    :return: the symbol (with subscript for an indexed reference), or
        ``None`` when no declaration could be resolved.
    """
    label = self.__get_var_by_name_and_scope_id(n.subValue, scope_id)
    if not label:
        label = self.__get_var_by_name_and_scope_id(n.subValue + "[]", scope_id)

    siblings = n.parent.children
    sibling_position = siblings.index(n) + 1

    # Mirror __lhs: only decorate a symbol that was actually resolved, and
    # only when there is a right sibling to read the subscript from.
    if label and sibling_position < len(siblings):
        var_sibling = siblings[sibling_position]
        if len(var_sibling.children) > 1:
            subscript = self.__subscript(var_sibling.children[1], scope_id)
            label += f"({subscript})"

    return label
def __bin_op(self, n: Node, scope_id: str, instr_list: list, is_assignment: bool = False, var_label=""):
    """Emit the instructions for a binary operation and name its result.

    Both operands (``children[2]`` and ``children[4]``) are translated
    first.  The result is stored in ``var_label``; when the call is not part
    of an assignment and no label was supplied, a fresh temporary is taken
    from ``__get_label``.  The special label ``"PRINT"`` turns every store
    into a ``PRINT <value>`` statement instead of ``LET <label> = <value>``.

    Boolean and comparison operators are lowered to conditional jumps with
    relative targets (``GOTO +n``):

    * ``and``              -> six instructions, result TRUE only if both hold
    * ``or``               -> five instructions
    * ``larger`` / ``eq``  -> four instructions around ``>`` / ``=``
    * ``mult``/``sub``/``add`` -> a single store of the arithmetic expression

    :param n: ``BinOp`` node.
    :param scope_id: scope in which operand names are resolved.
    :param instr_list: instruction list that is appended to in place.
    :param is_assignment: ``True`` when the result goes to ``var_label`` on
        behalf of an assignment.
    :param var_label: destination symbol, ``"PRINT"``, or ``""``.
    :return: the destination label when ``is_assignment`` is false,
        otherwise ``None``.
    """
    op = n.children[0].value
    expr_one_label = self.__expr(n.children[2], scope_id, instr_list)
    expr_two_label = self.__expr(n.children[4], scope_id, instr_list)

    if not is_assignment and var_label == "":
        var_label = self.__get_label()

    def store(value):
        # Single place that decides between printing and assigning.
        if var_label == "PRINT":
            return f"PRINT {value}"
        return f"LET {var_label} = {value}"

    arithmetic = {"mult": "*", "sub": "-", "add": "+"}
    comparison = {"larger": ">", "eq": "="}

    if op == "and":
        instr_list.extend([
            f"IF {expr_one_label} THEN GOTO +2",  # first holds: test the second
            "GOTO +2",                            # first fails: store FALSE
            f"IF {expr_two_label} THEN GOTO +3",  # both hold: store TRUE
            store(self.FALSE),
            "GOTO +2",                            # skip the TRUE store
            store(self.TRUE),
        ])
    elif op == "or":
        instr_list.extend([
            f"IF {expr_one_label} THEN GOTO +4",
            f"IF {expr_two_label} THEN GOTO +3",
            store(self.FALSE),
            "GOTO +2",
            store(self.TRUE),
        ])
    elif op in comparison:
        instr_list.extend([
            f"IF {expr_one_label} {comparison[op]} {expr_two_label} THEN GOTO +3",
            store(self.FALSE),
            "GOTO +2",
            store(self.TRUE),
        ])
    elif op in arithmetic:
        instr_list.append(store(f"{expr_one_label} {arithmetic[op]} {expr_two_label}"))

    if not is_assignment:
        return var_label
def __get_proc_by_name_and_scope_id(self, proc_name: str, scope_id: str):
    """Return the symbolic label of procedure ``proc_name`` as seen from ``scope_id``.

    The procedures in ``self.symbolic_label_table`` are checked in table
    order.  An entry matches when its name equals ``proc_name`` and it is
    defined either in ``scope_id`` itself or in one of the scopes returned
    by ``self.s_table.get_successor_scopes(scope_id)``.

    The successor list does not depend on the entry being examined, so it
    is computed at most once, and only if a same-named entry needs it.

    :param proc_name: procedure name used at the call site.
    :param scope_id: scope the call is made from.
    :return: the label of the first matching entry, or ``None``.
    """
    successor_scopes = None
    for proc in self.symbolic_label_table["Procedures"]:
        if proc["Name"] != proc_name:
            continue
        if proc["ScopeID"] == scope_id:
            return proc["Label"]
        if successor_scopes is None:
            successor_scopes = self.s_table.get_successor_scopes(scope_id)
        if proc["ScopeID"] in successor_scopes:
            return proc["Label"]
    return None
def __loop(self, n: Node, scope_id, instr_list: list):
    """Translate a ``while ... do`` or ``do ... until`` loop into jumps.

    ``while``: the condition (``children[2]``) is evaluated at the top; a
    true condition skips over the exit jump into the body
    (``children[6]``), and the body ends with a backward jump to the
    condition.  The exit jump is written as a placeholder and patched once
    the length of the body is known.

    ``do ... until``: the body (``children[2]``) runs first, then the
    condition (``children[6]``); a true condition skips the backward jump.

    All jump targets are relative (``GOTO +n`` / ``GOTO -n``).

    :param n: ``Loop`` node.
    :param scope_id: scope used to resolve names inside the loop.
    :param instr_list: instruction list that is appended to in place.
    """
    if n.children[0].value == "while":
        condition_node = n.children[2]
        body_node = n.children[6]

        loop_start = len(instr_list)
        condition = self.__expr(condition_node, scope_id, instr_list)
        instr_list.append(f"IF {condition} THEN GOTO +2")

        # Reserve the slot for the jump that leaves the loop.
        exit_jump_index = len(instr_list)
        instr_list.append(f"GOTO PENDING...")

        self.__algorithm(body_node, scope_id, instr_list)

        distance_back = len(instr_list) - loop_start
        instr_list.append(f"GOTO -{distance_back}")
        instr_list[exit_jump_index] = f"GOTO +{len(instr_list) - exit_jump_index}"
        return

    body_node = n.children[2]
    condition_node = n.children[6]

    loop_start = len(instr_list)
    self.__algorithm(body_node, scope_id, instr_list)
    condition = self.__expr(condition_node, scope_id, instr_list)
    instr_list.append(f"IF {condition} THEN GOTO +2")
    distance_back = len(instr_list) - loop_start
    instr_list.append(f"GOTO -{distance_back}")
def __instr(self, n: Node, scope_id, inst_list: list):
    """Translate a single ``Instr`` node by delegating on its first child.

    ``PCall``, ``Loop``, ``Branch`` and ``Assign`` children are handed to
    the translator of the same name; any other kind of child produces no
    instructions.

    :param n: ``Instr`` node.
    :param scope_id: scope the instruction belongs to.
    :param inst_list: instruction list that is appended to in place.
    """
    focus = n.children[0]
    translators = {
        "PCall": self.__pcall,
        "Loop": self.__loop,
        "Branch": self.__branch,
        "Assign": self.__assign,
    }
    translate = translators.get(focus.value)
    if translate is not None:
        translate(focus, scope_id, inst_list)
def __bake_absolute_addresses(self, all_procs: dict):
    """Number every instruction and resolve all symbolic jump targets.

    Layout of the result stored in ``self.final_output``:

    1. the array ``DIM`` lines written by ``__initialize_arrays``,
    2. one ``GOTO`` line that jumps to the entry procedure
       (``self.main_entry``),
    3. the instructions of every procedure in ``all_procs``, in dict order,
       each prefixed with its line number.

    While numbering, relative jumps (``GOTO +n`` / ``GOTO -n``) are
    rewritten to absolute line numbers and the first line of each procedure
    is recorded in the symbolic label table.  A second pass replaces the
    procedure label in every ``GO SUB`` line with that recorded line.

    :param all_procs: mapping of procedure label -> list of instructions
        that still use relative jump offsets.
    """
    final_list = []
    line = self.__initialize_arrays(final_list)
    start = line
    final_list.append("GOTO START")  # placeholder, patched once main's line is known
    line += 1
    self.final_output = []

    for proc_label, instructions in all_procs.items():
        self.__update_proc_line_by_label(proc_label, str(line))
        for code in instructions:
            # Both substrings must be tested explicitly; a bare string
            # literal on the right of `or` is always truthy.
            if "GOTO +" in code or "GOTO -" in code:
                tokens = code.split(" ")
                for position in range(1, len(tokens)):
                    offset = tokens[position]
                    is_relative = offset[:1] in ("+", "-") and offset[1:].isdigit()
                    if tokens[position - 1] == "GOTO" and is_relative:
                        tokens[position] = str(line + int(offset))
                code = " ".join(tokens)

            self.final_output.append(f"{line} {code}")
            line += 1

    for position, numbered in enumerate(self.final_output):
        if "GO SUB" in numbered:
            tokens = [
                self.__get_proc_by_label(token) if token in all_procs else token
                for token in numbered.split(" ")
            ]
            self.final_output[position] = " ".join(tokens)

    final_list[start] = f"{start} GOTO {self.__get_proc_by_label(self.main_entry)}"
    final_list.extend(self.final_output)
    self.final_output = final_list
@staticmethod
def __is_array(current: "Node"):
    """Tell whether the name node ``current`` is used with a subscript.

    That is the case when the node immediately to the right of ``current``
    (among its parent's children) is a ``VAR`` node whose first child is
    ``"["``.  A name that is the last child of its parent has no such
    sibling and is therefore reported as not being an array reference.

    :param current: ``UserDefinedName`` node to classify.
    :return: ``True`` for a subscripted reference, otherwise ``False``.
    """
    siblings = current.parent.children
    right_position = siblings.index(current) + 1
    if right_position >= len(siblings):
        return False

    right_sibling = siblings[right_position]
    return (
        right_sibling.value == "VAR"
        and len(right_sibling.children) > 0
        and right_sibling.children[0].value == "["
    )
def __binop(self, bin_op_node: Node):
    """Type-check a binary operation and return the type of its result.

    Operand types are obtained from ``__eval_expr`` for ``children[2]`` and
    ``children[4]``.  The rules enforced are:

    * ``eq``                  -> any operands, result ``"B"``
    * ``and`` / ``or``        -> both operands ``"B"``, result ``"B"``
    * ``larger``              -> both operands ``"N"``, result ``"B"``
    * ``add``/``sub``/``mult`` -> both operands ``"N"``, result ``"N"``

    Two operands merely having the *same* type is not enough: the type must
    be the one the operator works on.  Violations are reported through
    ``__report_type_error``, pointing at the first offending operand.

    :param bin_op_node: ``BinOp`` node.
    :return: ``"B"`` or ``"N"``; ``None`` if an error was reported.
    """
    numerical_ops = ("add", "sub", "mult")
    boolean_ops = ("and", "or")
    operation = bin_op_node.children[0].value
    expr_1 = bin_op_node.children[2]
    expr_2 = bin_op_node.children[4]
    expr_1_type = self.__eval_expr(expr_1)
    expr_2_type = self.__eval_expr(expr_2)

    if operation == "eq":
        return "B"

    if operation in boolean_ops:
        required, result = "B", "B"
    elif operation == "larger":
        required, result = "N", "B"
    elif operation in numerical_ops:
        required, result = "N", "N"
    else:
        required, result = "N", None  # unrecognised operator: never accepted

    if result is not None and expr_1_type == required and expr_2_type == required:
        return result

    if expr_1_type == expr_2_type and operation not in boolean_ops and operation != "larger":
        # Same wording and location as before for like-typed, non-numeric operands.
        self.__report_type_error(f"'{operation}' operation only permits the use of numeric data.",
                                 bin_op_node)
        return None

    err_node = expr_1 if expr_1_type != required else expr_2
    if required == "B":
        error_message = f"Only boolean values are permitted in '{operation}' calls."
    else:
        error_message = f"Only numerical values are permitted in '{operation}' calls."
    self.__report_type_error(error_message, err_node)
def __get_data_type(self, udn_node: Node):
    """Resolve the declared type of the variable referenced by ``udn_node``.

    The declaration is searched in the node's own scope first and then in
    each enclosing scope, walking outwards in the order returned by
    ``get_ancestor_scopes`` (the ``"-"`` sentinel is skipped).  The nearest
    declaration therefore wins; scope ids are never sorted, because their
    textual order says nothing about nesting.

    Array references are looked up under the name with a ``"[]"`` suffix.
    The resolved type code is also stored on ``udn_node.data_type``:
    ``"N"`` for ``num``, ``"S"`` for ``string``, ``"B"`` for ``bool``.

    :param udn_node: ``UserDefinedName`` node being typed.
    :return: tuple ``(type_code, is_array)``.
    :raises Exception: if no declaration is visible from the node's scope.
    """
    var_name = udn_node.subValue
    if self.__is_array(udn_node):
        var_name += "[]"

    decl_entry = None
    for candidate_scope in self.sem_table.get_ancestor_scopes(udn_node.scopeID):
        if candidate_scope == "-":
            continue
        decl_entry = self.sem_table.get_variable_with(var_name, candidate_scope)
        if decl_entry is not None:
            break

    if decl_entry is None:
        raise Exception(
            f"Type error - no declaration of '{var_name}' is visible from scope {udn_node.scopeID}.")

    type_codes = {"num": "N", "string": "S", "bool": "B"}
    if decl_entry["Type"] in type_codes:
        udn_node.data_type = type_codes[decl_entry["Type"]]

    is_arr = decl_entry["Token"].endswith("[]")
    return udn_node.data_type, is_arr
def _output_path(source_path: str, extension: str) -> str:
    """Return the artefact path for ``source_path``: a trailing ``.txt`` is swapped for ``extension``, otherwise ``extension`` is appended."""
    if source_path.endswith(".txt"):
        return source_path[:-len(".txt")] + extension
    return source_path + extension


def main():
    """Command-line driver: lex, parse, analyse and translate one source file.

    ``argv[1]`` is the source file; the optional ``argv[2]`` is a verbosity
    flag (``-v`` prints the tokens, ``-vv`` additionally prints the compiler
    lines and, after a successful build, the symbol tables and the
    generated code).  The syntax tree is saved next to the source with an
    ``.xml`` extension and the generated program with a ``.bas`` extension.

    Without a source file the usage text is printed and the process exits
    with status 1.  Any exception raised by a compiler stage is reported on
    standard output rather than propagated.
    """
    if len(argv) < 2:
        print(f"usage: {argv[0]} <source file> [optional verbosity]")
        print("\t\tVerbosity flags:\n\t\t\t-v\tVerbose\n\t\t\t-vv\tMaximum Verbosity\n\n")
        raise SystemExit(1)

    source_path = argv[1]
    verbosity = argv[2] if len(argv) > 2 else ""

    try:
        lexer = Lexer(source_path)
        parser = Parser(source_path)
        tokens, success = lexer.Scan()

        if verbosity == "-vv":
            print("[i] Generating tokens from:")
            lexer.printCompilerLines()
        if "-v" in verbosity:  # substring test on purpose: also true for -vv
            print("[i] Printing tokens.")
            lexer.printTokens()

        if not success:
            print("[-] Error occurred during lexical analysis.")
            return

        if not parser.parse(tokens):
            print("[-] Error occurred while parsing.")
            return

        outputFile = _output_path(source_path, ".xml")
        parser.saveSyntaxTreeXML(outputFile)
        print(f"[+] Successfully parsed. See '{outputFile}'")

        semanticAnalyzer = SemanticAnalyzer(parser.Tree, source_path)
        if semanticAnalyzer.Scan():
            print("[+] Semantic analysis passed.")
            cGen = CodeGenerator(semanticAnalyzer.initialTree, semanticAnalyzer.semantic_table,
                                 semanticAnalyzer.discoveredIds)
            outputFile = _output_path(source_path, ".bas")
            cGen.translate_functions(outputFile)
            print(f"[+] Compiled successfully. See '{outputFile}'.")
            if verbosity == "-vv":
                semanticAnalyzer.print_symbol_table()
                cGen.print_symbolic_label_table()
                cGen.print_basic_code()
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print(f"[-] Exception: {e}")


if __name__ == "__main__":
    main()