From b6dcd99f3f9b0c6d8318f7792a99e463ee050168 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 18 Oct 2023 23:16:59 +0200 Subject: [PATCH 01/69] Add a grammar base --- src/compiler/parser/grammar.md | 86 +++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/src/compiler/parser/grammar.md b/src/compiler/parser/grammar.md index e718258..e7e7950 100644 --- a/src/compiler/parser/grammar.md +++ b/src/compiler/parser/grammar.md @@ -1,4 +1,32 @@ -# Grammar of a function declaration +# Grammar + +IFJ23 + +## base + +assignment-expression → `=` expression + +type-annotation → `:` type +type → `String` | `Int` | `Double` + +code-block → `{`statements?`}` + +## expressions + +expression → TODO + +## statements + +statement → function-declaration +statement → function-call +statement → variable-declaration +statement → variable-assignment +statement → if-statement +statement → while-statement +statement → return-statement +statement → expression + +## function declaration function-declaration → function-head function-name function-signature function-body? @@ -6,13 +34,59 @@ function-head → func function-name → identifier function-signature → parameter-clause function-result? -function-result → -> type +function-result → `->` type function-body → code-block -parameter-clause → ( ) | ( parameter-list ) -parameter-list → parameter | parameter , parameter-list -parameter → external-parameter-name? local-parameter-name type-annotation default-argument-clause? +parameter-clause → `(` `)` | `(` parameter-list `)` +parameter-list → parameter | parameter `,` parameter-list parameter → external-parameter-name? local-parameter-name type-annotation external-parameter-name → identifier local-parameter-name → identifier -default-argument-clause → = expression + +### ? not in the project specification ? +parameter → external-parameter-name? local-parameter-name type-annotation default-argument-clause? 
+default-argument-clause → `=` expression + +## function call + +argument-name identifier +argument-signature + +argument → argument-name`:` expression +argument → expression + +argument-list → argument | argument , argument-list +argument-clause → `(` `)` | `(` argument-list `)` + +function-call → function-name argument-clause + +## function call (assignment) ?expression? +TODO + +id = název_funkce(seznam_vstupních_parametrů) +let id = název_funkce(seznam_vstupních_parametrů) + +## variable declaration +variable-declaration → variable-head variable-name type-annotation assignment-expression? +variable-head → let | var +variable-name → identifier + +## variable assignment + variable-assignment → variable-name assignment-expression? + variable-name → identifier + +## if statement + +condition → expression +condition → let variable-name + +if-statement → if condition code-block else-clause +else-clause → else code-block + +## while statement + +while-statement → while condition code-block + +## return statement + +return-statement → return expression? 
From b4d9b987cc9645e3722429482e3fa20ab68effc3 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 18 Oct 2023 23:29:05 +0200 Subject: [PATCH 02/69] Refactor grammar markdown --- src/compiler/parser/grammar.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/compiler/parser/grammar.md b/src/compiler/parser/grammar.md index e7e7950..efda186 100644 --- a/src/compiler/parser/grammar.md +++ b/src/compiler/parser/grammar.md @@ -7,6 +7,7 @@ IFJ23 assignment-expression → `=` expression type-annotation → `:` type + type → `String` | `Int` | `Double` code-block → `{`statements?`}` @@ -18,12 +19,19 @@ expression → TODO ## statements statement → function-declaration + statement → function-call + statement → variable-declaration + statement → variable-assignment + statement → if-statement + statement → while-statement + statement → return-statement + statement → expression ## function declaration @@ -31,56 +39,76 @@ statement → expression function-declaration → function-head function-name function-signature function-body? function-head → func + function-name → identifier function-signature → parameter-clause function-result? + function-result → `->` type + function-body → code-block parameter-clause → `(` `)` | `(` parameter-list `)` + parameter-list → parameter | parameter `,` parameter-list + parameter → external-parameter-name? local-parameter-name type-annotation + external-parameter-name → identifier + local-parameter-name → identifier ### ? not in the project specification ? + parameter → external-parameter-name? local-parameter-name type-annotation default-argument-clause? + default-argument-clause → `=` expression ## function call argument-name identifier + argument-signature argument → argument-name`:` expression + argument → expression argument-list → argument | argument , argument-list + argument-clause → `(` `)` | `(` argument-list `)` function-call → function-name argument-clause ## function call (assignment) ?expression? 
+ TODO id = název_funkce(seznam_vstupních_parametrů) let id = název_funkce(seznam_vstupních_parametrů) ## variable declaration + variable-declaration → variable-head variable-name type-annotation assignment-expression? + variable-head → let | var + variable-name → identifier ## variable assignment + variable-assignment → variable-name assignment-expression? + variable-name → identifier ## if statement condition → expression + condition → let variable-name if-statement → if condition code-block else-clause + else-clause → else code-block ## while statement From ea5ef87c2086ddffd57a91a05252001333ba2374 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 18 Oct 2023 23:43:39 +0200 Subject: [PATCH 03/69] Refactor grammar markdown added breaks --- src/compiler/parser/grammar.md | 119 +++++++++++++-------------------- 1 file changed, 46 insertions(+), 73 deletions(-) diff --git a/src/compiler/parser/grammar.md b/src/compiler/parser/grammar.md index efda186..8b1f4b5 100644 --- a/src/compiler/parser/grammar.md +++ b/src/compiler/parser/grammar.md @@ -4,13 +4,13 @@ IFJ23 ## base -assignment-expression → `=` expression +assignment-expression → `=` expression
-type-annotation → `:` type -type → `String` | `Int` | `Double` +type-annotation → `:` type
+type → `String` | `Int` | `Double`
-code-block → `{`statements?`}` +code-block → `{`statements?`}`
## expressions @@ -18,103 +18,76 @@ expression → TODO ## statements -statement → function-declaration - -statement → function-call - -statement → variable-declaration - -statement → variable-assignment - -statement → if-statement - -statement → while-statement - -statement → return-statement - -statement → expression +statement → function-declaration
+statement → function-call
+statement → variable-declaration
+statement → variable-assignmen
+statement → if-statement
+statement → while-statement
+statement → return-statement
+statement → expression
## function declaration -function-declaration → function-head function-name function-signature function-body? - -function-head → func - -function-name → identifier - -function-signature → parameter-clause function-result? - -function-result → `->` type - -function-body → code-block - -parameter-clause → `(` `)` | `(` parameter-list `)` +function-declaration → function-head function-name function-signature function-body?
-parameter-list → parameter | parameter `,` parameter-list +function-head → func
+function-name → identifier
-parameter → external-parameter-name? local-parameter-name type-annotation +function-signature → parameter-clause function-result?
+function-result → `->` type
+function-body → code-block
-external-parameter-name → identifier +parameter-clause → `(` `)` | `(` parameter-list `)`
+parameter-list → parameter | parameter `,` parameter-list
+parameter → external-parameter-name? local-parameter-name type-annotation
+external-parameter-name → identifier
+local-parameter-name → identifier
-local-parameter-name → identifier - -### ? not in the project specification ? - -parameter → external-parameter-name? local-parameter-name type-annotation default-argument-clause? - -default-argument-clause → `=` expression +### ? not in the project specification ?
+parameter → external-parameter-name? local-parameter-name type-annotation default-argument-clause?
+default-argument-clause → `=` expression
## function call -argument-name identifier - -argument-signature +argument-name identifier
+argument-signature
-argument → argument-name`:` expression +argument → argument-name`:` expression
+argument → expression
-argument → expression +argument-list → argument | argument , argument-list
+argument-clause → `(` `)` | `(` argument-list `)`
-argument-list → argument | argument , argument-list - -argument-clause → `(` `)` | `(` argument-list `)` - -function-call → function-name argument-clause +function-call → function-name argument-clause
## function call (assignment) ?expression? - TODO -id = název_funkce(seznam_vstupních_parametrů) -let id = název_funkce(seznam_vstupních_parametrů) +id = název_funkce(seznam_vstupních_parametrů)
+let id = název_funkce(seznam_vstupních_parametrů)
## variable declaration - -variable-declaration → variable-head variable-name type-annotation assignment-expression? - -variable-head → let | var - -variable-name → identifier +variable-declaration → variable-head variable-name type-annotation assignment-expression?
+variable-head → let | var
+variable-name → identifier
## variable assignment - - variable-assignment → variable-name assignment-expression? - - variable-name → identifier + variable-assignment → variable-name assignment-expression?
+ variable-name → identifier
## if statement -condition → expression - -condition → let variable-name - -if-statement → if condition code-block else-clause +condition → expression
+condition → let variable-name
-else-clause → else code-block +if-statement → if condition code-block else-clause
+else-clause → else code-block
## while statement -while-statement → while condition code-block +while-statement → while condition code-block
## return statement -return-statement → return expression? +return-statement → return expression?
From 38f5521b1d07bfa5f133f9ece2812316bfb36e70 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Thu, 19 Oct 2023 22:26:54 +0200 Subject: [PATCH 04/69] Update grammar.md Changed the grammar --- src/compiler/parser/grammar.md | 101 +++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 36 deletions(-) diff --git a/src/compiler/parser/grammar.md b/src/compiler/parser/grammar.md index 8b1f4b5..c6c7963 100644 --- a/src/compiler/parser/grammar.md +++ b/src/compiler/parser/grammar.md @@ -4,34 +4,70 @@ IFJ23 ## base -assignment-expression → `=` expression
+type-annotation → `:` type
+//type → `String` | `Int` | `Double`
+type → identifier // Built-in types will be resolved at semantic analysis
+code-block → `{` statements? `}`
-type-annotation → `:` type
-type → `String` | `Int` | `Double`
+## literals -code-block → `{`statements?`}`
+literal → numeric-literal | string-literal | boolean-literal | nil-literal
+ +? numeric-literal → `-`? integer-literal | `-`? floating-point-literal // Should this be included? Number literals are defined by FSM diagram
+boolean-literal → `true` | `false`
+nil-literal → `nil`
## expressions -expression → TODO +expression → TODO
+ +// This is probably wrong (swift parses `a = b` as a `BinaryExpression`, but we want to be more explic, so this would be parsed as `AssignmentExpression`), so it needs rework; it's just to show the `function-call-expression` rule (this way the function call can appear anywhere in the expression (`myFunc(otherFunc(10))`, `myFunc() + otherFunc(10) * 8`, `a = myFunc() * -1`, ...)) + +expression → prefix-expression infix-expressions?
+expression-list → expression | expression `,` expression-list
+ +prefix-expression → prefix-operator? postfix-expression
+ +infix-expression → infix-operator prefix-expression
+infix-expression → assignment-operator prefix-expression
+infix-expression → conditional-operator prefix-expression
+? infix-expression → type-casting-operator // Are we gonna support explicit type casting?
+infix-expressions → infix-expression infix-expressions?
+ +postfix-expression → primary-expression
+postfix-expression → postfix-expression postfix-operator
+postfix-expression → function-call-expression
+ +primary-expression → identifier
+primary-expression → literal-expression
+primary-expression → parenthesized-expression
+ +assignment-operator → `=`
+ +assignment-expression → `=` expression
+ +conditional-operator → `?` expression `:`
+ +parenthesized-expression → `(` expression `)`
+ +literal-expression → literal
## statements statement → function-declaration
-statement → function-call
statement → variable-declaration
-statement → variable-assignmen
statement → if-statement
statement → while-statement
statement → return-statement
-statement → expression
+statement → expression-statement
+ +expression-statement → expression
## function declaration -function-declaration → function-head function-name function-signature function-body?
+function-declaration → `func` function-name function-signature function-body?
-function-head → func
function-name → identifier
function-signature → parameter-clause function-result?
@@ -44,50 +80,43 @@ parameter → external-parameter-name? local-parameter-name type-annotation
external-parameter-name → identifier
local-parameter-name → identifier
-### ? not in the project specification ?
-parameter → external-parameter-name? local-parameter-name type-annotation default-argument-clause?
-default-argument-clause → `=` expression
+### ? not in the project specification ? // Ik, but it's nice to have the initializers :)
+parameter → external-parameter-name? local-parameter-name type-annotation initializer?
+initializer → `=` expression
## function call -argument-name identifier
-argument-signature
- -argument → argument-name`:` expression
-argument → expression
+function-call-expression → function-name argument-clause
-argument-list → argument | argument , argument-list
argument-clause → `(` `)` | `(` argument-list `)`
+argument-list → argument | argument `,` argument-list
-function-call → function-name argument-clause
- -## function call (assignment) ?expression? -TODO +argument → argument-name `:` expression
+argument → expression
-id = název_funkce(seznam_vstupních_parametrů)
-let id = název_funkce(seznam_vstupních_parametrů)
+argument-name → identifier
## variable declaration -variable-declaration → variable-head variable-name type-annotation assignment-expression?
-variable-head → let | var
+variable-declaration → variable-head variable-declaration-list?
+variable-head → `let` | `var`
variable-name → identifier
-## variable assignment - variable-assignment → variable-name assignment-expression?
- variable-name → identifier
+variable-declaration-list → variable-declarator | variable-declarator `,` variable-declaration-list
+variable-declarator → pattern initializer?
+pattern → variable-name type-annotation?
## if statement -condition → expression
-condition → let variable-name
+condition → expression | optional-binding-condition
+optional-binding-condition → `let` pattern initializer? | `var` pattern initializer? -if-statement → if condition code-block else-clause
-else-clause → else code-block
+if-statement → `if` condition code-block else-clause?
+else-clause → `else` code-block | `else` if-statement
## while statement -while-statement → while condition code-block
+while-statement → `while` condition code-block
## return statement -return-statement → return expression?
+return-statement → `return` expression?
From ae04c45fe48c4f047d040f2a58af774d027849fe Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Sat, 21 Oct 2023 16:33:42 +0200 Subject: [PATCH 05/69] Remove initializer comment in grammar --- src/compiler/parser/grammar.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/compiler/parser/grammar.md b/src/compiler/parser/grammar.md index c6c7963..91459ee 100644 --- a/src/compiler/parser/grammar.md +++ b/src/compiler/parser/grammar.md @@ -76,12 +76,10 @@ function-body → code-block
parameter-clause → `(` `)` | `(` parameter-list `)`
parameter-list → parameter | parameter `,` parameter-list
-parameter → external-parameter-name? local-parameter-name type-annotation
+parameter → external-parameter-name? local-parameter-name type-annotation initializer?
external-parameter-name → identifier
local-parameter-name → identifier
-### ? not in the project specification ? // Ik, but it's nice to have the initializers :)
-parameter → external-parameter-name? local-parameter-name type-annotation initializer?
initializer → `=` expression
## function call From d947e78d47ba7428dd00d3ebd29cfb6e08f03b04 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Sat, 21 Oct 2023 16:45:50 +0200 Subject: [PATCH 06/69] Add if statement parsing --- include/compiler/parser/ASTNodes.h | 49 +++++++- src/compiler/parser/ASTNodes.c | 80 ++++++++++-- src/compiler/parser/Parser.c | 193 +++++++++++++++++++++++++++-- 3 files changed, 298 insertions(+), 24 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index 0d82368..2427e47 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -19,7 +19,11 @@ enum ASTNodeType { NODE_PARAMETER_LIST, NODE_FUNCTION_DECLARATION, NODE_ARGUMENT, - NODE_FUNCTION_CALL + NODE_IF_STATEMENT, + NODE_ELSE_CLAUSE, + NODE_PATTERN, + NODE_CONDITION, + NODE_OPTIONAL_BINDING_CONDITION, }; @@ -57,6 +61,7 @@ typedef struct VariableDeclarationASTNode { enum ASTNodeType _type; IdentifierASTNode *id; TypeReferenceASTNode *type; + bool isConstant; } VariableDeclarationASTNode; typedef struct ExpressionStatementASTNode { @@ -103,6 +108,39 @@ typedef struct FunctionCallASTNode { Array /**/ *arguments; } FunctionCallASTNode; +typedef struct PatternASTNode { + enum ASTNodeType _type; + IdentifierASTNode *name; + TypeReferenceASTNode *type; +} PatternASTNode; + +typedef struct OptionalBindingConditionASTNode { + enum ASTNodeType _type; + PatternASTNode *pattern; + ExpressionASTNode *initializer; + bool isConstant; +} OptionalBindingConditionASTNode; + +typedef struct ConditionASTNode { + enum ASTNodeType _type; + ExpressionASTNode *expression; + OptionalBindingConditionASTNode *optionalBindingCondition; +} ConditionASTNode; + +typedef struct ElseClauseASTNode { + enum ASTNodeType _type; + struct IfStatementASTNode *ifStatement; + BlockASTNode *body; + bool isElseIf; +} ElseClauseASTNode; + +typedef struct IfStatementASTNode { + enum ASTNodeType _type; + ConditionASTNode *condition; + BlockASTNode *body; + ElseClauseASTNode 
*elseClause; +} IfStatementASTNode; + // TODO: Add more AST nodes @@ -112,13 +150,16 @@ ProgramASTNode * new_ProgramASTNode(BlockASTNode *block); BlockASTNode * new_BlockASTNode(Array *statements); IdentifierASTNode * new_IdentifierASTNode(String *name); TypeReferenceASTNode * new_TypeReferenceASTNode(IdentifierASTNode *id, bool isNullable); -VariableDeclarationASTNode * new_VariableDeclarationASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type); +VariableDeclarationASTNode * new_VariableDeclarationASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type, bool isConstant); ReturnStatementASTNode * new_ReturnStatementASTNode(ExpressionASTNode *expression); ParameterASTNode * new_ParameterASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type, ExpressionASTNode *initializer, IdentifierASTNode *externalName, bool isLabeless); ParameterListASTNode * new_ParameterListASTNode(Array *parameters); -ArgumentASTNode * new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierASTNode *label); FunctionDeclarationASTNode * new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); -FunctionCallASTNode * new_FunctionCallASTNode(IdentifierASTNode *id, Array *arguments); +PatternASTNode * new_PatternASTNode(IdentifierASTNode *name, TypeReferenceASTNode *type); +OptionalBindingConditionASTNode * new_OptionalBindingConditionASTNode(PatternASTNode *pattern, ExpressionASTNode *initializer, bool isConstant); +ConditionASTNode * new_ConditionASTNode(ExpressionASTNode *expression, OptionalBindingConditionASTNode *optionalBindingCondition); +ElseClauseASTNode * new_ElseClauseASTNode(IfStatementASTNode *ifStatement, BlockASTNode *body, bool isElseIf); +IfStatementASTNode * new_IfStatementASTNode(ConditionASTNode *condition, BlockASTNode *body, ElseClauseASTNode *elseClause); // TODO: Add more AST node constructors diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c 
index c78d02e..41be52d 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -7,7 +7,7 @@ /* Definitions of ASTNodes */ -ProgramASTNode* new_ProgramASTNode( +ProgramASTNode * new_ProgramASTNode( BlockASTNode *block ) { prepare_node_of(ProgramASTNode, NODE_PROGRAM) @@ -15,7 +15,7 @@ ProgramASTNode* new_ProgramASTNode( return node; } -BlockASTNode* new_BlockASTNode( +BlockASTNode * new_BlockASTNode( Array *statements ) { prepare_node_of(BlockASTNode, NODE_BLOCK) @@ -23,7 +23,7 @@ BlockASTNode* new_BlockASTNode( return node; } -IdentifierASTNode* new_IdentifierASTNode( +IdentifierASTNode * new_IdentifierASTNode( String *name ) { prepare_node_of(IdentifierASTNode, NODE_IDENTIFIER) @@ -31,7 +31,7 @@ IdentifierASTNode* new_IdentifierASTNode( return node; } -TypeReferenceASTNode* new_TypeReferenceASTNode( +TypeReferenceASTNode * new_TypeReferenceASTNode( IdentifierASTNode *id, bool isNullable ) { @@ -41,17 +41,19 @@ TypeReferenceASTNode* new_TypeReferenceASTNode( return node; } -VariableDeclarationASTNode* new_VariableDeclarationASTNode( +VariableDeclarationASTNode * new_VariableDeclarationASTNode( IdentifierASTNode *id, - TypeReferenceASTNode *type + TypeReferenceASTNode *type, + bool isConstant ) { prepare_node_of(VariableDeclarationASTNode, NODE_VARIABLE_DECLARATION) node->id = id; node->type = type; + node->isConstant = isConstant; return node; } -ExpressionStatementASTNode* new_ExpressionStatementASTNode( +ExpressionStatementASTNode * new_ExpressionStatementASTNode( ExpressionASTNode *expression ) { prepare_node_of(ExpressionStatementASTNode, NODE_EXPRESSION_STATEMENT) @@ -59,7 +61,7 @@ ExpressionStatementASTNode* new_ExpressionStatementASTNode( return node; } -ReturnStatementASTNode* new_ReturnStatementASTNode( +ReturnStatementASTNode * new_ReturnStatementASTNode( ExpressionASTNode *expression ) { prepare_node_of(ReturnStatementASTNode, NODE_RETURN_STATEMENT) @@ -67,7 +69,7 @@ ReturnStatementASTNode* new_ReturnStatementASTNode( return 
node; } -ParameterASTNode* new_ParameterASTNode( +ParameterASTNode * new_ParameterASTNode( IdentifierASTNode *id, TypeReferenceASTNode *type, ExpressionASTNode *initializer, @@ -83,7 +85,7 @@ ParameterASTNode* new_ParameterASTNode( return node; } -ParameterListASTNode* new_ParameterListASTNode( +ParameterListASTNode * new_ParameterListASTNode( Array *parameters ) { prepare_node_of(ParameterListASTNode, NODE_PARAMETER_LIST) @@ -121,7 +123,61 @@ FunctionCallASTNode* new_FunctionCallASTNode( ) { prepare_node_of(FunctionCallASTNode, NODE_FUNCTION_CALL) node->id = id; - node->arguments = arguments; +PatternASTNode * new_PatternASTNode( + IdentifierASTNode *name, + TypeReferenceASTNode *type +) { + prepare_node_of(PatternASTNode, NODE_PATTERN) + node->name = name; + node->type = type; + return node; +} + +OptionalBindingConditionASTNode * new_OptionalBindingConditionASTNode( + PatternASTNode *pattern, + ExpressionASTNode *initializer, + bool isConstant +) { + prepare_node_of(OptionalBindingConditionASTNode, NODE_OPTIONAL_BINDING_CONDITION) + node->pattern = pattern; + node->initializer = initializer; + node->isConstant = isConstant; + return node; +} + +ConditionASTNode * new_ConditionASTNode( + ExpressionASTNode *expression, + OptionalBindingConditionASTNode *optionalBindingCondition +) { + prepare_node_of(ConditionASTNode, NODE_CONDITION) + node->expression = expression; + node->optionalBindingCondition = optionalBindingCondition; + return node; +} + +ElseClauseASTNode * new_ElseClauseASTNode( + IfStatementASTNode *ifStatement, + BlockASTNode *body, + bool isElseIf +) { + prepare_node_of(ElseClauseASTNode, NODE_ELSE_CLAUSE) + node->ifStatement = ifStatement; + node->body = body; + node->isElseIf = isElseIf; + return node; +} + +IfStatementASTNode * new_IfStatementASTNode( + ConditionASTNode *condition, + BlockASTNode *body, + ElseClauseASTNode *elseClause +) { + prepare_node_of(IfStatementASTNode, NODE_IF_STATEMENT) + node->condition = condition; + node->body = body; + 
node->elseClause = elseClause; + return node; +} return node; } @@ -130,7 +186,7 @@ FunctionCallASTNode* new_FunctionCallASTNode( /* General purpose methods */ -ASTNode* ASTNode_alloc(size_t size, enum ASTNodeType type) { +ASTNode * ASTNode_alloc(size_t size, enum ASTNodeType type) { ASTNode *node = mem_alloc(size); node->_type = type; return node; diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index b2dcf94..f5a36ad 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -14,10 +14,15 @@ ParserResult __Parser_parseProgram(Parser *parser); ParserResult __Parser_parseBlock(Parser *parser, bool requireBraces); ParserResult __Parser_parseStatement(Parser *parser); ParserResult __Parser_parseExpression(Parser *parser); -ParserResult __Parser_parseFuncStatement(Parser *parser); ParserResult __Parser_parseTypeReference(Parser *parser); ParserResult __Parser_parseParameter(Parser *parser); ParserResult __Parser_parseParameterList(Parser *parser); +ParserResult __Parser_parseFuncStatement(Parser *parser); +ParserResult __Parser_parsePattern(Parser *parser); +ParserResult __Parser_parseOptionalBindingCondition(Parser *parser); +ParserResult __Parser_parseCondition(Parser *parser); +ParserResult __Parser_parseElseClause(Parser *parser); +ParserResult __Parser_parseIfStatement(Parser *parser); /* Definitions of public functions */ @@ -64,6 +69,7 @@ ParserResult __Parser_parseProgram(Parser *parser) { ParserResult __Parser_parseBlock(Parser *parser, bool requireBraces) { assertf(parser != NULL); + // Check for left brace if(requireBraces) { LexerResult result = Lexer_nextToken(parser->lexer); @@ -107,12 +113,19 @@ ParserResult __Parser_parseBlock(Parser *parser, bool requireBraces) { ParserResult __Parser_parseStatement(Parser *parser) { assertf(parser != NULL); - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); + LexerResult peek = Lexer_peekToken(parser->lexer, 0); 
+ if(!peek.success) return LexerToParserError(peek); - if(result.token->kind == TOKEN_FUNC) { + if(peek.token->kind == TOKEN_FUNC) { ParserResult funcResult = __Parser_parseFuncStatement(parser); if(!funcResult.success) return funcResult; + return ParserSuccess(funcResult.node); + } + + if(peek.token->kind == TOKEN_IF) { + ParserResult ifResult = __Parser_parseIfStatement(parser); + if(!ifResult.success) return ifResult; + return ParserSuccess(ifResult.node); } return ParserNoMatch(); @@ -159,7 +172,6 @@ ParserResult __Parser_parseTypeReference(Parser *parser) { return ParserSuccess(paramType); } - ParserResult __Parser_parseParameter(Parser *parser) { // TODO: Add logic to output correct error messages // TODO: Add expression parsing @@ -231,7 +243,6 @@ ParserResult __Parser_parseParameter(Parser *parser) { return ParserSuccess(paramNode); } - ParserResult __Parser_parseParameterList(Parser *parser) { assertf(parser != NULL); @@ -279,9 +290,13 @@ ParserResult __Parser_parseParameterList(Parser *parser) { ParserResult __Parser_parseFuncStatement(Parser *parser) { // TODO: Symbol table management - assertf(parser != NULL); + + // skip func keyword LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + result = Lexer_nextToken(parser->lexer); LexerResult peek; if(!result.success) return LexerToParserError(result); @@ -314,12 +329,174 @@ ParserResult __Parser_parseFuncStatement(Parser *parser) { } ParserResult blockResult = __Parser_parseBlock(parser, true); - // if(!blockResult.success) return blockResult; + if(!blockResult.success) return blockResult; FunctionDeclarationASTNode *func = new_FunctionDeclarationASTNode(funcId, (ParameterListASTNode*)parameterListResult.node, returnType, (BlockASTNode*)blockResult.node); return ParserSuccess(func); } +ParserResult __Parser_parsePattern(Parser *parser) { + assertf(parser != NULL); + + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) 
return LexerToParserError(result); + + if(result.token->type != TOKEN_IDENTIFIER) { + return ParserError( + String_fromFormat("expected pattern"), + Array_fromArgs(1, result.token)); + } + + IdentifierASTNode *patternName = new_IdentifierASTNode(result.token->value.string); + TypeReferenceASTNode *type = NULL; + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + if(peek.token->kind == TOKEN_COLON) { + ParserResult typeResult = __Parser_parseTypeReference(parser); + if(!typeResult.success) return typeResult; + type = (TypeReferenceASTNode*)typeResult.node; + } + + PatternASTNode *pattern = new_PatternASTNode(patternName, type); + + return ParserSuccess(pattern); +} + +ParserResult __Parser_parseOptionalBindingCondition(Parser *parser) { + assertf(parser != NULL); + + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + bool isConstant = result.token->kind == TOKEN_LET; + + ParserResult patternResult = __Parser_parsePattern(parser); + if(!patternResult.success) return patternResult; + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + ExpressionASTNode *initializer = NULL; + + if(peek.token->kind == TOKEN_EQUAL) { + + // Skip the '=' token + LexerResult tmp = Lexer_nextToken(parser->lexer); + if(!tmp.success) return LexerToParserError(result); + + ParserResult initializerResult = __Parser_parseExpression(parser); + if(!initializerResult.success) return initializerResult; + initializer = (ExpressionASTNode*)initializerResult.node; + } + + OptionalBindingConditionASTNode *bindingCondition = new_OptionalBindingConditionASTNode((PatternASTNode*)patternResult.node, (ExpressionASTNode*)initializer, isConstant); + + return ParserSuccess(bindingCondition); +} + +ParserResult __Parser_parseCondition(Parser *parser) { + assertf(parser != NULL); + + ExpressionASTNode *expression = NULL; + 
OptionalBindingConditionASTNode *bindingCondition = NULL; + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + if(peek.token->kind == TOKEN_LET || peek.token->kind == TOKEN_VAR) { + ParserResult bindingConditionResult = __Parser_parseOptionalBindingCondition(parser); + if(!bindingConditionResult.success) return bindingConditionResult; + bindingCondition = (OptionalBindingConditionASTNode*)bindingConditionResult.node; + } else { + ParserResult expressionResult = __Parser_parseExpression(parser); + if(!expressionResult.success) return expressionResult; + + expression = (ExpressionASTNode*)expressionResult.node; + } + + ConditionASTNode *condition = new_ConditionASTNode(expression, bindingCondition); + + return ParserSuccess(condition); +} + +ParserResult __Parser_parseElseClause(Parser *parser) { + assertf(parser != NULL); + + // skip else keyword + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + IfStatementASTNode *ifStatement = NULL; + bool isElseIf = false; + BlockASTNode *body = NULL; + + if(peek.token->kind == TOKEN_IF) { + ParserResult ifStatementResult = __Parser_parseIfStatement(parser); + if(!ifStatementResult.success) return ifStatementResult; + ifStatement = (IfStatementASTNode*)ifStatementResult.node; + isElseIf = true; + } else { + ParserResult blockResult = __Parser_parseBlock(parser, true); + if(!blockResult.success) return blockResult; + body = (BlockASTNode*)blockResult.node; + } + + ElseClauseASTNode *elseClause = new_ElseClauseASTNode(ifStatement, body, isElseIf); + + return ParserSuccess(elseClause); +} + +ParserResult __Parser_parseIfStatement(Parser *parser) { + assertf(parser != NULL); + + // skip if keyword + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return 
LexerToParserError(result); + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + // look more into this + if(peek.token->kind == TOKEN_LEFT_BRACE) { + return ParserError( + String_fromFormat("missing condition in 'if' statement"), + Array_fromArgs(1, peek.token)); + } + + + if(peek.token->kind == TOKEN_ELSE || peek.token->type == TOKEN_EOF) { + return ParserError( + String_fromFormat("expected expression, var, or let in 'if' condition"), + Array_fromArgs(1, peek.token)); + } + + ParserResult conditionResult = __Parser_parseCondition(parser); + if(!conditionResult.success) return conditionResult; + + ParserResult blockResult = __Parser_parseBlock(parser, true); + if(!blockResult.success) return blockResult; + + peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + ElseClauseASTNode *elseClause = NULL; + + if(peek.token->kind == TOKEN_ELSE) { + ParserResult elseClauseResult = __Parser_parseElseClause(parser); + if(!elseClauseResult.success) return elseClauseResult; + elseClause = (ElseClauseASTNode*)elseClauseResult.node; + } + + IfStatementASTNode *ifStatement = new_IfStatementASTNode((ConditionASTNode*)conditionResult.node, (BlockASTNode*)blockResult.node, (ElseClauseASTNode*)elseClause); + + return ParserSuccess(ifStatement); +} /* How to walk/traverse parsed AST or decide what kind of node the ASTNode * pointer refers to in general? 
*/ From 67bb9dcb72d5dd2b41dbc76ba31dad69dff14a88 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Sat, 21 Oct 2023 16:50:18 +0200 Subject: [PATCH 07/69] Add while statement parsing --- include/compiler/parser/ASTNodes.h | 8 ++++++ src/compiler/parser/ASTNodes.c | 9 +++++++ src/compiler/parser/Parser.c | 40 ++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index 2427e47..6d70f1e 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -24,6 +24,7 @@ enum ASTNodeType { NODE_PATTERN, NODE_CONDITION, NODE_OPTIONAL_BINDING_CONDITION, + NODE_WHILE_STATEMENT, }; @@ -141,6 +142,12 @@ typedef struct IfStatementASTNode { ElseClauseASTNode *elseClause; } IfStatementASTNode; +typedef struct WhileStatementASTNode { + enum ASTNodeType _type; + ConditionASTNode *condition; + BlockASTNode *body; +} WhileStatementASTNode; + // TODO: Add more AST nodes @@ -160,6 +167,7 @@ OptionalBindingConditionASTNode * new_OptionalBindingConditionASTNode(PatternAST ConditionASTNode * new_ConditionASTNode(ExpressionASTNode *expression, OptionalBindingConditionASTNode *optionalBindingCondition); ElseClauseASTNode * new_ElseClauseASTNode(IfStatementASTNode *ifStatement, BlockASTNode *body, bool isElseIf); IfStatementASTNode * new_IfStatementASTNode(ConditionASTNode *condition, BlockASTNode *body, ElseClauseASTNode *elseClause); +WhileStatementASTNode * new_WhileStatementASTNode(ConditionASTNode *condition, BlockASTNode *body); // TODO: Add more AST node constructors diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index 41be52d..804949b 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -178,6 +178,15 @@ IfStatementASTNode * new_IfStatementASTNode( node->elseClause = elseClause; return node; } + + +WhileStatementASTNode * new_WhileStatementASTNode( + ConditionASTNode *condition, + BlockASTNode *body +) { + 
prepare_node_of(WhileStatementASTNode, NODE_WHILE_STATEMENT) + node->condition = condition; + node->body = body; return node; } diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index f5a36ad..455618a 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -23,6 +23,7 @@ ParserResult __Parser_parseOptionalBindingCondition(Parser *parser); ParserResult __Parser_parseCondition(Parser *parser); ParserResult __Parser_parseElseClause(Parser *parser); ParserResult __Parser_parseIfStatement(Parser *parser); +ParserResult __Parser_parseWhileStatement(Parser *parser); /* Definitions of public functions */ @@ -128,6 +129,11 @@ ParserResult __Parser_parseStatement(Parser *parser) { return ParserSuccess(ifResult.node); } + if(peek.token->kind == TOKEN_WHILE) { + ParserResult whileResult = __Parser_parseWhileStatement(parser); + if(!whileResult.success) return whileResult; + return ParserSuccess(whileResult.node); + } return ParserNoMatch(); } @@ -497,6 +503,40 @@ ParserResult __Parser_parseIfStatement(Parser *parser) { return ParserSuccess(ifStatement); } + +ParserResult __Parser_parseWhileStatement(Parser *parser) { + assertf(parser != NULL); + + // skip while keyword + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + // look more into this + if(peek.token->kind == TOKEN_LEFT_BRACE) { + return ParserError( + String_fromFormat("missing condition in 'while' statement"), + Array_fromArgs(1, peek.token)); + } + + if(peek.token->type == TOKEN_EOF) { + return ParserError( + String_fromFormat("expected expression, var, or let in 'while' condition"), + Array_fromArgs(1, peek.token)); + } + + ParserResult conditionResult = __Parser_parseCondition(parser); + if(!conditionResult.success) return conditionResult; + + ParserResult blockResult = 
__Parser_parseBlock(parser, true); + if(!blockResult.success) return blockResult; + + WhileStatementASTNode *whileStatement = new_WhileStatementASTNode((ConditionASTNode*)conditionResult.node, (BlockASTNode*)blockResult.node); + + return ParserSuccess(whileStatement); +} /* How to walk/traverse parsed AST or decide what kind of node the ASTNode * pointer refers to in general? */ From 2444dd0d413b8f80dcc8df94e38a67a887369095 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Sat, 21 Oct 2023 16:51:01 +0200 Subject: [PATCH 08/69] Add return statement parsing --- src/compiler/parser/Parser.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 455618a..96506b5 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -24,6 +24,7 @@ ParserResult __Parser_parseCondition(Parser *parser); ParserResult __Parser_parseElseClause(Parser *parser); ParserResult __Parser_parseIfStatement(Parser *parser); ParserResult __Parser_parseWhileStatement(Parser *parser); +ParserResult __Parser_parseReturnStatement(Parser *parser); /* Definitions of public functions */ @@ -134,6 +135,12 @@ ParserResult __Parser_parseStatement(Parser *parser) { if(!whileResult.success) return whileResult; return ParserSuccess(whileResult.node); } + + if(peek.token->kind == TOKEN_RETURN) { + ParserResult returnResult = __Parser_parseReturnStatement(parser); + if(!returnResult.success) return returnResult; + return ParserSuccess(returnResult.node); + } return ParserNoMatch(); } @@ -537,6 +544,30 @@ ParserResult __Parser_parseWhileStatement(Parser *parser) { return ParserSuccess(whileStatement); } + +ParserResult __Parser_parseReturnStatement(Parser *parser) { + assertf(parser != NULL); + + // skip return keyword + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + 
if(!peek.success) return LexerToParserError(peek); + + ExpressionASTNode *expression = NULL; + + if(peek.token->type != TOKEN_EOF) { + ParserResult expressionResult = __Parser_parseExpression(parser); + if(!expressionResult.success) return expressionResult; + expression = (ExpressionASTNode*)expressionResult.node; + } + + ReturnStatementASTNode *returnStatement = new_ReturnStatementASTNode((ExpressionASTNode*)expression); + + return ParserSuccess(returnStatement); +} /* How to walk/traverse parsed AST or decide what kind of node the ASTNode * pointer refers to in general? */ From 81e315efc7ca60f70dda8c8dbb666a162891076c Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Sat, 21 Oct 2023 16:51:17 +0200 Subject: [PATCH 09/69] Add function call parsing --- include/compiler/parser/ASTNodes.h | 24 +++++--- src/compiler/parser/ASTNodes.c | 38 ++++++++---- src/compiler/parser/Parser.c | 98 ++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 20 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index 6d70f1e..fead1ba 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -19,6 +19,8 @@ enum ASTNodeType { NODE_PARAMETER_LIST, NODE_FUNCTION_DECLARATION, NODE_ARGUMENT, + NODE_ARGUMENT_LIST, + NODE_FUNCTION_CALL, NODE_IF_STATEMENT, NODE_ELSE_CLAUSE, NODE_PATTERN, @@ -89,12 +91,6 @@ typedef struct ParameterListASTNode { Array /**/ *parameters; } ParameterListASTNode; -typedef struct ArgumentASTNode { - enum ASTNodeType _type; - ExpressionASTNode *expression; - IdentifierASTNode *label; -} ArgumentASTNode; - typedef struct FunctionDeclarationASTNode { enum ASTNodeType _type; IdentifierASTNode *id; @@ -103,10 +99,21 @@ typedef struct FunctionDeclarationASTNode { BlockASTNode *body; } FunctionDeclarationASTNode; +typedef struct ArgumentASTNode { + enum ASTNodeType _type; + ExpressionASTNode *expression; + IdentifierASTNode *label; +} ArgumentASTNode; + +typedef struct 
ArgumentListASTNode { + enum ASTNodeType _type; + Array /**/ *arguments; +} ArgumentListASTNode; + typedef struct FunctionCallASTNode { enum ASTNodeType _type; IdentifierASTNode *id; - Array /**/ *arguments; + ArgumentListASTNode* argumentList; } FunctionCallASTNode; typedef struct PatternASTNode { @@ -162,6 +169,9 @@ ReturnStatementASTNode * new_ReturnStatementASTNode(ExpressionASTNode *expressio ParameterASTNode * new_ParameterASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type, ExpressionASTNode *initializer, IdentifierASTNode *externalName, bool isLabeless); ParameterListASTNode * new_ParameterListASTNode(Array *parameters); FunctionDeclarationASTNode * new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); +ArgumentASTNode * new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierASTNode *label); +ArgumentListASTNode * new_ArgumentListASTNode(Array * arguments); +FunctionCallASTNode * new_FunctionCallASTNode(IdentifierASTNode *id, ArgumentListASTNode *argumentList); PatternASTNode * new_PatternASTNode(IdentifierASTNode *name, TypeReferenceASTNode *type); OptionalBindingConditionASTNode * new_OptionalBindingConditionASTNode(PatternASTNode *pattern, ExpressionASTNode *initializer, bool isConstant); ConditionASTNode * new_ConditionASTNode(ExpressionASTNode *expression, OptionalBindingConditionASTNode *optionalBindingCondition); diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index 804949b..da7d55b 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -93,17 +93,7 @@ ParameterListASTNode * new_ParameterListASTNode( return node; } -ArgumentASTNode* new_ArgumentASTNode( - ExpressionASTNode *expression, - IdentifierASTNode *label -) { - prepare_node_of(ArgumentASTNode, NODE_ARGUMENT) - node->label = label; - node->expression = expression; - return node; -} - -FunctionDeclarationASTNode* 
new_FunctionDeclarationASTNode( +FunctionDeclarationASTNode * new_FunctionDeclarationASTNode( IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, @@ -117,12 +107,34 @@ FunctionDeclarationASTNode* new_FunctionDeclarationASTNode( return node; } -FunctionCallASTNode* new_FunctionCallASTNode( - IdentifierASTNode *id, +ArgumentASTNode * new_ArgumentASTNode( + ExpressionASTNode *expression, + IdentifierASTNode *label +) { + prepare_node_of(ArgumentASTNode, NODE_ARGUMENT) + node->label = label; + node->expression = expression; + return node; +} + +ArgumentListASTNode * new_ArgumentListASTNode( Array *arguments +) { + prepare_node_of(ArgumentListASTNode, NODE_ARGUMENT_LIST) + node->arguments = arguments; + return node; +} + +FunctionCallASTNode * new_FunctionCallASTNode( + IdentifierASTNode *id, + ArgumentListASTNode *argumentList ) { prepare_node_of(FunctionCallASTNode, NODE_FUNCTION_CALL) node->id = id; + node->argumentList = argumentList; + return node; +} + PatternASTNode * new_PatternASTNode( IdentifierASTNode *name, TypeReferenceASTNode *type diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 96506b5..9c03a2a 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -25,6 +25,9 @@ ParserResult __Parser_parseElseClause(Parser *parser); ParserResult __Parser_parseIfStatement(Parser *parser); ParserResult __Parser_parseWhileStatement(Parser *parser); ParserResult __Parser_parseReturnStatement(Parser *parser); +ParserResult __Parser_parseArgument(Parser *parser); +ParserResult __Parser_parseArgumentList(Parser *parser); +ParserResult __Parser_parseFunctionCallExpression(Parser *parser); /* Definitions of public functions */ @@ -141,6 +144,18 @@ ParserResult __Parser_parseStatement(Parser *parser) { if(!returnResult.success) return returnResult; return ParserSuccess(returnResult.node); } + + if(peek.token->type == TOKEN_IDENTIFIER) { + LexerResult peek = 
Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + // function call + if(peek.token->kind == TOKEN_LEFT_PAREN) { + ParserResult functionCallResult = __Parser_parseFunctionCallExpression(parser); + if(!functionCallResult.success) return functionCallResult; + return ParserSuccess(functionCallResult.node); + } + } return ParserNoMatch(); } @@ -568,6 +583,89 @@ ParserResult __Parser_parseReturnStatement(Parser *parser) { return ParserSuccess(returnStatement); } + +ParserResult __Parser_parseArgument(Parser *parser) { + assertf(parser != NULL); + + IdentifierASTNode *argumentLabel = NULL; + ExpressionASTNode *expression = NULL; + + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + // labeled argument + if(result.token->type == TOKEN_IDENTIFIER && peek.token->kind == TOKEN_COLON) { + argumentLabel = new_IdentifierASTNode(result.token->value.string); + // Skip the ':' token + LexerResult tmp = Lexer_nextToken(parser->lexer); + if(!tmp.success) return LexerToParserError(result); + } + + ParserResult expressionResult = __Parser_parseExpression(parser); + if(!expressionResult.success) return expressionResult; + expression = (ExpressionASTNode*)expressionResult.node; + + ArgumentASTNode *argument = new_ArgumentASTNode(expression, argumentLabel); + + return ParserSuccess(argument); +} + +ParserResult __Parser_parseArgumentList(Parser *parser) { + assertf(parser != NULL); + + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + // parse argument-list + Array *arguments = Array_alloc(0); + while(true) { + ParserResult argumentResult = __Parser_parseArgument(parser); + if(!argumentResult.success) return argumentResult; + + Array_push(arguments, (ArgumentASTNode*)argumentResult.node); + + LexerResult peek = 
Lexer_peekToken(parser->lexer, 1); + + if(!peek.success) return LexerToParserError(result); + + if(peek.token->kind == TOKEN_COMMA) { + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + } + + peek = Lexer_peekToken(parser->lexer, 1); + if(peek.token->kind == TOKEN_RIGHT_PAREN) { + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + break; + } + } + ArgumentListASTNode *argumentList = new_ArgumentListASTNode(arguments); + + return ParserSuccess(argumentList); + +} + +ParserResult __Parser_parseFunctionCallExpression(Parser *parser) { + assertf(parser != NULL); + + // identifier + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + IdentifierASTNode *funcId = new_IdentifierASTNode(result.token->value.string); + + ParserResult argumentListResult = __Parser_parseArgumentList(parser); + if(!argumentListResult.success) return argumentListResult; + + FunctionCallASTNode *fuctionCallExpression = new_FunctionCallASTNode(funcId, (ArgumentListASTNode*)argumentListResult.node); + + return ParserSuccess(fuctionCallExpression); +} /* How to walk/traverse parsed AST or decide what kind of node the ASTNode * pointer refers to in general? 
*/ From 73027689eb65a9cb2c180216e87981efdb9225de Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Sun, 22 Oct 2023 20:27:08 +0200 Subject: [PATCH 10/69] Add variable declaration parsing --- include/compiler/parser/ASTNodes.h | 30 +++++++--- src/compiler/parser/ASTNodes.c | 25 +++++++-- src/compiler/parser/Parser.c | 90 ++++++++++++++++++++++++++---- 3 files changed, 123 insertions(+), 22 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index fead1ba..c2f1ed3 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -13,6 +13,8 @@ enum ASTNodeType { NODE_IDENTIFIER, NODE_TYPE_REFERENCE, NODE_VARIABLE_DECLARATION, + NODE_VARIABLE_DECLARATION_LIST, + NODE_VARIABLE_DECLARATOR, NODE_EXPRESSION_STATEMENT, NODE_RETURN_STATEMENT, NODE_PARAMETER, @@ -26,7 +28,7 @@ enum ASTNodeType { NODE_PATTERN, NODE_CONDITION, NODE_OPTIONAL_BINDING_CONDITION, - NODE_WHILE_STATEMENT, + NODE_WHILE_STATEMENT }; @@ -60,11 +62,21 @@ typedef struct TypeReferenceASTNode { bool isNullable; } TypeReferenceASTNode; +typedef struct VariableDeclaratorASTNode { + enum ASTNodeType _type; + struct PatternASTNode *pattern; + ExpressionASTNode *initializer; +} VariableDeclaratorASTNode; + +typedef struct VariableDeclarationListASTNode { + enum ASTNodeType _type; + Array /**/ *declarators; +} VariableDeclarationListASTNode; + typedef struct VariableDeclarationASTNode { enum ASTNodeType _type; - IdentifierASTNode *id; - TypeReferenceASTNode *type; - bool isConstant; + VariableDeclarationListASTNode *declaratorList; + bool isConstant; } VariableDeclarationASTNode; typedef struct ExpressionStatementASTNode { @@ -113,7 +125,7 @@ typedef struct ArgumentListASTNode { typedef struct FunctionCallASTNode { enum ASTNodeType _type; IdentifierASTNode *id; - ArgumentListASTNode* argumentList; + ArgumentListASTNode *argumentList; } FunctionCallASTNode; typedef struct PatternASTNode { @@ -126,7 +138,7 @@ typedef struct 
OptionalBindingConditionASTNode { enum ASTNodeType _type; PatternASTNode *pattern; ExpressionASTNode *initializer; - bool isConstant; + bool isConstant; } OptionalBindingConditionASTNode; typedef struct ConditionASTNode { @@ -164,13 +176,15 @@ ProgramASTNode * new_ProgramASTNode(BlockASTNode *block); BlockASTNode * new_BlockASTNode(Array *statements); IdentifierASTNode * new_IdentifierASTNode(String *name); TypeReferenceASTNode * new_TypeReferenceASTNode(IdentifierASTNode *id, bool isNullable); -VariableDeclarationASTNode * new_VariableDeclarationASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type, bool isConstant); +VariableDeclarationASTNode * new_VariableDeclarationASTNode(VariableDeclarationListASTNode *declaratorList, bool isConstant); +VariableDeclaratorASTNode * new_VariableDeclaratorASTNode(PatternASTNode *pattern, ExpressionASTNode *initializer); +VariableDeclarationListASTNode * new_VariableDeclarationListASTNode(Array *declarators); ReturnStatementASTNode * new_ReturnStatementASTNode(ExpressionASTNode *expression); ParameterASTNode * new_ParameterASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type, ExpressionASTNode *initializer, IdentifierASTNode *externalName, bool isLabeless); ParameterListASTNode * new_ParameterListASTNode(Array *parameters); FunctionDeclarationASTNode * new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); ArgumentASTNode * new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierASTNode *label); -ArgumentListASTNode * new_ArgumentListASTNode(Array * arguments); +ArgumentListASTNode * new_ArgumentListASTNode(Array *arguments); FunctionCallASTNode * new_FunctionCallASTNode(IdentifierASTNode *id, ArgumentListASTNode *argumentList); PatternASTNode * new_PatternASTNode(IdentifierASTNode *name, TypeReferenceASTNode *type); OptionalBindingConditionASTNode * new_OptionalBindingConditionASTNode(PatternASTNode *pattern, 
ExpressionASTNode *initializer, bool isConstant); diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index da7d55b..7b5f8f3 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -41,18 +41,35 @@ TypeReferenceASTNode * new_TypeReferenceASTNode( return node; } +VariableDeclaratorASTNode * new_VariableDeclaratorASTNode( + PatternASTNode *pattern, + ExpressionASTNode *initializer +) { + prepare_node_of(VariableDeclaratorASTNode, NODE_VARIABLE_DECLARATOR) + node->pattern = pattern; + node->initializer = initializer; + return node; +} + +VariableDeclarationListASTNode * new_VariableDeclarationListASTNode( + Array *declarators +) { + prepare_node_of(VariableDeclarationListASTNode, NODE_VARIABLE_DECLARATION_LIST) + node->declarators = declarators; + return node; +} + VariableDeclarationASTNode * new_VariableDeclarationASTNode( - IdentifierASTNode *id, - TypeReferenceASTNode *type, + VariableDeclarationListASTNode *declaratorList, bool isConstant ) { prepare_node_of(VariableDeclarationASTNode, NODE_VARIABLE_DECLARATION) - node->id = id; - node->type = type; + node->declaratorList = declaratorList; node->isConstant = isConstant; return node; } + ExpressionStatementASTNode * new_ExpressionStatementASTNode( ExpressionASTNode *expression ) { diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 9c03a2a..e56dac5 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -25,6 +25,9 @@ ParserResult __Parser_parseElseClause(Parser *parser); ParserResult __Parser_parseIfStatement(Parser *parser); ParserResult __Parser_parseWhileStatement(Parser *parser); ParserResult __Parser_parseReturnStatement(Parser *parser); +ParserResult __Parser_parseVariableDeclarator(Parser *parser); +ParserResult __Parser_parseVariableDeclarationList(Parser *parser); +ParserResult __Parser_parseVariableDeclarationStatement(Parser *parser); ParserResult __Parser_parseArgument(Parser *parser); ParserResult 
__Parser_parseArgumentList(Parser *parser); ParserResult __Parser_parseFunctionCallExpression(Parser *parser); @@ -145,17 +148,12 @@ ParserResult __Parser_parseStatement(Parser *parser) { return ParserSuccess(returnResult.node); } - if(peek.token->type == TOKEN_IDENTIFIER) { - LexerResult peek = Lexer_peekToken(parser->lexer, 1); - if(!peek.success) return LexerToParserError(peek); - - // function call - if(peek.token->kind == TOKEN_LEFT_PAREN) { - ParserResult functionCallResult = __Parser_parseFunctionCallExpression(parser); - if(!functionCallResult.success) return functionCallResult; - return ParserSuccess(functionCallResult.node); - } + if(peek.token->kind == TOKEN_LET || peek.token->kind == TOKEN_VAR) { + ParserResult variableDeclarationResult = __Parser_parseVariableDeclarationStatement(parser); + if(!variableDeclarationResult.success) return variableDeclarationResult; + return ParserSuccess(variableDeclarationResult.node); } + return ParserNoMatch(); } @@ -584,6 +582,78 @@ ParserResult __Parser_parseReturnStatement(Parser *parser) { return ParserSuccess(returnStatement); } + +ParserResult __Parser_parseVariableDeclarator(Parser *parser) { + assertf(parser != NULL); + + ParserResult patternResult = __Parser_parsePattern(parser); + if(!patternResult.success) return patternResult; + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + ExpressionASTNode *initializer = NULL; + + if(peek.token->kind == TOKEN_EQUAL) { + ParserResult initializerResult = __Parser_parseExpression(parser); + if(!initializerResult.success) return initializerResult; + initializer = initializerResult.node; + } + + VariableDeclaratorASTNode *variableDeclarator = new_VariableDeclaratorASTNode((PatternASTNode*)patternResult.node, (ExpressionASTNode*)initializer); + + return ParserSuccess(variableDeclarator); +} + +ParserResult __Parser_parseVariableDeclarationList(Parser *parser) { + assertf(parser != NULL); + LexerResult peek; + 
LexerResult result; + + + Array *declarators = Array_alloc(0); + while(true) { + ParserResult declaratorResult = __Parser_parseVariableDeclarator(parser); + if(!declaratorResult.success) return declaratorResult; + + Array_push(declarators, (VariableDeclaratorASTNode*)declaratorResult.node); + + peek = Lexer_peekToken(parser->lexer, 1); + + if(peek.token->kind == TOKEN_COMMA) { + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + } + + peek = Lexer_peekToken(parser->lexer, 1); + if(peek.token->type == TOKEN_EOF) { + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + break; + } + } + + VariableDeclarationListASTNode *variableDeclarationList = new_VariableDeclarationListASTNode(declarators); + + return ParserSuccess(variableDeclarationList); +} + +ParserResult __Parser_parseVariableDeclarationStatement(Parser *parser) { + assertf(parser != NULL); + + // let/var + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + bool isConstant = result.token->kind == TOKEN_LET; + + ParserResult declarationList = __Parser_parseVariableDeclarationList(parser); + if(!declarationList.success) return declarationList; + + VariableDeclarationASTNode *variableDeclaration = new_VariableDeclarationASTNode((VariableDeclarationListASTNode*)declarationList.node, isConstant); + return ParserSuccess(variableDeclaration); +} + ParserResult __Parser_parseArgument(Parser *parser) { assertf(parser != NULL); From 22abfe26fa521706f784ec6fba2f8be42508f05e Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Fri, 27 Oct 2023 22:37:04 +0200 Subject: [PATCH 11/69] Add new files Expressions.h and Expressions.c --- include/compiler/parser/Expressions.h | 40 ++++++ src/compiler/parser/Expressions.c | 178 ++++++++++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 include/compiler/parser/Expressions.h create mode 100644 
src/compiler/parser/Expressions.c diff --git a/include/compiler/parser/Expressions.h b/include/compiler/parser/Expressions.h new file mode 100644 index 0000000..c70dc9d --- /dev/null +++ b/include/compiler/parser/Expressions.h @@ -0,0 +1,40 @@ +#include "compiler/lexer/Token.h" + +#ifndef EXPRESSIONS_H +#define EXPRESSIONS_H + +enum PrecTableRelation { + S, // Shift + R, // Reduce + E, // Equal + X // Error +}; + +enum PrecTableIndex { + I_PLUS_MINUS, + I_MUL_DIV, + I_EXCL_OP, + I_NIL_COALES, + I_REL_OP, + I_ID, + I_LEFT_BRAC, + I_RIGHT_BRAC, + I_DOLLAR +}; + +typedef enum { + S_BOTTOM, + S_STOP, + S_TERMINAL, + S_NONTERMINAL +}StackItemType; + +typedef struct StackItem { + Token *token; + StackItemType Stype; +} StackItem; + +#endif + + +//LexerResult current = Lexer_nextToken(parser->lexer); \ No newline at end of file diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c new file mode 100644 index 0000000..d1f2a23 --- /dev/null +++ b/src/compiler/parser/Expressions.c @@ -0,0 +1,178 @@ +#include "compiler/parser/Parser.h" + +#include + +#include "assertf.h" +//#include "internal/Array.h" +//#include "compiler/parser/ASTNodes.h" +//#include "compiler/lexer/Lexer.h" +#include "compiler/parser/Expressions.h" +#include "compiler/lexer/Token.h" +#include "internal/Array.h" +#include "allocator/MemoryAllocator.c" + +#define TAB_SIZE 9 +#define STACK_SIZE 20 + +int precedence_table[TAB_SIZE][TAB_SIZE] = //[stack top terminal][input token] +{ + // +-|*/| ! |??|r |i |( |) |$ + {R, S, S, R, R, S, S, R, R}, // +- + {R, R, S, R, R, S, S, R, R}, // */ + {R, R, R, R, R, S, S, R, R}, // ! + {S, S, S, S, S, S, S, R, R}, // ?? 
+ {S, S, S, R, R, S, S, R, R}, // r (==, !=, <, >, <=, >=) + {R, R, R, R, R, X, X, R, R}, // i + {S, S, S, S, S, S, S, E, X}, // ( + {R, R, R, R, R, X, X, R, R}, // ) + {S, S, S, S, S, S, S, X, X} // $ + +}; + +int Expr_getPrecTbIndex(Token *token){ + + switch (token->kind){ + case TOKEN_PLUS: + case TOKEN_MINUS: + return I_PLUS_MINUS; + + case TOKEN_STAR: + case TOKEN_SLASH: + return I_MUL_DIV; + + case TOKEN_EXCLAMATION: + return I_EXCL_OP; + + case TOKEN_NULL_COALESCING: + return I_NIL_COALES; + + case TOKEN_EQUALITY: + case TOKEN_NOT_EQUALITY: + case TOKEN_LESS: + case TOKEN_GREATER: + case TOKEN_LESS_EQUAL: + case TOKEN_GREATER_EQUAL: + return I_REL_OP; + + case TOKEN_LEFT_BRACE: + return I_LEFT_BRAC; + + case TOKEN_RIGHT_BRACE: + return I_RIGHT_BRAC; + + case TOKEN_DEFAULT: + if(token->type == TOKEN_IDENTIFIER){ + return I_ID; + } //else? + case TOKEN_STRING: + case TOKEN_INTEGER: + case TOKEN_FLOATING: + case TOKEN_NIL: + return I_ID; + + default: + return I_DOLLAR; + } +} + +StackItem Expr_getTopTerminal(Array *stack){ + StackItem *top; + for(int i = 0; i < stack->size; i++){ + if((top = Array_get(stack, stack->size - i))->Stype == S_TERMINAL){ + return top; + } + } + // else error ? 
+} + +bool Expr_typecheck(Array *stack){ + +} + +StackItem Expr_performReduction(Array *stack){ + if (stack->size == 2){ + StackItem *operator = Array_get(stack, 1); + switch (operator->token->kind){ + case TOKEN_PLUS: + case TOKEN_MINUS: + case TOKEN_STAR: + case TOKEN_SLASH: + if(;){//call typecheck + ;//call make operation + } + else return NULL; + + case TOKEN_EQUALITY: + case TOKEN_NOT_EQUALITY: + case TOKEN_LESS: + case TOKEN_GREATER: + case TOKEN_LESS_EQUAL: + case TOKEN_GREATER_EQUAL: + if(;){//call typecheck + ;//call make operation + } + else return NULL; + + + default: + break; + } + + } +} + +ParserResult __Parser_parseExpression(Parser *parser) { + assertf(parser != NULL); + + Array *stack = Array_alloc(STACK_SIZE); + Array *reduceStack = Array_alloc(STACK_SIZE); + Token *token = NULL; + StackItem *bottom = mem_alloc(sizeof(StackItem)); + StackItem *stopReduction = mem_alloc(sizeof(StackItem)); + stopReduction->Stype = S_STOP; + bottom->Stype = S_BOTTOM; + Array_push(stack, bottom); + + LexerResult current = Lexer_nextToken(parser->lexer); + + while(true){ + + if(!current.success) return LexerToParserError(current); + int operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; + + switch(operation){ + case S: + StackItem *token = mem_alloc(sizeof(StackItem)); + token->Stype = S_TERMINAL; + token->token = current.token; + Array_push(stack, stopReduction); + Array_push(stack, token); + current = Lexer_nextToken(parser->lexer); //better check first with peekToken + break; + case R: + while((token = Array_pop(stack))->Stype != S_STOP){ + if(token->kind != S_STOP){ + Array_push(reduceStack, token); + } + } + //perform reduction + //push result on stack (nonterminal) + break; + case E: + StackItem *token = mem_alloc(sizeof(StackItem)); + token->Stype = S_TERMINAL; + token->token = current.token; + Array_push(stack, token); + break; + case X: + //call error + break; + default: + break; + + + } + 
} + + return ParserNoMatch(); +} \ No newline at end of file From 07b3fc9edb75f86b8e495ae0926860517493847a Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Sat, 28 Oct 2023 16:13:27 +0200 Subject: [PATCH 12/69] Add assignment statement --- include/compiler/parser/ASTNodes.h | 10 +++++++++- src/compiler/parser/ASTNodes.c | 11 ++++++++++- src/compiler/parser/Parser.c | 30 ++++++++++++++++++++++++++++++ src/compiler/parser/grammar.md | 5 +++++ 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index c2f1ed3..382a231 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -28,7 +28,8 @@ enum ASTNodeType { NODE_PATTERN, NODE_CONDITION, NODE_OPTIONAL_BINDING_CONDITION, - NODE_WHILE_STATEMENT + NODE_WHILE_STATEMENT, + NODE_ASSIGNMENT_STATEMENT }; @@ -167,6 +168,12 @@ typedef struct WhileStatementASTNode { BlockASTNode *body; } WhileStatementASTNode; +typedef struct AssignmentStatementASTNode { + enum ASTNodeType _type; + IdentifierASTNode *id; + ExpressionASTNode *assignment; +} AssignmentStatementASTNode; + // TODO: Add more AST nodes @@ -192,6 +199,7 @@ ConditionASTNode * new_ConditionASTNode(ExpressionASTNode *expression, OptionalB ElseClauseASTNode * new_ElseClauseASTNode(IfStatementASTNode *ifStatement, BlockASTNode *body, bool isElseIf); IfStatementASTNode * new_IfStatementASTNode(ConditionASTNode *condition, BlockASTNode *body, ElseClauseASTNode *elseClause); WhileStatementASTNode * new_WhileStatementASTNode(ConditionASTNode *condition, BlockASTNode *body); +AssignmentStatementASTNode * new_AssignmentStatementASTNode(IdentifierASTNode *id, ExpressionASTNode *assignment); // TODO: Add more AST node constructors diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index 7b5f8f3..6bcae56 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -208,7 +208,6 @@ IfStatementASTNode * 
new_IfStatementASTNode( return node; } - WhileStatementASTNode * new_WhileStatementASTNode( ConditionASTNode *condition, BlockASTNode *body @@ -219,6 +218,16 @@ WhileStatementASTNode * new_WhileStatementASTNode( return node; } +AssignmentStatementASTNode * new_AssignmentStatementASTNode( + IdentifierASTNode *id, + ExpressionASTNode *assignment +) { + prepare_node_of(AssignmentStatementASTNode, NODE_ASSIGNMENT_STATEMENT) + node->id = id; + node->assignment = assignment; + return node; +} + // TODO: Add more ASTNode constructors diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index e56dac5..faecb98 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -31,6 +31,7 @@ ParserResult __Parser_parseVariableDeclarationStatement(Parser *parser); ParserResult __Parser_parseArgument(Parser *parser); ParserResult __Parser_parseArgumentList(Parser *parser); ParserResult __Parser_parseFunctionCallExpression(Parser *parser); +ParserResult __Parser_parseAssignmentStatement(Parser *parser); /* Definitions of public functions */ @@ -154,6 +155,16 @@ ParserResult __Parser_parseStatement(Parser *parser) { return ParserSuccess(variableDeclarationResult.node); } + if(peek.token->type == TOKEN_IDENTIFIER) { + LexerResult tmp = Lexer_peekToken(parser->lexer, 1); + if(!tmp.success) return LexerToParserError(tmp); + if(tmp.token->kind == TOKEN_EQUAL) { + ParserResult assignmentStatementResult = __Parser_parseAssignmentStatement(parser); + if(!assignmentStatementResult.success) return assignmentStatementResult; + return ParserSuccess(assignmentStatementResult.node); + } + } + return ParserNoMatch(); } @@ -736,6 +747,25 @@ ParserResult __Parser_parseFunctionCallExpression(Parser *parser) { return ParserSuccess(fuctionCallExpression); } + +ParserResult __Parser_parseAssignmentStatement(Parser *parser) { + assertf(parser != NULL); + + // identifier + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return 
LexerToParserError(result); + IdentifierASTNode *variableId = new_IdentifierASTNode(result.token->value.string); + + // skip '=' + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + ParserResult assignmentResult = __Parser_parseExpression(parser); + if(!assignmentResult.success) return assignmentResult; + + AssignmentStatementASTNode *assignmentStatement = new_AssignmentStatementASTNode(variableId, (ExpressionASTNode*)assignmentResult.node); + return ParserSuccess(assignmentStatement); +} /* How to walk/traverse parsed AST or decide what kind of node the ASTNode * pointer refers to in general? */ diff --git a/src/compiler/parser/grammar.md b/src/compiler/parser/grammar.md index 91459ee..866cc24 100644 --- a/src/compiler/parser/grammar.md +++ b/src/compiler/parser/grammar.md @@ -94,7 +94,12 @@ argument → expression
argument-name → identifier
+## assignment statement + +assignment-statement → variable-name assignment-expression
+ ## variable declaration + variable-declaration → variable-head variable-declaration-list?
variable-head → `let` | `var`
variable-name → identifier
From 613f314cad2e1bfaa82e8e9112f3f65b4efa8ec4 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Sun, 29 Oct 2023 19:08:44 +0100 Subject: [PATCH 13/69] Add new AST nodes, functions for creating them, expression parsing without error and end handling --- include/compiler/parser/ASTNodes.h | 52 ++++++++++- include/compiler/parser/Expressions.h | 2 + src/compiler/parser/ASTNodes.c | 32 +++++++ src/compiler/parser/Expressions.c | 130 ++++++++++++++++++++------ 4 files changed, 186 insertions(+), 30 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index 0d82368..1900c40 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -2,6 +2,7 @@ #include "internal/Array.h" #include "internal/String.h" +#include "compiler/lexer/Token.h" #ifndef ASTNode_H #define ASTNode_H @@ -19,7 +20,26 @@ enum ASTNodeType { NODE_PARAMETER_LIST, NODE_FUNCTION_DECLARATION, NODE_ARGUMENT, - NODE_FUNCTION_CALL + NODE_FUNCTION_CALL, + NODE_BINARY_EXPRESSION, + NODE_UNARY_EXPRESSION, + NODE_LITERAL_EXPRESSION +}; + +enum OperatorType{ + OPERATOR_DEFAULT = 0, + OPERATOR_PLUS, + OPERATOR_MINUS, + OPERATOR_STAR, + OPERATOR_SLASH, + OPERATOR_EXCLAMATION, + OPERATOR_NULL_COALESCING, + OPERATOR_EQUAL, + OPERATOR_NOT_EQUAL, + OPERATOR_LESS, + OPERATOR_GREATER, + OPERATOR_LESS_EQUAL, + OPERATOR_GREATER_EQUAL }; @@ -103,6 +123,33 @@ typedef struct FunctionCallASTNode { Array /**/ *arguments; } FunctionCallASTNode; +//typedef struct ExpressionASTNode { +// enum ASTNodeType _type; +// BinaryExpressionASTNode *BExpression; +// UnaryExpressionASTNode *UExpression; +// LiteralExpressionASTNode *LExpression; +// IdentifierASTNode *IExpression; +//} ExpressionASTNode; + +typedef struct BinaryExpressionASTNode { + enum ASTNodeType _type; + ExpressionASTNode *left; + ExpressionASTNode *right; + enum OperatorType operator; +} BinaryExpressionASTNode; + +typedef struct UnaryExpressionASTNode { + enum ASTNodeType _type; + ExpressionASTNode 
*argument; + enum OperatorType operator; + //bool IsPrefix; + +} UnaryExpressionASTNode; + +typedef struct LiteralExpressionASTNode { + enum ASTNodeType _type; + union TokenValue value; +} LiteralExpressionASTNode; // TODO: Add more AST nodes @@ -119,6 +166,9 @@ ParameterListASTNode * new_ParameterListASTNode(Array *parameters); ArgumentASTNode * new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierASTNode *label); FunctionDeclarationASTNode * new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); FunctionCallASTNode * new_FunctionCallASTNode(IdentifierASTNode *id, Array *arguments); +BinaryExpressionASTNode* new_BinaryExpressionASTNode(ExpressionASTNode *left, ExpressionASTNode *right, enum OperatorType operator); +UnaryExpressionASTNode* new_UnaryExpressionASTNode(ExpressionASTNode *argument, enum OperatorType operator /*, bool IsPrefix*/); +LiteralExpressionASTNode* new_LiteralExpressionASTNode(union TokenValue value); // TODO: Add more AST node constructors diff --git a/include/compiler/parser/Expressions.h b/include/compiler/parser/Expressions.h index c70dc9d..724e7be 100644 --- a/include/compiler/parser/Expressions.h +++ b/include/compiler/parser/Expressions.h @@ -1,4 +1,5 @@ #include "compiler/lexer/Token.h" +#include "compiler/parser/ASTNodes.h" #ifndef EXPRESSIONS_H #define EXPRESSIONS_H @@ -32,6 +33,7 @@ typedef enum { typedef struct StackItem { Token *token; StackItemType Stype; + ExpressionASTNode *node; } StackItem; #endif diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index c78d02e..f39e9b7 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -125,6 +125,38 @@ FunctionCallASTNode* new_FunctionCallASTNode( return node; } +BinaryExpressionASTNode* new_BinaryExpressionASTNode( + ExpressionASTNode *left, + ExpressionASTNode *right, + enum OperatorType operator +){ + 
prepare_node_of(BinaryExpressionASTNode, NODE_BINARY_EXPRESSION) + node->left = left; + node->right = right; + node->operator = operator; + return node; +} + +UnaryExpressionASTNode* new_UnaryExpressionASTNode( + ExpressionASTNode *argument, + enum OperatorType operator + //bool IsPrefix +){ + prepare_node_of(UnaryExpressionASTNode, NODE_UNARY_EXPRESSION) + node->argument = argument; + node->operator = operator; + //node->IsPrefix = IsPrefix; + return node; +} + +LiteralExpressionASTNode* new_LiteralExpressionASTNode( + union TokenValue value +){ + prepare_node_of(LiteralExpressionASTNode, NODE_LITERAL_EXPRESSION) + node->value = value; + return node; +} + // TODO: Add more ASTNode constructors diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c index d1f2a23..f7d91c6 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/Expressions.c @@ -9,7 +9,8 @@ #include "compiler/parser/Expressions.h" #include "compiler/lexer/Token.h" #include "internal/Array.h" -#include "allocator/MemoryAllocator.c" +#include "allocator/MemoryAllocator.h" +#include "compiler/parser/ASTNodes.h" #define TAB_SIZE 9 #define STACK_SIZE 20 @@ -63,7 +64,7 @@ int Expr_getPrecTbIndex(Token *token){ case TOKEN_DEFAULT: if(token->type == TOKEN_IDENTIFIER){ return I_ID; - } //else? + } //else error case TOKEN_STRING: case TOKEN_INTEGER: case TOKEN_FLOATING: @@ -76,47 +77,114 @@ int Expr_getPrecTbIndex(Token *token){ } StackItem Expr_getTopTerminal(Array *stack){ - StackItem *top; + StackItem *top = NULL; for(int i = 0; i < stack->size; i++){ if((top = Array_get(stack, stack->size - i))->Stype == S_TERMINAL){ - return top; + return *top; } } - // else error ? 
+ //else error } -bool Expr_typecheck(Array *stack){ - +void Expr_pushAfterTopTerminal(Array *stack){ + StackItem *stopReduction = mem_alloc(sizeof(StackItem)); + stopReduction->token = NULL; + stopReduction->Stype = S_STOP; + stopReduction->node = NULL; + for(int i = 0; i < stack->size; i++){ + if (((StackItem *)Array_get(stack, stack->size - i))->Stype == S_TERMINAL) { + Array_push(stack, stopReduction); + } + } } -StackItem Expr_performReduction(Array *stack){ +StackItem *Expr_performReduction(Array *stack){ + + // E -> i + if(stack->size == 0){ + StackItem *id = Array_get(stack, 0); + + if(id->token->type = TOKEN_LITERAL){ + LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); + id->node = (ExpressionASTNode*) literalE; + id->Stype = S_NONTERMINAL; + return id; + } + if(id->token->type = TOKEN_IDENTIFIER){ + IdentifierASTNode *identifierE = new_IdentifierASTNode(id->token->value.string); //string or identifier? + id->node = (ExpressionASTNode*) identifierE; + id->Stype = S_NONTERMINAL; + return id; + } + } + + // E -> E! 
+ if(stack->size == 1){ + StackItem *operator = Array_get(stack, 1); + StackItem *argument = Array_get(stack, 0); + if(operator->token->type == TOKEN_EXCLAMATION){ + UnaryExpressionASTNode *unaryE = new_UnaryExpressionASTNode(argument->node, OPERATOR_EXCLAMATION); + operator->node = (ExpressionASTNode*) unaryE; + operator->Stype = S_NONTERMINAL; + return operator; + } + } + + // Binary operations and parentheses if (stack->size == 2){ StackItem *operator = Array_get(stack, 1); + StackItem *leftOperand = Array_get(stack, 0); + StackItem *rightOperand = Array_get(stack, 2); + + // E -> (E) + if(operator->Stype == S_NONTERMINAL && leftOperand->token->kind == TOKEN_LEFT_PAREN && rightOperand->token->kind == TOKEN_RIGHT_PAREN){ + return operator; + } + + enum OperatorType operatorType = 0; switch (operator->token->kind){ case TOKEN_PLUS: + operatorType = OPERATOR_PLUS; + break; case TOKEN_MINUS: + operatorType = OPERATOR_MINUS; + break; case TOKEN_STAR: + operatorType = OPERATOR_STAR; + break; case TOKEN_SLASH: - if(;){//call typecheck - ;//call make operation - } - else return NULL; - + operatorType = OPERATOR_SLASH; + break; case TOKEN_EQUALITY: + operatorType = OPERATOR_EQUAL; + break; case TOKEN_NOT_EQUALITY: + operatorType = OPERATOR_NOT_EQUAL; + break; case TOKEN_LESS: + operatorType = OPERATOR_LESS; + break; case TOKEN_GREATER: + operatorType = OPERATOR_GREATER; + break; case TOKEN_LESS_EQUAL: - case TOKEN_GREATER_EQUAL: - if(;){//call typecheck - ;//call make operation - } - else return NULL; - - + operatorType = OPERATOR_LESS_EQUAL; + break; + case TOKEN_GREATER_EQUAL: + operatorType = OPERATOR_GREATER_EQUAL; + break; + case TOKEN_NULL_COALESCING: + operatorType = OPERATOR_NULL_COALESCING; + break; default: break; } + if(operatorType){ + BinaryExpressionASTNode *binaryE = new_BinaryExpressionASTNode(leftOperand->node, rightOperand->node, operatorType); + operator->node = (ExpressionASTNode*) binaryE; + operator->Stype = S_NONTERMINAL; + return operator; + } } } 
@@ -128,9 +196,9 @@ ParserResult __Parser_parseExpression(Parser *parser) { Array *reduceStack = Array_alloc(STACK_SIZE); Token *token = NULL; StackItem *bottom = mem_alloc(sizeof(StackItem)); - StackItem *stopReduction = mem_alloc(sizeof(StackItem)); - stopReduction->Stype = S_STOP; + bottom->Stype = S_BOTTOM; + bottom->node = NULL; Array_push(stack, bottom); LexerResult current = Lexer_nextToken(parser->lexer); @@ -138,25 +206,29 @@ ParserResult __Parser_parseExpression(Parser *parser) { while(true){ if(!current.success) return LexerToParserError(current); - int operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; - + enum PrecTableRelation operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; + if(((StackItem*)Array_get(stack, stack->size))->Stype = S_NONTERMINAL && stack->size == 1 && operation == R){ + StackItem *finalExpression = Array_get(stack, stack->size); + return ParserSuccess(finalExpression->node); + } //TODO: when to end switch(operation){ case S: StackItem *token = mem_alloc(sizeof(StackItem)); token->Stype = S_TERMINAL; token->token = current.token; - Array_push(stack, stopReduction); + token->node = NULL; + Expr_pushAfterTopTerminal(stack); Array_push(stack, token); current = Lexer_nextToken(parser->lexer); //better check first with peekToken break; case R: while((token = Array_pop(stack))->Stype != S_STOP){ - if(token->kind != S_STOP){ + if(token->Stype != S_STOP){ Array_push(reduceStack, token); } } - //perform reduction - //push result on stack (nonterminal) + // Perform reduction and push result on stack (nonterminal) + Array_push(stack, Expr_performReduction(reduceStack)); break; case E: StackItem *token = mem_alloc(sizeof(StackItem)); @@ -165,7 +237,7 @@ ParserResult __Parser_parseExpression(Parser *parser) { Array_push(stack, token); break; case X: - //call error + //call LexerToParserError break; default: 
break; From 8b71036d0f5e2aa14433b8dc95aa39de2f6da4d4 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Mon, 30 Oct 2023 22:58:59 +0100 Subject: [PATCH 14/69] Update src/compiler/parser/ASTNodes.c --- src/compiler/parser/ASTNodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index f39e9b7..4869bf4 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -141,7 +141,7 @@ UnaryExpressionASTNode* new_UnaryExpressionASTNode( ExpressionASTNode *argument, enum OperatorType operator //bool IsPrefix -){ +) { prepare_node_of(UnaryExpressionASTNode, NODE_UNARY_EXPRESSION) node->argument = argument; node->operator = operator; From a24846e402af8fe12d2c96b0f1f6855873a4446d Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Mon, 30 Oct 2023 22:59:15 +0100 Subject: [PATCH 15/69] Update src/compiler/parser/ASTNodes.c --- src/compiler/parser/ASTNodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index 4869bf4..b614f18 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -151,7 +151,7 @@ UnaryExpressionASTNode* new_UnaryExpressionASTNode( LiteralExpressionASTNode* new_LiteralExpressionASTNode( union TokenValue value -){ +) { prepare_node_of(LiteralExpressionASTNode, NODE_LITERAL_EXPRESSION) node->value = value; return node; From 5aaee478fe8af4ab8e595e14a5518dc597ea57b5 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Fri, 3 Nov 2023 08:07:25 +0100 Subject: [PATCH 16/69] small changes in syntax after code review, first test for expression parsing --- include/compiler/parser/ASTNodes.h | 10 +- include/compiler/parser/Expressions.h | 8 +- src/compiler/parser/ASTNodes.c | 4 +- src/compiler/parser/Expressions.c | 440 ++++++++++++------------ src/compiler/parser/Parser.c | 8 +- src/internal/Array.c | 2 +- test/compiler/parser/Expressions.test.c | 26 ++ 7 files 
changed, 265 insertions(+), 233 deletions(-) create mode 100644 test/compiler/parser/Expressions.test.c diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index 1900c40..e6d338d 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -30,9 +30,9 @@ enum OperatorType{ OPERATOR_DEFAULT = 0, OPERATOR_PLUS, OPERATOR_MINUS, - OPERATOR_STAR, - OPERATOR_SLASH, - OPERATOR_EXCLAMATION, + OPERATOR_MUL, + OPERATOR_DIV, + OPERATOR_UNWRAP, OPERATOR_NULL_COALESCING, OPERATOR_EQUAL, OPERATOR_NOT_EQUAL, @@ -142,7 +142,7 @@ typedef struct UnaryExpressionASTNode { enum ASTNodeType _type; ExpressionASTNode *argument; enum OperatorType operator; - //bool IsPrefix; + //bool isPrefix; } UnaryExpressionASTNode; @@ -167,7 +167,7 @@ ArgumentASTNode * new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierA FunctionDeclarationASTNode * new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); FunctionCallASTNode * new_FunctionCallASTNode(IdentifierASTNode *id, Array *arguments); BinaryExpressionASTNode* new_BinaryExpressionASTNode(ExpressionASTNode *left, ExpressionASTNode *right, enum OperatorType operator); -UnaryExpressionASTNode* new_UnaryExpressionASTNode(ExpressionASTNode *argument, enum OperatorType operator /*, bool IsPrefix*/); +UnaryExpressionASTNode* new_UnaryExpressionASTNode(ExpressionASTNode *argument, enum OperatorType operator /*, bool isPrefix*/); LiteralExpressionASTNode* new_LiteralExpressionASTNode(union TokenValue value); // TODO: Add more AST node constructors diff --git a/include/compiler/parser/Expressions.h b/include/compiler/parser/Expressions.h index 724e7be..379a14c 100644 --- a/include/compiler/parser/Expressions.h +++ b/include/compiler/parser/Expressions.h @@ -12,9 +12,9 @@ enum PrecTableRelation { }; enum PrecTableIndex { - I_PLUS_MINUS, - I_MUL_DIV, - I_EXCL_OP, + I_ADDITIVE, + I_MULTIPLICATIVE, + 
I_UNWRAP_OP, I_NIL_COALES, I_REL_OP, I_ID, @@ -36,6 +36,8 @@ typedef struct StackItem { ExpressionASTNode *node; } StackItem; +StackItem Expr_getTopTerminal(Array *stack); + #endif diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index f39e9b7..d1c1dbe 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -140,12 +140,12 @@ BinaryExpressionASTNode* new_BinaryExpressionASTNode( UnaryExpressionASTNode* new_UnaryExpressionASTNode( ExpressionASTNode *argument, enum OperatorType operator - //bool IsPrefix + //bool isPrefix ){ prepare_node_of(UnaryExpressionASTNode, NODE_UNARY_EXPRESSION) node->argument = argument; node->operator = operator; - //node->IsPrefix = IsPrefix; + //node->isPrefix = isPrefix; return node; } diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c index f7d91c6..930bd05 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/Expressions.c @@ -12,239 +12,243 @@ #include "allocator/MemoryAllocator.h" #include "compiler/parser/ASTNodes.h" -#define TAB_SIZE 9 +#define TABLE_SIZE 9 #define STACK_SIZE 20 -int precedence_table[TAB_SIZE][TAB_SIZE] = //[stack top terminal][input token] +int precedence_table[TABLE_SIZE][TABLE_SIZE] = //[stack top terminal][input token] { - // +-|*/| ! |??|r |i |( |) |$ - {R, S, S, R, R, S, S, R, R}, // +- - {R, R, S, R, R, S, S, R, R}, // */ - {R, R, R, R, R, S, S, R, R}, // ! - {S, S, S, S, S, S, S, R, R}, // ?? - {S, S, S, R, R, S, S, R, R}, // r (==, !=, <, >, <=, >=) - {R, R, R, R, R, X, X, R, R}, // i - {S, S, S, S, S, S, S, E, X}, // ( - {R, R, R, R, R, X, X, R, R}, // ) - {S, S, S, S, S, S, S, X, X} // $ + // +-|*/| ! |??|r |i |( |) |$ + {R, S, S, R, R, S, S, R, R}, // +- + {R, R, S, R, R, S, S, R, R}, // */ + {R, R, R, R, R, S, S, R, R}, // ! + {S, S, S, S, S, S, S, R, R}, // ?? 
+ {S, S, S, R, R, S, S, R, R}, // r (==, !=, <, >, <=, >=) + {R, R, R, R, R, X, X, R, R}, // i + {S, S, S, S, S, S, S, E, X}, // ( + {R, R, R, R, R, X, X, R, R}, // ) + {S, S, S, S, S, S, S, X, X} // $ }; -int Expr_getPrecTbIndex(Token *token){ - - switch (token->kind){ - case TOKEN_PLUS: - case TOKEN_MINUS: - return I_PLUS_MINUS; - - case TOKEN_STAR: - case TOKEN_SLASH: - return I_MUL_DIV; - - case TOKEN_EXCLAMATION: - return I_EXCL_OP; - - case TOKEN_NULL_COALESCING: - return I_NIL_COALES; - - case TOKEN_EQUALITY: - case TOKEN_NOT_EQUALITY: - case TOKEN_LESS: - case TOKEN_GREATER: - case TOKEN_LESS_EQUAL: - case TOKEN_GREATER_EQUAL: - return I_REL_OP; - - case TOKEN_LEFT_BRACE: - return I_LEFT_BRAC; - - case TOKEN_RIGHT_BRACE: - return I_RIGHT_BRAC; - - case TOKEN_DEFAULT: - if(token->type == TOKEN_IDENTIFIER){ - return I_ID; - } //else error - case TOKEN_STRING: - case TOKEN_INTEGER: - case TOKEN_FLOATING: - case TOKEN_NIL: - return I_ID; - - default: - return I_DOLLAR; - } +int Expr_getPrecTbIndex(Token *token) { + + switch(token->kind) { + case TOKEN_PLUS: + case TOKEN_MINUS: + return I_ADDITIVE; + + case TOKEN_STAR: + case TOKEN_SLASH: + return I_MULTIPLICATIVE; + + case TOKEN_EXCLAMATION: + return I_UNWRAP_OP; + + case TOKEN_NULL_COALESCING: + return I_NIL_COALES; + + case TOKEN_EQUALITY: + case TOKEN_NOT_EQUALITY: + case TOKEN_LESS: + case TOKEN_GREATER: + case TOKEN_LESS_EQUAL: + case TOKEN_GREATER_EQUAL: + return I_REL_OP; + + case TOKEN_LEFT_BRACE: + return I_LEFT_BRAC; + + case TOKEN_RIGHT_BRACE: + return I_RIGHT_BRAC; + + case TOKEN_DEFAULT: + if(token->type == TOKEN_IDENTIFIER) { + return I_ID; + } //else error + return I_DOLLAR; //maybe + case TOKEN_STRING: + case TOKEN_INTEGER: + case TOKEN_FLOATING: + case TOKEN_NIL: + return I_ID; + + default: + return I_DOLLAR; + } } -StackItem Expr_getTopTerminal(Array *stack){ - StackItem *top = NULL; - for(int i = 0; i < stack->size; i++){ - if((top = Array_get(stack, stack->size - i))->Stype == S_TERMINAL){ - 
return *top; - } - } - //else error +StackItem Expr_getTopTerminal(Array *stack) { + StackItem *top = NULL; + for(size_t i = 0; i < stack->size; i++) { + if((top = Array_get(stack, stack->size - i - 1))->Stype == (S_TERMINAL || S_BOTTOM)) { + return *top; + } + } + //else error + return *top; } -void Expr_pushAfterTopTerminal(Array *stack){ - StackItem *stopReduction = mem_alloc(sizeof(StackItem)); - stopReduction->token = NULL; - stopReduction->Stype = S_STOP; - stopReduction->node = NULL; - for(int i = 0; i < stack->size; i++){ - if (((StackItem *)Array_get(stack, stack->size - i))->Stype == S_TERMINAL) { - Array_push(stack, stopReduction); - } - } +void Expr_pushAfterTopTerminal(Array *stack) { + StackItem *stopReduction = mem_alloc(sizeof(StackItem)); + stopReduction->token = NULL; + stopReduction->Stype = S_STOP; + stopReduction->node = NULL; + for(size_t i = 0; i < stack->size; i++) { + if(((StackItem *)Array_get(stack, stack->size - i))->Stype == S_TERMINAL) { + Array_push(stack, stopReduction); + } + } } -StackItem *Expr_performReduction(Array *stack){ - - // E -> i - if(stack->size == 0){ - StackItem *id = Array_get(stack, 0); - - if(id->token->type = TOKEN_LITERAL){ - LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); - id->node = (ExpressionASTNode*) literalE; - id->Stype = S_NONTERMINAL; - return id; - } - if(id->token->type = TOKEN_IDENTIFIER){ - IdentifierASTNode *identifierE = new_IdentifierASTNode(id->token->value.string); //string or identifier? - id->node = (ExpressionASTNode*) identifierE; - id->Stype = S_NONTERMINAL; - return id; - } - } - - // E -> E! 
- if(stack->size == 1){ - StackItem *operator = Array_get(stack, 1); - StackItem *argument = Array_get(stack, 0); - if(operator->token->type == TOKEN_EXCLAMATION){ - UnaryExpressionASTNode *unaryE = new_UnaryExpressionASTNode(argument->node, OPERATOR_EXCLAMATION); - operator->node = (ExpressionASTNode*) unaryE; - operator->Stype = S_NONTERMINAL; - return operator; - } - } - - // Binary operations and parentheses - if (stack->size == 2){ - StackItem *operator = Array_get(stack, 1); - StackItem *leftOperand = Array_get(stack, 0); - StackItem *rightOperand = Array_get(stack, 2); - - // E -> (E) - if(operator->Stype == S_NONTERMINAL && leftOperand->token->kind == TOKEN_LEFT_PAREN && rightOperand->token->kind == TOKEN_RIGHT_PAREN){ - return operator; - } - - enum OperatorType operatorType = 0; - switch (operator->token->kind){ - case TOKEN_PLUS: - operatorType = OPERATOR_PLUS; - break; - case TOKEN_MINUS: - operatorType = OPERATOR_MINUS; - break; - case TOKEN_STAR: - operatorType = OPERATOR_STAR; - break; - case TOKEN_SLASH: - operatorType = OPERATOR_SLASH; - break; - case TOKEN_EQUALITY: - operatorType = OPERATOR_EQUAL; - break; - case TOKEN_NOT_EQUALITY: - operatorType = OPERATOR_NOT_EQUAL; - break; - case TOKEN_LESS: - operatorType = OPERATOR_LESS; - break; - case TOKEN_GREATER: - operatorType = OPERATOR_GREATER; - break; - case TOKEN_LESS_EQUAL: - operatorType = OPERATOR_LESS_EQUAL; - break; - case TOKEN_GREATER_EQUAL: - operatorType = OPERATOR_GREATER_EQUAL; - break; - case TOKEN_NULL_COALESCING: - operatorType = OPERATOR_NULL_COALESCING; - break; - default: - break; - } - if(operatorType){ - BinaryExpressionASTNode *binaryE = new_BinaryExpressionASTNode(leftOperand->node, rightOperand->node, operatorType); - operator->node = (ExpressionASTNode*) binaryE; - operator->Stype = S_NONTERMINAL; - return operator; - } - - } +StackItem *Expr_performReduction(Array *stack) { + + // E -> i + if(stack->size == 0) { + StackItem *id = Array_get(stack, 0); + + 
if(id->token->type == TOKEN_LITERAL) { + LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); + id->node = (ExpressionASTNode*)literalE; + id->Stype = S_NONTERMINAL; + return id; + } + if(id->token->type == TOKEN_IDENTIFIER) { + IdentifierASTNode *identifierE = new_IdentifierASTNode(id->token->value.string); //string or identifier? + id->node = (ExpressionASTNode*)identifierE; + id->Stype = S_NONTERMINAL; + return id; + } + } + + // E -> E! + if(stack->size == 1) { + StackItem *operator = Array_get(stack, 1); + StackItem *argument = Array_get(stack, 0); + if(operator->token->kind == TOKEN_EXCLAMATION) { + UnaryExpressionASTNode *unaryE = new_UnaryExpressionASTNode(argument->node, OPERATOR_UNWRAP); + operator->node = (ExpressionASTNode*)unaryE; + operator->Stype = S_NONTERMINAL; + return operator; + } + } + + // Binary operations and parentheses + if(stack->size == 2) { + StackItem *operator = Array_get(stack, 1); + StackItem *leftOperand = Array_get(stack, 0); + StackItem *rightOperand = Array_get(stack, 2); + + // E -> (E) + if(operator->Stype == S_NONTERMINAL && leftOperand->token->kind == TOKEN_LEFT_PAREN && rightOperand->token->kind == TOKEN_RIGHT_PAREN) { + return operator; + } + + enum OperatorType operatorType = 0; + switch(operator->token->kind) { + case TOKEN_PLUS: + operatorType = OPERATOR_PLUS; + break; + case TOKEN_MINUS: + operatorType = OPERATOR_MINUS; + break; + case TOKEN_STAR: + operatorType = OPERATOR_MUL; + break; + case TOKEN_SLASH: + operatorType = OPERATOR_DIV; + break; + case TOKEN_EQUALITY: + operatorType = OPERATOR_EQUAL; + break; + case TOKEN_NOT_EQUALITY: + operatorType = OPERATOR_NOT_EQUAL; + break; + case TOKEN_LESS: + operatorType = OPERATOR_LESS; + break; + case TOKEN_GREATER: + operatorType = OPERATOR_GREATER; + break; + case TOKEN_LESS_EQUAL: + operatorType = OPERATOR_LESS_EQUAL; + break; + case TOKEN_GREATER_EQUAL: + operatorType = OPERATOR_GREATER_EQUAL; + break; + case TOKEN_NULL_COALESCING: + 
operatorType = OPERATOR_NULL_COALESCING; + break; + default: + break; + } + if(operatorType) { + BinaryExpressionASTNode *binaryE = new_BinaryExpressionASTNode(leftOperand->node, rightOperand->node, operatorType); + operator->node = (ExpressionASTNode*)binaryE; + operator->Stype = S_NONTERMINAL; + return operator; + } + + } + return NULL; //maybe } ParserResult __Parser_parseExpression(Parser *parser) { assertf(parser != NULL); - - Array *stack = Array_alloc(STACK_SIZE); - Array *reduceStack = Array_alloc(STACK_SIZE); - Token *token = NULL; - StackItem *bottom = mem_alloc(sizeof(StackItem)); - - bottom->Stype = S_BOTTOM; - bottom->node = NULL; - Array_push(stack, bottom); - - LexerResult current = Lexer_nextToken(parser->lexer); - - while(true){ - - if(!current.success) return LexerToParserError(current); - enum PrecTableRelation operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; - if(((StackItem*)Array_get(stack, stack->size))->Stype = S_NONTERMINAL && stack->size == 1 && operation == R){ - StackItem *finalExpression = Array_get(stack, stack->size); - return ParserSuccess(finalExpression->node); - } //TODO: when to end - switch(operation){ - case S: - StackItem *token = mem_alloc(sizeof(StackItem)); - token->Stype = S_TERMINAL; - token->token = current.token; - token->node = NULL; - Expr_pushAfterTopTerminal(stack); - Array_push(stack, token); - current = Lexer_nextToken(parser->lexer); //better check first with peekToken - break; - case R: - while((token = Array_pop(stack))->Stype != S_STOP){ - if(token->Stype != S_STOP){ - Array_push(reduceStack, token); - } - } - // Perform reduction and push result on stack (nonterminal) - Array_push(stack, Expr_performReduction(reduceStack)); - break; - case E: - StackItem *token = mem_alloc(sizeof(StackItem)); - token->Stype = S_TERMINAL; - token->token = current.token; - Array_push(stack, token); - break; - case X: - //call LexerToParserError - break; - 
default: - break; - - - } - } - + + Array *stack = Array_alloc(STACK_SIZE); + Array *reduceStack = Array_alloc(STACK_SIZE); + //Token *token = NULL; + StackItem *bottom = mem_alloc(sizeof(StackItem)); + + bottom->Stype = S_BOTTOM; + bottom->node = NULL; + Array_push(stack, bottom); + + LexerResult current = Lexer_nextToken(parser->lexer); + + while(true) { + + if(!current.success) return LexerToParserError(current); + enum PrecTableRelation operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; + if(((StackItem*)Array_get(stack, stack->size))->Stype == S_NONTERMINAL && stack->size == 1 && operation == R) { + StackItem *finalExpression = Array_get(stack, stack->size); + return ParserSuccess(finalExpression->node); + } //TODO: when to end + StackItem *shiftToken = mem_alloc(sizeof(StackItem)); + StackItem *reduceToken = mem_alloc(sizeof(StackItem)); + StackItem *equalsToken = mem_alloc(sizeof(StackItem)); + switch(operation) { + case S: + shiftToken->Stype = S_TERMINAL; + shiftToken->token = current.token; + shiftToken->node = NULL; + Expr_pushAfterTopTerminal(stack); + Array_push(stack, shiftToken); + current = Lexer_nextToken(parser->lexer); //better check first with peekToken + break; + case R: + while((reduceToken = Array_pop(stack))->Stype != S_STOP) { + if(reduceToken->Stype != S_STOP) { + Array_push(reduceStack, reduceToken); + } + } + // Perform reduction and push result on stack (nonterminal) + Array_push(stack, Expr_performReduction(reduceStack)); + break; + case E: + equalsToken->Stype = S_TERMINAL; + equalsToken->token = current.token; + Array_push(stack, equalsToken); + break; + case X: + //call LexerToParserError + break; + default: + break; + + + } + } + return ParserNoMatch(); } \ No newline at end of file diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index b2dcf94..f62c3d8 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -118,12 
+118,12 @@ ParserResult __Parser_parseStatement(Parser *parser) { return ParserNoMatch(); } -ParserResult __Parser_parseExpression(Parser *parser) { - assertf(parser != NULL); +//ParserResult __Parser_parseExpression(Parser *parser) { +// assertf(parser != NULL); // TODO: Add logic for parsing expressions (using LL(1) parsing) - return ParserNoMatch(); -} +// return ParserNoMatch(); +//} ParserResult __Parser_parseTypeReference(Parser *parser) { // TODO: Add logic to output correct error messages diff --git a/src/internal/Array.c b/src/internal/Array.c index 81806fa..282e103 100644 --- a/src/internal/Array.c +++ b/src/internal/Array.c @@ -25,7 +25,7 @@ void Array_destructor(Array *array) { void Array_push(Array *array, void *value) { if(!array) return; - + printf("here"); // If size exceeds capacity, resize the array to fit more elements if(array->size >= array->capacity) { Array_resize(array, (array->capacity ? array->capacity : 1) << 1); diff --git a/test/compiler/parser/Expressions.test.c b/test/compiler/parser/Expressions.test.c new file mode 100644 index 0000000..be3ab0f --- /dev/null +++ b/test/compiler/parser/Expressions.test.c @@ -0,0 +1,26 @@ +#include "unit.h" +#include "compiler/parser/Expressions.h" +#include "allocator/MemoryAllocator.h" +#include "internal/Array.h" +#include "internal/String.h" +#include "inspector.h" + +#define STACK_SIZE 20 + +DESCRIBE(stack_top_terminal, "Get top terminal from stack"){ + StackItem *topTerminal = mem_alloc(sizeof(StackItem)); + StackItem *dollarTerminal = mem_alloc(sizeof(StackItem)); + dollarTerminal->Stype = S_BOTTOM; + Array *stack = Array_alloc(STACK_SIZE); + int num = 123; + dumpvar(num); + Array_push(stack, dollarTerminal); + dumpvar(stack); + TEST_BEGIN("one item on stack"){ + *topTerminal = Expr_getTopTerminal(stack); + EXPECT_TRUE(topTerminal->Stype == S_BOTTOM); + }TEST_END(); + + + +} \ No newline at end of file From b0cd65f2be02fa7d135a8b399659fa560ec5f569 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: 
Sat, 4 Nov 2023 15:07:38 +0100 Subject: [PATCH 17/69] new tests for expression parser --- git | 1 + include/compiler/parser/Expressions.h | 1 + src/compiler/parser/Expressions.c | 41 +++++++++------- src/internal/Array.c | 1 - test/compiler/parser/Expressions.test.c | 63 +++++++++++++++++++++++-- 5 files changed, 84 insertions(+), 23 deletions(-) create mode 160000 git diff --git a/git b/git new file mode 160000 index 0000000..bc52045 --- /dev/null +++ b/git @@ -0,0 +1 @@ +Subproject commit bc5204569f7db44d22477485afd52ea410d83743 diff --git a/include/compiler/parser/Expressions.h b/include/compiler/parser/Expressions.h index 379a14c..28c5ebf 100644 --- a/include/compiler/parser/Expressions.h +++ b/include/compiler/parser/Expressions.h @@ -37,6 +37,7 @@ typedef struct StackItem { } StackItem; StackItem Expr_getTopTerminal(Array *stack); +StackItem *Expr_performReduction(Array *stack); #endif diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c index 930bd05..f89cd9b 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/Expressions.c @@ -80,11 +80,10 @@ int Expr_getPrecTbIndex(Token *token) { StackItem Expr_getTopTerminal(Array *stack) { StackItem *top = NULL; for(size_t i = 0; i < stack->size; i++) { - if((top = Array_get(stack, stack->size - i - 1))->Stype == (S_TERMINAL || S_BOTTOM)) { + if((top = Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || top->Stype == S_BOTTOM) { return *top; } } - //else error return *top; } @@ -94,8 +93,9 @@ void Expr_pushAfterTopTerminal(Array *stack) { stopReduction->Stype = S_STOP; stopReduction->node = NULL; for(size_t i = 0; i < stack->size; i++) { - if(((StackItem *)Array_get(stack, stack->size - i))->Stype == S_TERMINAL) { + if(((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL) { Array_push(stack, stopReduction); + return; } } } @@ -103,37 +103,42 @@ void Expr_pushAfterTopTerminal(Array *stack) { StackItem *Expr_performReduction(Array *stack) { 
// E -> i - if(stack->size == 0) { + if(stack->size == 1) { StackItem *id = Array_get(stack, 0); - if(id->token->type == TOKEN_LITERAL) { - LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); - id->node = (ExpressionASTNode*)literalE; - id->Stype = S_NONTERMINAL; - return id; - } - if(id->token->type == TOKEN_IDENTIFIER) { - IdentifierASTNode *identifierE = new_IdentifierASTNode(id->token->value.string); //string or identifier? - id->node = (ExpressionASTNode*)identifierE; - id->Stype = S_NONTERMINAL; - return id; + if(id->Stype == S_TERMINAL){ + if(id->token->type == TOKEN_LITERAL) { + LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); + id->node = (ExpressionASTNode*)literalE; + id->Stype = S_NONTERMINAL; + return id; + } + if(id->token->type == TOKEN_IDENTIFIER) { + IdentifierASTNode *identifierE = new_IdentifierASTNode(id->token->value.string); //string or identifier? + id->node = (ExpressionASTNode*)identifierE; + id->Stype = S_NONTERMINAL; + return id; + } } + //two operators consecutively + else {return NULL;} //TODO: check for null in main } // E -> E! 
- if(stack->size == 1) { + if(stack->size == 2) { StackItem *operator = Array_get(stack, 1); StackItem *argument = Array_get(stack, 0); - if(operator->token->kind == TOKEN_EXCLAMATION) { + if(operator->token->kind == TOKEN_EXCLAMATION && argument->Stype == S_NONTERMINAL) { UnaryExpressionASTNode *unaryE = new_UnaryExpressionASTNode(argument->node, OPERATOR_UNWRAP); operator->node = (ExpressionASTNode*)unaryE; operator->Stype = S_NONTERMINAL; return operator; } + else {return NULL;} } // Binary operations and parentheses - if(stack->size == 2) { + if(stack->size == 3) { StackItem *operator = Array_get(stack, 1); StackItem *leftOperand = Array_get(stack, 0); StackItem *rightOperand = Array_get(stack, 2); diff --git a/src/internal/Array.c b/src/internal/Array.c index 282e103..c897a8e 100644 --- a/src/internal/Array.c +++ b/src/internal/Array.c @@ -25,7 +25,6 @@ void Array_destructor(Array *array) { void Array_push(Array *array, void *value) { if(!array) return; - printf("here"); // If size exceeds capacity, resize the array to fit more elements if(array->size >= array->capacity) { Array_resize(array, (array->capacity ? 
array->capacity : 1) << 1); diff --git a/test/compiler/parser/Expressions.test.c b/test/compiler/parser/Expressions.test.c index be3ab0f..38ac840 100644 --- a/test/compiler/parser/Expressions.test.c +++ b/test/compiler/parser/Expressions.test.c @@ -12,15 +12,70 @@ DESCRIBE(stack_top_terminal, "Get top terminal from stack"){ StackItem *dollarTerminal = mem_alloc(sizeof(StackItem)); dollarTerminal->Stype = S_BOTTOM; Array *stack = Array_alloc(STACK_SIZE); - int num = 123; - dumpvar(num); + Array_push(stack, dollarTerminal); - dumpvar(stack); - TEST_BEGIN("one item on stack"){ + + + TEST_BEGIN("one terminal on stack"){ *topTerminal = Expr_getTopTerminal(stack); EXPECT_TRUE(topTerminal->Stype == S_BOTTOM); }TEST_END(); + TEST_BEGIN("two terminals on stack"){ + StackItem *secondTerminal = mem_alloc(sizeof(StackItem)); + secondTerminal->Stype = S_TERMINAL; + Array_push(stack, secondTerminal); + *topTerminal = Expr_getTopTerminal(stack); + EXPECT_TRUE(topTerminal->Stype == S_TERMINAL); + }TEST_END(); + + + TEST_BEGIN("one nonterminal, two terminals on stack"){ + StackItem *secondTerminal = mem_alloc(sizeof(StackItem)); + secondTerminal->Stype = S_TERMINAL; + StackItem *nonTerminal = mem_alloc(sizeof(StackItem)); + nonTerminal->Stype = S_NONTERMINAL; + Array_push(stack, secondTerminal); + Array_push(stack, nonTerminal); + *topTerminal = Expr_getTopTerminal(stack); + EXPECT_TRUE(topTerminal->Stype == S_TERMINAL); + }TEST_END(); + +} +DESCRIBE(reduction, "performs reductions according to grammar"){ + StackItem *terminal = mem_alloc(sizeof(StackItem)); + StackItem *result = mem_alloc(sizeof(StackItem)); + Array *reductionStack = Array_alloc(STACK_SIZE); + TEST_BEGIN("reducton of literal"){ + Token *intLiteral = mem_alloc(sizeof(Token)); + intLiteral->type = TOKEN_LITERAL; + intLiteral->value.floating = 0.9; + terminal->token = intLiteral; + terminal->Stype = S_TERMINAL; + terminal->node = NULL; + Array_push(reductionStack, terminal); + result = 
Expr_performReduction(reductionStack); + EXPECT_TRUE(result->node->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(result->Stype = S_NONTERMINAL); + + }TEST_END(); + + //TODO: reduction of identifier + + TEST_BEGIN("reducton of 'E!' "){ + Token *intLiteral = mem_alloc(sizeof(Token)); + intLiteral->type = TOKEN_LITERAL; + intLiteral->value.floating = 0.9; + terminal->token = intLiteral; + terminal->Stype = S_TERMINAL; + terminal->node = NULL; + Array_push(reductionStack, terminal); + result = Expr_performReduction(reductionStack); + EXPECT_TRUE(result->node->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(result->Stype = S_NONTERMINAL); + + }TEST_END(); + } \ No newline at end of file From 5d150df45cf503abb681c73bce6a9a27856f6695 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Sun, 5 Nov 2023 21:50:19 +0100 Subject: [PATCH 18/69] some new errors --- include/compiler/parser/Expressions.h | 1 + src/compiler/parser/Expressions.c | 41 +++++---- test/compiler/parser/Expressions.test.c | 113 +++++++++++++++++++++--- 3 files changed, 130 insertions(+), 25 deletions(-) diff --git a/include/compiler/parser/Expressions.h b/include/compiler/parser/Expressions.h index 28c5ebf..c55c853 100644 --- a/include/compiler/parser/Expressions.h +++ b/include/compiler/parser/Expressions.h @@ -37,6 +37,7 @@ typedef struct StackItem { } StackItem; StackItem Expr_getTopTerminal(Array *stack); +void Expr_pushAfterTopTerminal(Array *stack); StackItem *Expr_performReduction(Array *stack); #endif diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c index f89cd9b..486826a 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/Expressions.c @@ -17,7 +17,7 @@ int precedence_table[TABLE_SIZE][TABLE_SIZE] = //[stack top terminal][input token] { - // +-|*/| ! |??|r |i |( |) |$ + // +-|*/| ! |??|r |i |( |) |$ {R, S, S, R, R, S, S, R, R}, // +- {R, R, S, R, R, S, S, R, R}, // */ {R, R, R, R, R, S, S, R, R}, // ! 
@@ -93,8 +93,8 @@ void Expr_pushAfterTopTerminal(Array *stack) { stopReduction->Stype = S_STOP; stopReduction->node = NULL; for(size_t i = 0; i < stack->size; i++) { - if(((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL) { - Array_push(stack, stopReduction); + if (((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || ((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM) { + Array_insert(stack, (int)stack->size - i, stopReduction); return; } } @@ -104,7 +104,8 @@ StackItem *Expr_performReduction(Array *stack) { // E -> i if(stack->size == 1) { - StackItem *id = Array_get(stack, 0); + //StackItem *id = Array_get(stack, 0); + StackItem *id = Array_pop(stack); if(id->Stype == S_TERMINAL){ if(id->token->type == TOKEN_LITERAL) { @@ -126,8 +127,11 @@ StackItem *Expr_performReduction(Array *stack) { // E -> E! if(stack->size == 2) { - StackItem *operator = Array_get(stack, 1); - StackItem *argument = Array_get(stack, 0); + //StackItem *operator = Array_get(stack, 1); + //StackItem *argument = Array_get(stack, 0); + StackItem *operator = Array_pop(stack); + StackItem *argument = Array_pop(stack); + if(operator->token->kind == TOKEN_EXCLAMATION && argument->Stype == S_NONTERMINAL) { UnaryExpressionASTNode *unaryE = new_UnaryExpressionASTNode(argument->node, OPERATOR_UNWRAP); operator->node = (ExpressionASTNode*)unaryE; @@ -139,9 +143,12 @@ StackItem *Expr_performReduction(Array *stack) { // Binary operations and parentheses if(stack->size == 3) { - StackItem *operator = Array_get(stack, 1); - StackItem *leftOperand = Array_get(stack, 0); - StackItem *rightOperand = Array_get(stack, 2); + //StackItem *operator = Array_get(stack, 1); + //StackItem *leftOperand = Array_get(stack, 0); + //StackItem *rightOperand = Array_get(stack, 2); + StackItem *rightOperand = Array_pop(stack); + StackItem *operator = Array_pop(stack); + StackItem *leftOperand = Array_pop(stack); // E -> (E) if(operator->Stype == S_NONTERMINAL 
&& leftOperand->token->kind == TOKEN_LEFT_PAREN && rightOperand->token->kind == TOKEN_RIGHT_PAREN) { @@ -149,6 +156,7 @@ StackItem *Expr_performReduction(Array *stack) { } enum OperatorType operatorType = 0; + if(leftOperand->Stype == S_NONTERMINAL && rightOperand->Stype == S_NONTERMINAL) switch(operator->token->kind) { case TOKEN_PLUS: operatorType = OPERATOR_PLUS; @@ -192,9 +200,8 @@ StackItem *Expr_performReduction(Array *stack) { operator->Stype = S_NONTERMINAL; return operator; } - } - return NULL; //maybe + return NULL; } ParserResult __Parser_parseExpression(Parser *parser) { @@ -215,7 +222,7 @@ ParserResult __Parser_parseExpression(Parser *parser) { if(!current.success) return LexerToParserError(current); enum PrecTableRelation operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; - if(((StackItem*)Array_get(stack, stack->size))->Stype == S_NONTERMINAL && stack->size == 1 && operation == R) { + if(((StackItem*)Array_get(stack, stack->size - 1))->Stype == S_NONTERMINAL && stack->size == 2 && operation == X) { StackItem *finalExpression = Array_get(stack, stack->size); return ParserSuccess(finalExpression->node); } //TODO: when to end @@ -238,7 +245,12 @@ ParserResult __Parser_parseExpression(Parser *parser) { } } // Perform reduction and push result on stack (nonterminal) - Array_push(stack, Expr_performReduction(reduceStack)); + if(reduceToken == Expr_performReduction(reduceStack)){ + Array_push(stack, reduceToken); + } + else{ + return ParserError(String_fromFormat("TODO"), NULL); + } break; case E: equalsToken->Stype = S_TERMINAL; @@ -246,8 +258,7 @@ ParserResult __Parser_parseExpression(Parser *parser) { Array_push(stack, equalsToken); break; case X: - //call LexerToParserError - break; + return ParserError(String_fromFormat("TODO"), NULL); default: break; diff --git a/test/compiler/parser/Expressions.test.c b/test/compiler/parser/Expressions.test.c index 38ac840..4e214fc 100644 --- 
a/test/compiler/parser/Expressions.test.c +++ b/test/compiler/parser/Expressions.test.c @@ -4,6 +4,8 @@ #include "internal/Array.h" #include "internal/String.h" #include "inspector.h" +#include "compiler/lexer/Lexer.h" +#include #define STACK_SIZE 20 @@ -31,11 +33,8 @@ DESCRIBE(stack_top_terminal, "Get top terminal from stack"){ TEST_BEGIN("one nonterminal, two terminals on stack"){ - StackItem *secondTerminal = mem_alloc(sizeof(StackItem)); - secondTerminal->Stype = S_TERMINAL; StackItem *nonTerminal = mem_alloc(sizeof(StackItem)); nonTerminal->Stype = S_NONTERMINAL; - Array_push(stack, secondTerminal); Array_push(stack, nonTerminal); *topTerminal = Expr_getTopTerminal(stack); EXPECT_TRUE(topTerminal->Stype == S_TERMINAL); @@ -43,6 +42,44 @@ DESCRIBE(stack_top_terminal, "Get top terminal from stack"){ } +DESCRIBE(stack_push_after_top_t, "Push after top terminal on stack"){ + StackItem *stopItem = mem_alloc(sizeof(StackItem)); + StackItem *dollarTerminal = mem_alloc(sizeof(StackItem)); + dollarTerminal->Stype = S_BOTTOM; + Array *stack = Array_alloc(STACK_SIZE); + + Array_push(stack, dollarTerminal); + + TEST_BEGIN("push after first terminal on stack"){ + Expr_pushAfterTopTerminal(stack); + stopItem = (StackItem*)Array_get(stack, 1); + EXPECT_TRUE(stopItem->Stype == S_STOP); + Array_pop(stack); + }TEST_END(); + + TEST_BEGIN("push after second terminal on stack"){ + stopItem->Stype = S_NONTERMINAL; + StackItem *secondTerminal = mem_alloc(sizeof(StackItem)); + secondTerminal->Stype = S_TERMINAL; + Array_push(stack, secondTerminal); + Expr_pushAfterTopTerminal(stack); + stopItem = (StackItem*)Array_get(stack, 2); + EXPECT_TRUE(stopItem->Stype == S_STOP); + Array_pop(stack); + }TEST_END(); + + TEST_BEGIN("overcome nonterminal and push after top terminal"){ + StackItem *nonTerminal = mem_alloc(sizeof(StackItem)); + nonTerminal->Stype = S_NONTERMINAL; + Array_push(stack, nonTerminal); + Expr_pushAfterTopTerminal(stack); + stopItem = (StackItem*)Array_get(stack, 2); + 
EXPECT_TRUE(stopItem->Stype == S_STOP); + }TEST_END(); + + +} + DESCRIBE(reduction, "performs reductions according to grammar"){ StackItem *terminal = mem_alloc(sizeof(StackItem)); StackItem *result = mem_alloc(sizeof(StackItem)); @@ -64,18 +101,74 @@ DESCRIBE(reduction, "performs reductions according to grammar"){ //TODO: reduction of identifier - TEST_BEGIN("reducton of 'E!' "){ - Token *intLiteral = mem_alloc(sizeof(Token)); - intLiteral->type = TOKEN_LITERAL; - intLiteral->value.floating = 0.9; - terminal->token = intLiteral; - terminal->Stype = S_TERMINAL; - terminal->node = NULL; + TEST_BEGIN("reducton of E! "){ + StackItem *operatorExc = mem_alloc(sizeof(StackItem)); + Token *operatorUnwrap = mem_alloc(sizeof(Token)); + operatorUnwrap->type = TOKEN_OPERATOR; + operatorUnwrap->kind = TOKEN_EXCLAMATION; + operatorExc->Stype = S_TERMINAL; + operatorExc->token = operatorUnwrap; + + Array_push(reductionStack, terminal); + Array_push(reductionStack, operatorExc); + + result = Expr_performReduction(reductionStack); + EXPECT_TRUE(result->node->_type == NODE_UNARY_EXPRESSION); + EXPECT_TRUE(result->Stype = S_NONTERMINAL); + + }TEST_END(); + + TEST_BEGIN("reduction of E + E "){ + StackItem *operator = mem_alloc(sizeof(StackItem)); + Token *operatorPlus = mem_alloc(sizeof(Token)); + operatorPlus->type = TOKEN_OPERATOR; + operatorPlus->kind = TOKEN_PLUS; + operator->Stype = S_TERMINAL; + operator->token = operatorPlus; + + Array_push(reductionStack, terminal); + Array_push(reductionStack, operator); + Array_push(reductionStack, terminal); + + result = Expr_performReduction(reductionStack); + EXPECT_TRUE(result->node->_type == NODE_BINARY_EXPRESSION); + EXPECT_TRUE(result->Stype = S_NONTERMINAL); + + }TEST_END(); + + TEST_BEGIN("reduction of (E) "){ + StackItem *opLeftParen = mem_alloc(sizeof(StackItem)); + Token *leftparen = mem_alloc(sizeof(Token)); + leftparen->type = TOKEN_OPERATOR; + leftparen->kind = TOKEN_LEFT_PAREN; + opLeftParen->Stype = S_TERMINAL; + 
opLeftParen->token = leftparen; + + StackItem *opRightParen = mem_alloc(sizeof(StackItem)); + Token *rigtparen = mem_alloc(sizeof(Token)); + rigtparen->type = TOKEN_OPERATOR; + rigtparen->kind = TOKEN_RIGHT_PAREN; + opRightParen->Stype = S_TERMINAL; + opRightParen->token = rigtparen; + + Array_push(reductionStack, opLeftParen); Array_push(reductionStack, terminal); + Array_push(reductionStack, opRightParen); result = Expr_performReduction(reductionStack); EXPECT_TRUE(result->node->_type == NODE_LITERAL_EXPRESSION); EXPECT_TRUE(result->Stype = S_NONTERMINAL); }TEST_END(); +} + +DESCRIBE(parse_expression, "parse expression"){ + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + + TEST_BEGIN("parse single character expression"){ + result = Lexer_tokenize(&lexer, "a "); + }TEST_END(); } \ No newline at end of file From 7337723e653443aa17ac717af584ba0a58e83706 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Mon, 6 Nov 2023 21:43:29 +0100 Subject: [PATCH 19/69] remove test file --- git | 1 - test/compiler/parser/Expressions.test.c | 174 ------------------------ 2 files changed, 175 deletions(-) delete mode 160000 git delete mode 100644 test/compiler/parser/Expressions.test.c diff --git a/git b/git deleted file mode 160000 index bc52045..0000000 --- a/git +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bc5204569f7db44d22477485afd52ea410d83743 diff --git a/test/compiler/parser/Expressions.test.c b/test/compiler/parser/Expressions.test.c deleted file mode 100644 index 4e214fc..0000000 --- a/test/compiler/parser/Expressions.test.c +++ /dev/null @@ -1,174 +0,0 @@ -#include "unit.h" -#include "compiler/parser/Expressions.h" -#include "allocator/MemoryAllocator.h" -#include "internal/Array.h" -#include "internal/String.h" -#include "inspector.h" -#include "compiler/lexer/Lexer.h" -#include - -#define STACK_SIZE 20 - -DESCRIBE(stack_top_terminal, "Get top terminal from stack"){ - StackItem *topTerminal = mem_alloc(sizeof(StackItem)); - StackItem *dollarTerminal = 
mem_alloc(sizeof(StackItem)); - dollarTerminal->Stype = S_BOTTOM; - Array *stack = Array_alloc(STACK_SIZE); - - Array_push(stack, dollarTerminal); - - - TEST_BEGIN("one terminal on stack"){ - *topTerminal = Expr_getTopTerminal(stack); - EXPECT_TRUE(topTerminal->Stype == S_BOTTOM); - }TEST_END(); - - TEST_BEGIN("two terminals on stack"){ - StackItem *secondTerminal = mem_alloc(sizeof(StackItem)); - secondTerminal->Stype = S_TERMINAL; - Array_push(stack, secondTerminal); - *topTerminal = Expr_getTopTerminal(stack); - EXPECT_TRUE(topTerminal->Stype == S_TERMINAL); - }TEST_END(); - - - TEST_BEGIN("one nonterminal, two terminals on stack"){ - StackItem *nonTerminal = mem_alloc(sizeof(StackItem)); - nonTerminal->Stype = S_NONTERMINAL; - Array_push(stack, nonTerminal); - *topTerminal = Expr_getTopTerminal(stack); - EXPECT_TRUE(topTerminal->Stype == S_TERMINAL); - }TEST_END(); - -} - -DESCRIBE(stack_push_after_top_t, "Push after top terminal on stack"){ - StackItem *stopItem = mem_alloc(sizeof(StackItem)); - StackItem *dollarTerminal = mem_alloc(sizeof(StackItem)); - dollarTerminal->Stype = S_BOTTOM; - Array *stack = Array_alloc(STACK_SIZE); - - Array_push(stack, dollarTerminal); - - TEST_BEGIN("push after first terminal on stack"){ - Expr_pushAfterTopTerminal(stack); - stopItem = (StackItem*)Array_get(stack, 1); - EXPECT_TRUE(stopItem->Stype == S_STOP); - Array_pop(stack); - }TEST_END(); - - TEST_BEGIN("push after second terminal on stack"){ - stopItem->Stype = S_NONTERMINAL; - StackItem *secondTerminal = mem_alloc(sizeof(StackItem)); - secondTerminal->Stype = S_TERMINAL; - Array_push(stack, secondTerminal); - Expr_pushAfterTopTerminal(stack); - stopItem = (StackItem*)Array_get(stack, 2); - EXPECT_TRUE(stopItem->Stype == S_STOP); - Array_pop(stack); - }TEST_END(); - - TEST_BEGIN("overcome nonterminal and push after top terminal"){ - StackItem *nonTerminal = mem_alloc(sizeof(StackItem)); - nonTerminal->Stype = S_NONTERMINAL; - Array_push(stack, nonTerminal); - 
Expr_pushAfterTopTerminal(stack); - stopItem = (StackItem*)Array_get(stack, 2); - EXPECT_TRUE(stopItem->Stype == S_STOP); - }TEST_END(); - - -} - -DESCRIBE(reduction, "performs reductions according to grammar"){ - StackItem *terminal = mem_alloc(sizeof(StackItem)); - StackItem *result = mem_alloc(sizeof(StackItem)); - Array *reductionStack = Array_alloc(STACK_SIZE); - - TEST_BEGIN("reducton of literal"){ - Token *intLiteral = mem_alloc(sizeof(Token)); - intLiteral->type = TOKEN_LITERAL; - intLiteral->value.floating = 0.9; - terminal->token = intLiteral; - terminal->Stype = S_TERMINAL; - terminal->node = NULL; - Array_push(reductionStack, terminal); - result = Expr_performReduction(reductionStack); - EXPECT_TRUE(result->node->_type == NODE_LITERAL_EXPRESSION); - EXPECT_TRUE(result->Stype = S_NONTERMINAL); - - }TEST_END(); - - //TODO: reduction of identifier - - TEST_BEGIN("reducton of E! "){ - StackItem *operatorExc = mem_alloc(sizeof(StackItem)); - Token *operatorUnwrap = mem_alloc(sizeof(Token)); - operatorUnwrap->type = TOKEN_OPERATOR; - operatorUnwrap->kind = TOKEN_EXCLAMATION; - operatorExc->Stype = S_TERMINAL; - operatorExc->token = operatorUnwrap; - - Array_push(reductionStack, terminal); - Array_push(reductionStack, operatorExc); - - result = Expr_performReduction(reductionStack); - EXPECT_TRUE(result->node->_type == NODE_UNARY_EXPRESSION); - EXPECT_TRUE(result->Stype = S_NONTERMINAL); - - }TEST_END(); - - TEST_BEGIN("reduction of E + E "){ - StackItem *operator = mem_alloc(sizeof(StackItem)); - Token *operatorPlus = mem_alloc(sizeof(Token)); - operatorPlus->type = TOKEN_OPERATOR; - operatorPlus->kind = TOKEN_PLUS; - operator->Stype = S_TERMINAL; - operator->token = operatorPlus; - - Array_push(reductionStack, terminal); - Array_push(reductionStack, operator); - Array_push(reductionStack, terminal); - - result = Expr_performReduction(reductionStack); - EXPECT_TRUE(result->node->_type == NODE_BINARY_EXPRESSION); - EXPECT_TRUE(result->Stype = S_NONTERMINAL); - 
- }TEST_END(); - - TEST_BEGIN("reduction of (E) "){ - StackItem *opLeftParen = mem_alloc(sizeof(StackItem)); - Token *leftparen = mem_alloc(sizeof(Token)); - leftparen->type = TOKEN_OPERATOR; - leftparen->kind = TOKEN_LEFT_PAREN; - opLeftParen->Stype = S_TERMINAL; - opLeftParen->token = leftparen; - - StackItem *opRightParen = mem_alloc(sizeof(StackItem)); - Token *rigtparen = mem_alloc(sizeof(Token)); - rigtparen->type = TOKEN_OPERATOR; - rigtparen->kind = TOKEN_RIGHT_PAREN; - opRightParen->Stype = S_TERMINAL; - opRightParen->token = rigtparen; - - Array_push(reductionStack, opLeftParen); - Array_push(reductionStack, terminal); - Array_push(reductionStack, opRightParen); - result = Expr_performReduction(reductionStack); - EXPECT_TRUE(result->node->_type == NODE_LITERAL_EXPRESSION); - EXPECT_TRUE(result->Stype = S_NONTERMINAL); - - }TEST_END(); - -} - -DESCRIBE(parse_expression, "parse expression"){ - Lexer lexer; - Lexer_constructor(&lexer); - - LexerResult result; - - TEST_BEGIN("parse single character expression"){ - result = Lexer_tokenize(&lexer, "a "); - }TEST_END(); -} \ No newline at end of file From a22f7ef22c37ade6d83bff6c56031d79f4bdf17a Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Mon, 6 Nov 2023 21:49:47 +0100 Subject: [PATCH 20/69] small syntax changes --- src/compiler/parser/Expressions.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c index 486826a..7d98a72 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/Expressions.c @@ -204,6 +204,8 @@ StackItem *Expr_performReduction(Array *stack) { return NULL; } +// TODO: function to determine whether to end or not + ParserResult __Parser_parseExpression(Parser *parser) { assertf(parser != NULL); From 7769fd7bdfecfe17e68a5bb23e7822c889150f4f Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Fri, 10 Nov 2023 15:41:37 +0100 Subject: [PATCH 21/69] untested expression parser, added error handling and memory managing --- 
src/compiler/parser/Expressions.c | 187 ++++++++++++++++++------------ 1 file changed, 110 insertions(+), 77 deletions(-) diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c index 7d98a72..b52c9f5 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/Expressions.c @@ -17,16 +17,16 @@ int precedence_table[TABLE_SIZE][TABLE_SIZE] = //[stack top terminal][input token] { - // +-|*/| ! |??|r |i |( |) |$ + // +-|*/| ! |??|r |i |( |) |$ {R, S, S, R, R, S, S, R, R}, // +- {R, R, S, R, R, S, S, R, R}, // */ - {R, R, R, R, R, S, S, R, R}, // ! + {R, R, X, R, R, X, X, R, R}, // ! {S, S, S, S, S, S, S, R, R}, // ?? - {S, S, S, R, R, S, S, R, R}, // r (==, !=, <, >, <=, >=) + {S, S, S, R, X, S, S, R, R}, // r (==, !=, <, >, <=, >=) {R, R, R, R, R, X, X, R, R}, // i {S, S, S, S, S, S, S, E, X}, // ( {R, R, R, R, R, X, X, R, R}, // ) - {S, S, S, S, S, S, S, X, X} // $ + {S, S, S, S, S, S, S, X, X} // $ }; @@ -93,7 +93,7 @@ void Expr_pushAfterTopTerminal(Array *stack) { stopReduction->Stype = S_STOP; stopReduction->node = NULL; for(size_t i = 0; i < stack->size; i++) { - if (((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || ((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM) { + if(((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || ((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM) { Array_insert(stack, (int)stack->size - i, stopReduction); return; } @@ -107,7 +107,7 @@ StackItem *Expr_performReduction(Array *stack) { //StackItem *id = Array_get(stack, 0); StackItem *id = Array_pop(stack); - if(id->Stype == S_TERMINAL){ + if(id->Stype == S_TERMINAL) { if(id->token->type == TOKEN_LITERAL) { LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); id->node = (ExpressionASTNode*)literalE; @@ -122,23 +122,27 @@ StackItem *Expr_performReduction(Array *stack) { } } //two operators consecutively - else {return NULL;} 
//TODO: check for null in main + else { + return NULL; + } //TODO: check for null in main } // E -> E! if(stack->size == 2) { //StackItem *operator = Array_get(stack, 1); //StackItem *argument = Array_get(stack, 0); - StackItem *operator = Array_pop(stack); StackItem *argument = Array_pop(stack); + StackItem *operator = Array_pop(stack); if(operator->token->kind == TOKEN_EXCLAMATION && argument->Stype == S_NONTERMINAL) { UnaryExpressionASTNode *unaryE = new_UnaryExpressionASTNode(argument->node, OPERATOR_UNWRAP); operator->node = (ExpressionASTNode*)unaryE; operator->Stype = S_NONTERMINAL; + mem_free(argument); return operator; + } else { + return NULL; } - else {return NULL;} } // Binary operations and parentheses @@ -146,71 +150,90 @@ StackItem *Expr_performReduction(Array *stack) { //StackItem *operator = Array_get(stack, 1); //StackItem *leftOperand = Array_get(stack, 0); //StackItem *rightOperand = Array_get(stack, 2); - StackItem *rightOperand = Array_pop(stack); - StackItem *operator = Array_pop(stack); StackItem *leftOperand = Array_pop(stack); + StackItem *operator = Array_pop(stack); + StackItem *rightOperand = Array_pop(stack); // E -> (E) if(operator->Stype == S_NONTERMINAL && leftOperand->token->kind == TOKEN_LEFT_PAREN && rightOperand->token->kind == TOKEN_RIGHT_PAREN) { + mem_free(leftOperand); + mem_free(rightOperand); return operator; } enum OperatorType operatorType = 0; if(leftOperand->Stype == S_NONTERMINAL && rightOperand->Stype == S_NONTERMINAL) - switch(operator->token->kind) { - case TOKEN_PLUS: - operatorType = OPERATOR_PLUS; - break; - case TOKEN_MINUS: - operatorType = OPERATOR_MINUS; - break; - case TOKEN_STAR: - operatorType = OPERATOR_MUL; - break; - case TOKEN_SLASH: - operatorType = OPERATOR_DIV; - break; - case TOKEN_EQUALITY: - operatorType = OPERATOR_EQUAL; - break; - case TOKEN_NOT_EQUALITY: - operatorType = OPERATOR_NOT_EQUAL; - break; - case TOKEN_LESS: - operatorType = OPERATOR_LESS; - break; - case TOKEN_GREATER: - 
operatorType = OPERATOR_GREATER; - break; - case TOKEN_LESS_EQUAL: - operatorType = OPERATOR_LESS_EQUAL; - break; - case TOKEN_GREATER_EQUAL: - operatorType = OPERATOR_GREATER_EQUAL; - break; - case TOKEN_NULL_COALESCING: - operatorType = OPERATOR_NULL_COALESCING; - break; - default: - break; - } + switch(operator->token->kind) { + case TOKEN_PLUS: + operatorType = OPERATOR_PLUS; + break; + case TOKEN_MINUS: + operatorType = OPERATOR_MINUS; + break; + case TOKEN_STAR: + operatorType = OPERATOR_MUL; + break; + case TOKEN_SLASH: + operatorType = OPERATOR_DIV; + break; + case TOKEN_EQUALITY: + operatorType = OPERATOR_EQUAL; + break; + case TOKEN_NOT_EQUALITY: + operatorType = OPERATOR_NOT_EQUAL; + break; + case TOKEN_LESS: + operatorType = OPERATOR_LESS; + break; + case TOKEN_GREATER: + operatorType = OPERATOR_GREATER; + break; + case TOKEN_LESS_EQUAL: + operatorType = OPERATOR_LESS_EQUAL; + break; + case TOKEN_GREATER_EQUAL: + operatorType = OPERATOR_GREATER_EQUAL; + break; + case TOKEN_NULL_COALESCING: + operatorType = OPERATOR_NULL_COALESCING; + break; + default: + break; + } if(operatorType) { BinaryExpressionASTNode *binaryE = new_BinaryExpressionASTNode(leftOperand->node, rightOperand->node, operatorType); operator->node = (ExpressionASTNode*)binaryE; operator->Stype = S_NONTERMINAL; + mem_free(leftOperand); + mem_free(rightOperand); return operator; } } - return NULL; + return NULL; } -// TODO: function to determine whether to end or not +bool Expr_Reduce(Array *stack, StackItem *currentToken) { + Array *reduceStack = Array_alloc(STACK_SIZE); + + while((currentToken = Array_pop(stack))->Stype != S_STOP) { + if(currentToken->Stype != S_STOP) { + Array_push(reduceStack, currentToken); + } + } + // Perform reduction and push result on stack (nonterminal) + if(currentToken == Expr_performReduction(reduceStack)) { + Array_push(stack, currentToken); + return true; + } else { + return false; + } +} ParserResult __Parser_parseExpression(Parser *parser) { assertf(parser 
!= NULL); Array *stack = Array_alloc(STACK_SIZE); - Array *reduceStack = Array_alloc(STACK_SIZE); + //Array *reduceStack = Array_alloc(STACK_SIZE); //Token *token = NULL; StackItem *bottom = mem_alloc(sizeof(StackItem)); @@ -218,52 +241,62 @@ ParserResult __Parser_parseExpression(Parser *parser) { bottom->node = NULL; Array_push(stack, bottom); - LexerResult current = Lexer_nextToken(parser->lexer); + int offset = 0; + LexerResult current = Lexer_peekToken(parser->lexer, offset); + LexerResult removeFromTokenStream; while(true) { if(!current.success) return LexerToParserError(current); enum PrecTableRelation operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; + if(((StackItem*)Array_get(stack, stack->size - 1))->Stype == S_NONTERMINAL && stack->size == 2 && operation == X) { - StackItem *finalExpression = Array_get(stack, stack->size); + StackItem *finalExpression = Array_pop(stack); + bottom = Array_pop(stack); + mem_free(bottom); + Array_free(stack); + //Array_free(reduceStack); return ParserSuccess(finalExpression->node); } //TODO: when to end - StackItem *shiftToken = mem_alloc(sizeof(StackItem)); - StackItem *reduceToken = mem_alloc(sizeof(StackItem)); - StackItem *equalsToken = mem_alloc(sizeof(StackItem)); + + StackItem *currentToken = mem_alloc(sizeof(StackItem)); + switch(operation) { case S: - shiftToken->Stype = S_TERMINAL; - shiftToken->token = current.token; - shiftToken->node = NULL; + currentToken->Stype = S_TERMINAL; + currentToken->token = current.token; + currentToken->node = NULL; Expr_pushAfterTopTerminal(stack); - Array_push(stack, shiftToken); - current = Lexer_nextToken(parser->lexer); //better check first with peekToken + Array_push(stack, currentToken); + + if(!(removeFromTokenStream = Lexer_nextToken(parser->lexer)).success) return LexerToParserError(current); + current = Lexer_peekToken(parser->lexer, ++offset); break; + case R: - while((reduceToken = 
Array_pop(stack))->Stype != S_STOP) { - if(reduceToken->Stype != S_STOP) { - Array_push(reduceStack, reduceToken); - } - } - // Perform reduction and push result on stack (nonterminal) - if(reduceToken == Expr_performReduction(reduceStack)){ - Array_push(stack, reduceToken); + if(!Expr_Reduce(stack, currentToken)) { + return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); } - else{ - return ParserError(String_fromFormat("TODO"), NULL); - } break; + case E: - equalsToken->Stype = S_TERMINAL; - equalsToken->token = current.token; - Array_push(stack, equalsToken); + currentToken->Stype = S_TERMINAL; + currentToken->token = current.token; + currentToken->node = NULL; + Array_push(stack, currentToken); + + if(!(removeFromTokenStream = Lexer_nextToken(parser->lexer)).success) return LexerToParserError(current); + current = Lexer_peekToken(parser->lexer, ++offset); break; + case X: - return ParserError(String_fromFormat("TODO"), NULL); - default: + if(!Expr_Reduce(stack, currentToken)) { + return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); + } break; + default: + break; } } From 49a2acd17046afc2c2167af72a599e8bd698e4d4 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Fri, 10 Nov 2023 15:59:14 +0100 Subject: [PATCH 22/69] removed some comments --- src/compiler/parser/Expressions.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/Expressions.c index b52c9f5..7e4c0fc 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/Expressions.c @@ -64,8 +64,8 @@ int Expr_getPrecTbIndex(Token *token) { case TOKEN_DEFAULT: if(token->type == TOKEN_IDENTIFIER) { return I_ID; - } //else error - return I_DOLLAR; //maybe + } + return I_DOLLAR; case TOKEN_STRING: case TOKEN_INTEGER: case TOKEN_FLOATING: @@ -104,7 +104,6 @@ StackItem *Expr_performReduction(Array *stack) { // E -> i 
if(stack->size == 1) { - //StackItem *id = Array_get(stack, 0); StackItem *id = Array_pop(stack); if(id->Stype == S_TERMINAL) { @@ -124,13 +123,11 @@ StackItem *Expr_performReduction(Array *stack) { //two operators consecutively else { return NULL; - } //TODO: check for null in main + } } // E -> E! if(stack->size == 2) { - //StackItem *operator = Array_get(stack, 1); - //StackItem *argument = Array_get(stack, 0); StackItem *argument = Array_pop(stack); StackItem *operator = Array_pop(stack); @@ -147,9 +144,6 @@ StackItem *Expr_performReduction(Array *stack) { // Binary operations and parentheses if(stack->size == 3) { - //StackItem *operator = Array_get(stack, 1); - //StackItem *leftOperand = Array_get(stack, 0); - //StackItem *rightOperand = Array_get(stack, 2); StackItem *leftOperand = Array_pop(stack); StackItem *operator = Array_pop(stack); StackItem *rightOperand = Array_pop(stack); @@ -257,7 +251,7 @@ ParserResult __Parser_parseExpression(Parser *parser) { Array_free(stack); //Array_free(reduceStack); return ParserSuccess(finalExpression->node); - } //TODO: when to end + } StackItem *currentToken = mem_alloc(sizeof(StackItem)); From 90074c9bb595f708fc3db3906522ede9c05d0303 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sun, 12 Nov 2023 20:30:24 +0100 Subject: [PATCH 23/69] Renamed `Expressions` to `ExpressionParser` --- .../{Expressions.h => ExpressionParser.h} | 0 .../{Expressions.c => ExpressionParser.c} | 34 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) rename include/compiler/parser/{Expressions.h => ExpressionParser.h} (100%) rename src/compiler/parser/{Expressions.c => ExpressionParser.c} (88%) diff --git a/include/compiler/parser/Expressions.h b/include/compiler/parser/ExpressionParser.h similarity index 100% rename from include/compiler/parser/Expressions.h rename to include/compiler/parser/ExpressionParser.h diff --git a/src/compiler/parser/Expressions.c b/src/compiler/parser/ExpressionParser.c similarity index 88% 
rename from src/compiler/parser/Expressions.c rename to src/compiler/parser/ExpressionParser.c index 7e4c0fc..78dbaca 100644 --- a/src/compiler/parser/Expressions.c +++ b/src/compiler/parser/ExpressionParser.c @@ -3,10 +3,10 @@ #include #include "assertf.h" -//#include "internal/Array.h" -//#include "compiler/parser/ASTNodes.h" -//#include "compiler/lexer/Lexer.h" -#include "compiler/parser/Expressions.h" +// #include "internal/Array.h" +// #include "compiler/parser/ASTNodes.h" +// #include "compiler/lexer/Lexer.h" +#include "compiler/parser/ExpressionParser.h" #include "compiler/lexer/Token.h" #include "internal/Array.h" #include "allocator/MemoryAllocator.h" @@ -15,7 +15,7 @@ #define TABLE_SIZE 9 #define STACK_SIZE 20 -int precedence_table[TABLE_SIZE][TABLE_SIZE] = //[stack top terminal][input token] +int precedence_table[TABLE_SIZE][TABLE_SIZE] = // [stack top terminal][input token] { // +-|*/| ! |??|r |i |( |) |$ {R, S, S, R, R, S, S, R, R}, // +- @@ -64,7 +64,7 @@ int Expr_getPrecTbIndex(Token *token) { case TOKEN_DEFAULT: if(token->type == TOKEN_IDENTIFIER) { return I_ID; - } + } return I_DOLLAR; case TOKEN_STRING: case TOKEN_INTEGER: @@ -93,14 +93,14 @@ void Expr_pushAfterTopTerminal(Array *stack) { stopReduction->Stype = S_STOP; stopReduction->node = NULL; for(size_t i = 0; i < stack->size; i++) { - if(((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || ((StackItem *)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM) { + if(((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || ((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM) { Array_insert(stack, (int)stack->size - i, stopReduction); return; } } } -StackItem *Expr_performReduction(Array *stack) { +StackItem* Expr_performReduction(Array *stack) { // E -> i if(stack->size == 1) { @@ -114,16 +114,16 @@ StackItem *Expr_performReduction(Array *stack) { return id; } if(id->token->type == TOKEN_IDENTIFIER) { - IdentifierASTNode 
*identifierE = new_IdentifierASTNode(id->token->value.string); //string or identifier? + IdentifierASTNode *identifierE = new_IdentifierASTNode(id->token->value.string); // string or identifier? id->node = (ExpressionASTNode*)identifierE; id->Stype = S_NONTERMINAL; return id; } } - //two operators consecutively + // two operators consecutively else { return NULL; - } + } } // E -> E! @@ -137,7 +137,7 @@ StackItem *Expr_performReduction(Array *stack) { operator->Stype = S_NONTERMINAL; mem_free(argument); return operator; - } else { + } else { return NULL; } } @@ -218,7 +218,7 @@ bool Expr_Reduce(Array *stack, StackItem *currentToken) { if(currentToken == Expr_performReduction(reduceStack)) { Array_push(stack, currentToken); return true; - } else { + } else { return false; } } @@ -227,8 +227,8 @@ ParserResult __Parser_parseExpression(Parser *parser) { assertf(parser != NULL); Array *stack = Array_alloc(STACK_SIZE); - //Array *reduceStack = Array_alloc(STACK_SIZE); - //Token *token = NULL; + // Array *reduceStack = Array_alloc(STACK_SIZE); + // Token *token = NULL; StackItem *bottom = mem_alloc(sizeof(StackItem)); bottom->Stype = S_BOTTOM; @@ -249,9 +249,9 @@ ParserResult __Parser_parseExpression(Parser *parser) { bottom = Array_pop(stack); mem_free(bottom); Array_free(stack); - //Array_free(reduceStack); + // Array_free(reduceStack); return ParserSuccess(finalExpression->node); - } + } StackItem *currentToken = mem_alloc(sizeof(StackItem)); From 130f57b3f1a69dabf23f5ef4c4602bf203321683 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sun, 12 Nov 2023 20:37:56 +0100 Subject: [PATCH 24/69] Changed formatting --- src/compiler/parser/ExpressionParser.c | 57 ++++++++++++++++---------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index 78dbaca..9e94c8d 100644 --- a/src/compiler/parser/ExpressionParser.c +++ b/src/compiler/parser/ExpressionParser.c @@ -3,8 +3,6 @@ #include 
#include "assertf.h" -// #include "internal/Array.h" -// #include "compiler/parser/ASTNodes.h" // #include "compiler/lexer/Lexer.h" #include "compiler/parser/ExpressionParser.h" #include "compiler/lexer/Token.h" @@ -15,8 +13,7 @@ #define TABLE_SIZE 9 #define STACK_SIZE 20 -int precedence_table[TABLE_SIZE][TABLE_SIZE] = // [stack top terminal][input token] -{ +int precedence_table[TABLE_SIZE][TABLE_SIZE] = { // [stack top terminal][input token] // +-|*/| ! |??|r |i |( |) |$ {R, S, S, R, R, S, S, R, R}, // +- {R, R, S, R, R, S, S, R, R}, // */ @@ -27,11 +24,9 @@ int precedence_table[TABLE_SIZE][TABLE_SIZE] = // [stack top terminal][input {S, S, S, S, S, S, S, E, X}, // ( {R, R, R, R, R, X, X, R, R}, // ) {S, S, S, S, S, S, S, X, X} // $ - }; int Expr_getPrecTbIndex(Token *token) { - switch(token->kind) { case TOKEN_PLUS: case TOKEN_MINUS: @@ -66,6 +61,7 @@ int Expr_getPrecTbIndex(Token *token) { return I_ID; } return I_DOLLAR; + case TOKEN_STRING: case TOKEN_INTEGER: case TOKEN_FLOATING: @@ -79,11 +75,15 @@ int Expr_getPrecTbIndex(Token *token) { StackItem Expr_getTopTerminal(Array *stack) { StackItem *top = NULL; + for(size_t i = 0; i < stack->size; i++) { - if((top = Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || top->Stype == S_BOTTOM) { + top = Array_get(stack, stack->size - i - 1); + + if(top->Stype == S_TERMINAL || top->Stype == S_BOTTOM) { return *top; } } + return *top; } @@ -92,8 +92,12 @@ void Expr_pushAfterTopTerminal(Array *stack) { stopReduction->token = NULL; stopReduction->Stype = S_STOP; stopReduction->node = NULL; + for(size_t i = 0; i < stack->size; i++) { - if(((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || ((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM) { + if( + ((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || + ((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM + ) { Array_insert(stack, (int)stack->size - i, stopReduction); return; 
} @@ -111,12 +115,15 @@ StackItem* Expr_performReduction(Array *stack) { LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); id->node = (ExpressionASTNode*)literalE; id->Stype = S_NONTERMINAL; + return id; } + if(id->token->type == TOKEN_IDENTIFIER) { IdentifierASTNode *identifierE = new_IdentifierASTNode(id->token->value.string); // string or identifier? id->node = (ExpressionASTNode*)identifierE; id->Stype = S_NONTERMINAL; + return id; } } @@ -135,7 +142,9 @@ StackItem* Expr_performReduction(Array *stack) { UnaryExpressionASTNode *unaryE = new_UnaryExpressionASTNode(argument->node, OPERATOR_UNWRAP); operator->node = (ExpressionASTNode*)unaryE; operator->Stype = S_NONTERMINAL; + mem_free(argument); + return operator; } else { return NULL; @@ -152,6 +161,7 @@ StackItem* Expr_performReduction(Array *stack) { if(operator->Stype == S_NONTERMINAL && leftOperand->token->kind == TOKEN_LEFT_PAREN && rightOperand->token->kind == TOKEN_RIGHT_PAREN) { mem_free(leftOperand); mem_free(rightOperand); + return operator; } @@ -198,11 +208,14 @@ StackItem* Expr_performReduction(Array *stack) { BinaryExpressionASTNode *binaryE = new_BinaryExpressionASTNode(leftOperand->node, rightOperand->node, operatorType); operator->node = (ExpressionASTNode*)binaryE; operator->Stype = S_NONTERMINAL; + mem_free(leftOperand); mem_free(rightOperand); + return operator; } } + return NULL; } @@ -214,6 +227,7 @@ bool Expr_Reduce(Array *stack, StackItem *currentToken) { Array_push(reduceStack, currentToken); } } + // Perform reduction and push result on stack (nonterminal) if(currentToken == Expr_performReduction(reduceStack)) { Array_push(stack, currentToken); @@ -250,13 +264,14 @@ ParserResult __Parser_parseExpression(Parser *parser) { mem_free(bottom); Array_free(stack); // Array_free(reduceStack); + return ParserSuccess(finalExpression->node); } StackItem *currentToken = mem_alloc(sizeof(StackItem)); switch(operation) { - case S: + case S: { currentToken->Stype = 
S_TERMINAL; currentToken->token = current.token; currentToken->node = NULL; @@ -265,15 +280,15 @@ ParserResult __Parser_parseExpression(Parser *parser) { if(!(removeFromTokenStream = Lexer_nextToken(parser->lexer)).success) return LexerToParserError(current); current = Lexer_peekToken(parser->lexer, ++offset); - break; + } break; - case R: + case R: { if(!Expr_Reduce(stack, currentToken)) { return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); } - break; + } break; - case E: + case E: { currentToken->Stype = S_TERMINAL; currentToken->token = current.token; currentToken->node = NULL; @@ -281,19 +296,17 @@ ParserResult __Parser_parseExpression(Parser *parser) { if(!(removeFromTokenStream = Lexer_nextToken(parser->lexer)).success) return LexerToParserError(current); current = Lexer_peekToken(parser->lexer, ++offset); - break; + } break; - case X: + case X: { if(!Expr_Reduce(stack, currentToken)) { return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); } - break; - - default: - break; + } break; + default: {} break; } - } - return ParserNoMatch(); -} \ No newline at end of file + return ParserNoMatch(); + } +} From e093ef54d30b707d07681365b0dcfdc5c7b18f2b Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sun, 12 Nov 2023 20:45:53 +0100 Subject: [PATCH 25/69] Added ability to pass `Lexer` instance directly to `Parser` constructor --- include/compiler/parser/Parser.h | 2 +- src/compiler/parser/Parser.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/compiler/parser/Parser.h b/include/compiler/parser/Parser.h index cda1620..3515353 100644 --- a/include/compiler/parser/Parser.h +++ b/include/compiler/parser/Parser.h @@ -10,7 +10,7 @@ typedef struct Parser { // SymbolTable *symbolTable; } Parser; -void Parser_constructor(Parser *parser); +void Parser_constructor(Parser *parser, Lexer *lexer); void Parser_destructor(Parser *parser); void 
Parser_setLexer(Parser *parser, Lexer *lexer); ParserResult Parser_parse(Parser *parser); diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index b13f296..e72823a 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -35,9 +35,12 @@ ParserResult __Parser_parseAssignmentStatement(Parser *parser); /* Definitions of public functions */ -void Parser_constructor(Parser *parser) { +void Parser_constructor(Parser *parser, Lexer *lexer) { + assertf(parser != NULL); + assertf(lexer != NULL); + // TODO: Symbol table management - parser->lexer = NULL; + parser->lexer = lexer; } void Parser_destructor(Parser *parser) { @@ -168,12 +171,12 @@ ParserResult __Parser_parseStatement(Parser *parser) { return ParserNoMatch(); } -//ParserResult __Parser_parseExpression(Parser *parser) { +// ParserResult __Parser_parseExpression(Parser *parser) { // assertf(parser != NULL); - // TODO: Add logic for parsing expressions (using LL(1) parsing) +// TODO: Add logic for parsing expressions (using LL(1) parsing) // return ParserNoMatch(); -//} +// } ParserResult __Parser_parseTypeReference(Parser *parser) { // TODO: Add logic to output correct error messages From 85d6a6aa3a58b442cbb5e1a4e5f590dbc10783c8 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sun, 12 Nov 2023 21:05:31 +0100 Subject: [PATCH 26/69] Modified definition of `Expr_getTopTerminal` to prevent copying data --- include/compiler/parser/ExpressionParser.h | 7 ++----- src/compiler/parser/ExpressionParser.c | 6 +++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/compiler/parser/ExpressionParser.h b/include/compiler/parser/ExpressionParser.h index c55c853..ace4745 100644 --- a/include/compiler/parser/ExpressionParser.h +++ b/include/compiler/parser/ExpressionParser.h @@ -36,11 +36,8 @@ typedef struct StackItem { ExpressionASTNode *node; } StackItem; -StackItem Expr_getTopTerminal(Array *stack); +StackItem* Expr_getTopTerminal(Array *stack); void 
Expr_pushAfterTopTerminal(Array *stack); -StackItem *Expr_performReduction(Array *stack); +StackItem* Expr_performReduction(Array *stack); #endif - - -//LexerResult current = Lexer_nextToken(parser->lexer); \ No newline at end of file diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index 9e94c8d..e94bc78 100644 --- a/src/compiler/parser/ExpressionParser.c +++ b/src/compiler/parser/ExpressionParser.c @@ -73,18 +73,18 @@ int Expr_getPrecTbIndex(Token *token) { } } -StackItem Expr_getTopTerminal(Array *stack) { +StackItem* Expr_getTopTerminal(Array *stack) { StackItem *top = NULL; for(size_t i = 0; i < stack->size; i++) { top = Array_get(stack, stack->size - i - 1); if(top->Stype == S_TERMINAL || top->Stype == S_BOTTOM) { - return *top; + return top; } } - return *top; + return top; } void Expr_pushAfterTopTerminal(Array *stack) { From 4a587b170de8c0b815587192f41c459b955ee4fb Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sun, 12 Nov 2023 21:07:11 +0100 Subject: [PATCH 27/69] Added message why the segfault is caused --- src/compiler/parser/ExpressionParser.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index e94bc78..f2d1656 100644 --- a/src/compiler/parser/ExpressionParser.c +++ b/src/compiler/parser/ExpressionParser.c @@ -254,9 +254,16 @@ ParserResult __Parser_parseExpression(Parser *parser) { LexerResult removeFromTokenStream; while(true) { - if(!current.success) return LexerToParserError(current); - enum PrecTableRelation operation = precedence_table[Expr_getPrecTbIndex(Expr_getTopTerminal(stack).token)][Expr_getPrecTbIndex(current.token)]; + + StackItem *topTerminal = Expr_getTopTerminal(stack); + // topTerminal returns S_BOTTOM, which has no token, + // this token is being dereferenced in Expr_getPrecTbIndex, thus causing a segfault + + int topTerminalIndex = Expr_getPrecTbIndex(topTerminal->token); + 
int currentTokenIndex = Expr_getPrecTbIndex(current.token); + + enum PrecTableRelation operation = precedence_table[topTerminalIndex][currentTokenIndex]; if(((StackItem*)Array_get(stack, stack->size - 1))->Stype == S_NONTERMINAL && stack->size == 2 && operation == X) { StackItem *finalExpression = Array_pop(stack); From 9882b8016d0535611760e7341d1a922b988c94e8 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sun, 12 Nov 2023 21:07:38 +0100 Subject: [PATCH 28/69] Added test files for `Parser` and `ExpressionParser` --- test/compiler/parser/ExpressionParser.test.c | 8 +++++++ test/compiler/parser/Parser.test.c | 23 ++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 test/compiler/parser/ExpressionParser.test.c create mode 100644 test/compiler/parser/Parser.test.c diff --git a/test/compiler/parser/ExpressionParser.test.c b/test/compiler/parser/ExpressionParser.test.c new file mode 100644 index 0000000..a1f82d0 --- /dev/null +++ b/test/compiler/parser/ExpressionParser.test.c @@ -0,0 +1,8 @@ +#include + +#include "unit.h" +#include "compiler/lexer/Lexer.h" +#include "compiler/parser/Parser.h" + + +// TODO: Tests for ExpressionParser internals here... 
diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c new file mode 100644 index 0000000..df67605 --- /dev/null +++ b/test/compiler/parser/Parser.test.c @@ -0,0 +1,23 @@ +#include + +#include "unit.h" +#include "compiler/lexer/Lexer.h" +#include "compiler/parser/Parser.h" + + +DESCRIBE(variable_declaration, "Variable declaration parsing") { + Lexer lexer; + Lexer_constructor(&lexer); + + Parser parser; + Parser_constructor(&parser, &lexer); + + ParserResult result; + + TEST_BEGIN("Example use of parser") { + Lexer_setSource(&lexer, "let a = 7"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + } TEST_END(); +} From 631146969111cc7ddbb0d3fd5c94e2db7c2e41e3 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Mon, 13 Nov 2023 23:30:42 +0100 Subject: [PATCH 29/69] Fixed token no properly consuming --- src/compiler/parser/Parser.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index e72823a..bde6c35 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -609,6 +609,9 @@ ParserResult __Parser_parseVariableDeclarator(Parser *parser) { ExpressionASTNode *initializer = NULL; if(peek.token->kind == TOKEN_EQUAL) { + // Consume the `=` token + Lexer_nextToken(parser->lexer); + ParserResult initializerResult = __Parser_parseExpression(parser); if(!initializerResult.success) return initializerResult; initializer = initializerResult.node; From 305a9e0d3ec09a48fe5c54f28935593e3abd5835 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Tue, 14 Nov 2023 12:41:22 +0100 Subject: [PATCH 30/69] fixed segfault + syntax changes --- src/compiler/parser/ExpressionParser.c | 42 +++++++++++++++++--------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index f2d1656..707ee0a 100644 --- a/src/compiler/parser/ExpressionParser.c +++ 
b/src/compiler/parser/ExpressionParser.c @@ -27,6 +27,9 @@ int precedence_table[TABLE_SIZE][TABLE_SIZE] = { // [stack top terminal][input }; int Expr_getPrecTbIndex(Token *token) { + if(!token){ + return I_DOLLAR; + } switch(token->kind) { case TOKEN_PLUS: case TOKEN_MINUS: @@ -94,10 +97,9 @@ void Expr_pushAfterTopTerminal(Array *stack) { stopReduction->node = NULL; for(size_t i = 0; i < stack->size; i++) { - if( - ((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_TERMINAL || - ((StackItem*)Array_get(stack, stack->size - i - 1))->Stype == S_BOTTOM - ) { + StackItem *top = Array_get(stack, stack->size - i - 1); + + if(top->Stype == S_TERMINAL ||top->Stype == S_BOTTOM) { Array_insert(stack, (int)stack->size - i, stopReduction); return; } @@ -222,6 +224,10 @@ StackItem* Expr_performReduction(Array *stack) { bool Expr_Reduce(Array *stack, StackItem *currentToken) { Array *reduceStack = Array_alloc(STACK_SIZE); + if(stack->size == 1){ + return false; + } + while((currentToken = Array_pop(stack))->Stype != S_STOP) { if(currentToken->Stype != S_STOP) { Array_push(reduceStack, currentToken); @@ -229,7 +235,8 @@ bool Expr_Reduce(Array *stack, StackItem *currentToken) { } // Perform reduction and push result on stack (nonterminal) - if(currentToken == Expr_performReduction(reduceStack)) { + currentToken = Expr_performReduction(reduceStack); + if(currentToken != NULL) { Array_push(stack, currentToken); return true; } else { @@ -247,9 +254,11 @@ ParserResult __Parser_parseExpression(Parser *parser) { bottom->Stype = S_BOTTOM; bottom->node = NULL; + bottom->token = NULL; Array_push(stack, bottom); - int offset = 0; + bool reductionSucces; + int offset = 1; LexerResult current = Lexer_peekToken(parser->lexer, offset); LexerResult removeFromTokenStream; @@ -265,7 +274,8 @@ ParserResult __Parser_parseExpression(Parser *parser) { enum PrecTableRelation operation = precedence_table[topTerminalIndex][currentTokenIndex]; - if(((StackItem*)Array_get(stack, stack->size - 
1))->Stype == S_NONTERMINAL && stack->size == 2 && operation == X) { + StackItem *isItFinal = Array_get(stack, stack->size - 1); + if(isItFinal->Stype == S_NONTERMINAL && stack->size == 2 && operation == X) { StackItem *finalExpression = Array_pop(stack); bottom = Array_pop(stack); mem_free(bottom); @@ -285,12 +295,14 @@ ParserResult __Parser_parseExpression(Parser *parser) { Expr_pushAfterTopTerminal(stack); Array_push(stack, currentToken); - if(!(removeFromTokenStream = Lexer_nextToken(parser->lexer)).success) return LexerToParserError(current); - current = Lexer_peekToken(parser->lexer, ++offset); + removeFromTokenStream = Lexer_nextToken(parser->lexer); + if(!(removeFromTokenStream.success)) return LexerToParserError(current); + current = Lexer_peekToken(parser->lexer, offset); } break; case R: { - if(!Expr_Reduce(stack, currentToken)) { + reductionSucces = Expr_Reduce(stack, currentToken); + if(!reductionSucces) { return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); } } break; @@ -301,12 +313,14 @@ ParserResult __Parser_parseExpression(Parser *parser) { currentToken->node = NULL; Array_push(stack, currentToken); - if(!(removeFromTokenStream = Lexer_nextToken(parser->lexer)).success) return LexerToParserError(current); - current = Lexer_peekToken(parser->lexer, ++offset); + removeFromTokenStream = Lexer_nextToken(parser->lexer); + if(!(removeFromTokenStream.success)) return LexerToParserError(current); + current = Lexer_peekToken(parser->lexer, offset); } break; case X: { - if(!Expr_Reduce(stack, currentToken)) { + reductionSucces = Expr_Reduce(stack, currentToken); + if(!reductionSucces) { return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); } } break; @@ -314,6 +328,6 @@ ParserResult __Parser_parseExpression(Parser *parser) { default: {} break; } - return ParserNoMatch(); } + return ParserNoMatch(); } From 380288fd35838f043ccad370413411e0585fc16f Mon Sep 17 
00:00:00 2001 From: Radim Mifka Date: Tue, 14 Nov 2023 19:27:22 +0100 Subject: [PATCH 31/69] Fix brackets parsing in Block --- src/compiler/parser/Parser.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index bde6c35..1426629 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -98,11 +98,19 @@ ParserResult __Parser_parseBlock(Parser *parser, bool requireBraces) { // Parse statements Array *statements = Array_alloc(0); - while(!Parser_isAtEnd(parser)) { + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + while((!requireBraces && !Parser_isAtEnd(parser)) || (requireBraces && peek.token->kind != TOKEN_RIGHT_BRACE)) { ParserResult result = __Parser_parseStatement(parser); if(!result.success) return result; Array_push(statements, result.node); + + if(requireBraces) { + peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + } } // Check for right brace From 3ac5ee7033feba6ff2e7035bc65e0c860d93e54c Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Tue, 14 Nov 2023 19:28:36 +0100 Subject: [PATCH 32/69] Fix peeking on zero in parseStatements --- src/compiler/parser/Parser.c | 146 +++++++++++++++++++---------------- 1 file changed, 78 insertions(+), 68 deletions(-) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 1426629..bf4eeee 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -27,7 +27,7 @@ ParserResult __Parser_parseWhileStatement(Parser *parser); ParserResult __Parser_parseReturnStatement(Parser *parser); ParserResult __Parser_parseVariableDeclarator(Parser *parser); ParserResult __Parser_parseVariableDeclarationList(Parser *parser); -ParserResult __Parser_parseVariableDeclarationStatement(Parser *parser); +ParserResult __Parser_parseVariableDeclarationStatement(Parser *parser, bool isConstant); 
ParserResult __Parser_parseArgument(Parser *parser); ParserResult __Parser_parseArgumentList(Parser *parser); ParserResult __Parser_parseFunctionCallExpression(Parser *parser); @@ -133,40 +133,41 @@ ParserResult __Parser_parseBlock(Parser *parser, bool requireBraces) { ParserResult __Parser_parseStatement(Parser *parser) { assertf(parser != NULL); - LexerResult peek = Lexer_peekToken(parser->lexer, 0); - if(!peek.success) return LexerToParserError(peek); + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); - if(peek.token->kind == TOKEN_FUNC) { + if(result.token->kind == TOKEN_FUNC) { ParserResult funcResult = __Parser_parseFuncStatement(parser); if(!funcResult.success) return funcResult; return ParserSuccess(funcResult.node); } - if(peek.token->kind == TOKEN_IF) { + if(result.token->kind == TOKEN_IF) { ParserResult ifResult = __Parser_parseIfStatement(parser); if(!ifResult.success) return ifResult; return ParserSuccess(ifResult.node); } - if(peek.token->kind == TOKEN_WHILE) { + if(result.token->kind == TOKEN_WHILE) { ParserResult whileResult = __Parser_parseWhileStatement(parser); if(!whileResult.success) return whileResult; return ParserSuccess(whileResult.node); } - if(peek.token->kind == TOKEN_RETURN) { + if(result.token->kind == TOKEN_RETURN) { ParserResult returnResult = __Parser_parseReturnStatement(parser); if(!returnResult.success) return returnResult; return ParserSuccess(returnResult.node); } - if(peek.token->kind == TOKEN_LET || peek.token->kind == TOKEN_VAR) { - ParserResult variableDeclarationResult = __Parser_parseVariableDeclarationStatement(parser); + if(result.token->kind == TOKEN_LET || result.token->kind == TOKEN_VAR) { + bool isConstant = result.token->kind == TOKEN_LET; + ParserResult variableDeclarationResult = __Parser_parseVariableDeclarationStatement(parser, isConstant); if(!variableDeclarationResult.success) return variableDeclarationResult; return 
ParserSuccess(variableDeclarationResult.node); } - if(peek.token->type == TOKEN_IDENTIFIER) { + if(result.token->type == TOKEN_IDENTIFIER) { LexerResult tmp = Lexer_peekToken(parser->lexer, 1); if(!tmp.success) return LexerToParserError(tmp); if(tmp.token->kind == TOKEN_EQUAL) { @@ -189,11 +190,9 @@ ParserResult __Parser_parseStatement(Parser *parser) { ParserResult __Parser_parseTypeReference(Parser *parser) { // TODO: Add logic to output correct error messages assertf(parser != NULL); - - LexerResult result = Lexer_nextToken(parser->lexer); - LexerResult peek; int nullable = false; + LexerResult result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); if(result.token->type != TOKEN_IDENTIFIER) { @@ -202,8 +201,9 @@ ParserResult __Parser_parseTypeReference(Parser *parser) { Array_fromArgs(1, result.token)); } - peek = Lexer_peekToken(parser->lexer, 1); - if(!peek.success) return LexerToParserError(result); + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); // nullable type @@ -229,6 +229,7 @@ ParserResult __Parser_parseParameter(Parser *parser) { IdentifierASTNode *paramLocalId = NULL; IdentifierASTNode *paramExternalId = NULL; ExpressionASTNode *initializer = NULL; + LexerResult peek; LexerResult result = Lexer_nextToken(parser->lexer); @@ -306,16 +307,18 @@ ParserResult __Parser_parseParameterList(Parser *parser) { // parser parameter-list Array *parameters = Array_alloc(0); - while(true) { + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + while(peek.token->kind != TOKEN_RIGHT_PAREN) { + ParserResult paramResult = __Parser_parseParameter(parser); if(!paramResult.success) return paramResult; Array_push(parameters, (ParameterASTNode*)paramResult.node); - LexerResult peek = Lexer_peekToken(parser->lexer, 1); - - - if(!peek.success) return LexerToParserError(result); + peek = Lexer_peekToken(parser->lexer, 
1); + if(!peek.success) return LexerToParserError(peek); if(peek.token->kind == TOKEN_COMMA) { result = Lexer_nextToken(parser->lexer); @@ -323,13 +326,12 @@ ParserResult __Parser_parseParameterList(Parser *parser) { } - peek = Lexer_peekToken(parser->lexer, 1); - if(peek.token->kind == TOKEN_RIGHT_PAREN) { - result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - break; - } } + + // skip ')' + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + ParameterListASTNode *parameterList = new_ParameterListASTNode(parameters); return ParserSuccess(parameterList); @@ -340,13 +342,11 @@ ParserResult __Parser_parseFuncStatement(Parser *parser) { // TODO: Symbol table management assertf(parser != NULL); - // skip func keyword LexerResult result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); - result = Lexer_nextToken(parser->lexer); LexerResult peek; - if(!result.success) return LexerToParserError(result); + if(result.token->type != TOKEN_IDENTIFIER) { return ParserError( @@ -359,6 +359,7 @@ ParserResult __Parser_parseFuncStatement(Parser *parser) { ParserResult parameterListResult = __Parser_parseParameterList(parser); if(!parameterListResult.success) return parameterListResult; + peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); @@ -376,6 +377,7 @@ ParserResult __Parser_parseFuncStatement(Parser *parser) { returnType = NULL; } + ParserResult blockResult = __Parser_parseBlock(parser, true); if(!blockResult.success) return blockResult; @@ -402,6 +404,10 @@ ParserResult __Parser_parsePattern(Parser *parser) { if(!peek.success) return LexerToParserError(peek); if(peek.token->kind == TOKEN_COLON) { + // skip ':' + LexerResult tmp = Lexer_nextToken(parser->lexer); + if(!tmp.success) return LexerToParserError(tmp); + ParserResult typeResult = __Parser_parseTypeReference(parser); if(!typeResult.success) 
return typeResult; type = (TypeReferenceASTNode*)typeResult.node; @@ -450,6 +456,10 @@ ParserResult __Parser_parseCondition(Parser *parser) { ExpressionASTNode *expression = NULL; OptionalBindingConditionASTNode *bindingCondition = NULL; + // consume '(' + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + LexerResult peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); @@ -464,6 +474,10 @@ ParserResult __Parser_parseCondition(Parser *parser) { expression = (ExpressionASTNode*)expressionResult.node; } + // consume ')' + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + ConditionASTNode *condition = new_ConditionASTNode(expression, bindingCondition); return ParserSuccess(condition); @@ -503,10 +517,6 @@ ParserResult __Parser_parseElseClause(Parser *parser) { ParserResult __Parser_parseIfStatement(Parser *parser) { assertf(parser != NULL); - // skip if keyword - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - LexerResult peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); @@ -549,10 +559,6 @@ ParserResult __Parser_parseIfStatement(Parser *parser) { ParserResult __Parser_parseWhileStatement(Parser *parser) { assertf(parser != NULL); - // skip while keyword - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - LexerResult peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); @@ -583,11 +589,6 @@ ParserResult __Parser_parseWhileStatement(Parser *parser) { ParserResult __Parser_parseReturnStatement(Parser *parser) { assertf(parser != NULL); - // skip return keyword - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - - LexerResult peek = 
Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); @@ -618,7 +619,8 @@ ParserResult __Parser_parseVariableDeclarator(Parser *parser) { if(peek.token->kind == TOKEN_EQUAL) { // Consume the `=` token - Lexer_nextToken(parser->lexer); + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); ParserResult initializerResult = __Parser_parseExpression(parser); if(!initializerResult.success) return initializerResult; @@ -637,41 +639,43 @@ ParserResult __Parser_parseVariableDeclarationList(Parser *parser) { Array *declarators = Array_alloc(0); - while(true) { + peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + while(peek.token->type != TOKEN_EOF) { ParserResult declaratorResult = __Parser_parseVariableDeclarator(parser); if(!declaratorResult.success) return declaratorResult; Array_push(declarators, (VariableDeclaratorASTNode*)declaratorResult.node); peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + if(peek.token->kind == TOKEN_COMMA) { result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); - + } else if(peek.token->type != TOKEN_EOF) { + return ParserError( + String_fromFormat("found an unexpected second identifier in constant declaration; is there an accidental break?"), + Array_fromArgs(1, peek.token)); } peek = Lexer_peekToken(parser->lexer, 1); - if(peek.token->type == TOKEN_EOF) { - result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - break; - } + if(!peek.success) return LexerToParserError(peek); } + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + VariableDeclarationListASTNode *variableDeclarationList = new_VariableDeclarationListASTNode(declarators); return ParserSuccess(variableDeclarationList); } -ParserResult 
__Parser_parseVariableDeclarationStatement(Parser *parser) { +ParserResult __Parser_parseVariableDeclarationStatement(Parser *parser, bool isConstant) { assertf(parser != NULL); - // let/var - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - bool isConstant = result.token->kind == TOKEN_LET; - ParserResult declarationList = __Parser_parseVariableDeclarationList(parser); if(!declarationList.success) return declarationList; @@ -716,15 +720,19 @@ ParserResult __Parser_parseArgumentList(Parser *parser) { // parse argument-list Array *arguments = Array_alloc(0); - while(true) { + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + + while(peek.token->kind != TOKEN_RIGHT_PAREN) { ParserResult argumentResult = __Parser_parseArgument(parser); if(!argumentResult.success) return argumentResult; Array_push(arguments, (ArgumentASTNode*)argumentResult.node); LexerResult peek = Lexer_peekToken(parser->lexer, 1); - - if(!peek.success) return LexerToParserError(result); + if(!peek.success) return LexerToParserError(peek); if(peek.token->kind == TOKEN_COMMA) { result = Lexer_nextToken(parser->lexer); @@ -733,12 +741,14 @@ ParserResult __Parser_parseArgumentList(Parser *parser) { } peek = Lexer_peekToken(parser->lexer, 1); - if(peek.token->kind == TOKEN_RIGHT_PAREN) { - result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - break; - } + if(!peek.success) return LexerToParserError(peek); } + + // skip ')' + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + ArgumentListASTNode *argumentList = new_ArgumentListASTNode(arguments); return ParserSuccess(argumentList); @@ -766,12 +776,12 @@ ParserResult __Parser_parseAssignmentStatement(Parser *parser) { assertf(parser != NULL); // identifier - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) 
return LexerToParserError(result); - IdentifierASTNode *variableId = new_IdentifierASTNode(result.token->value.string); + LexerResult peek = Lexer_peekToken(parser->lexer, 0); + if(!peek.success) return LexerToParserError(peek); + IdentifierASTNode *variableId = new_IdentifierASTNode(peek.token->value.string); // skip '=' - result = Lexer_nextToken(parser->lexer); + LexerResult result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); ParserResult assignmentResult = __Parser_parseExpression(parser); From 19194c68b7a0250e819ee008d185af83268c15af Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 21:35:07 +0100 Subject: [PATCH 33/69] Renamed `name`s to `id`s --- include/compiler/parser/ASTNodes.h | 7 +++---- src/compiler/parser/ASTNodes.c | 12 ++++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index a030148..2693998 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -112,10 +112,10 @@ typedef struct ReturnStatementASTNode { typedef struct ParameterASTNode { enum ASTNodeType _type; - IdentifierASTNode *id; + IdentifierASTNode *internalId; TypeReferenceASTNode *type; ExpressionASTNode *initializer; - IdentifierASTNode *externalName; + IdentifierASTNode *externalId; bool isLabeless; } ParameterASTNode; @@ -169,7 +169,6 @@ typedef struct UnaryExpressionASTNode { ExpressionASTNode *argument; enum OperatorType operator; // bool isPrefix; - } UnaryExpressionASTNode; typedef struct LiteralExpressionASTNode { @@ -179,7 +178,7 @@ typedef struct LiteralExpressionASTNode { typedef struct PatternASTNode { enum ASTNodeType _type; - IdentifierASTNode *name; + IdentifierASTNode *id; TypeReferenceASTNode *type; } PatternASTNode; diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index b475333..769069a 100644 --- a/src/compiler/parser/ASTNodes.c +++ 
b/src/compiler/parser/ASTNodes.c @@ -87,17 +87,17 @@ ReturnStatementASTNode * new_ReturnStatementASTNode( } ParameterASTNode * new_ParameterASTNode( - IdentifierASTNode *id, + IdentifierASTNode *internalId, TypeReferenceASTNode *type, ExpressionASTNode *initializer, - IdentifierASTNode *externalName, + IdentifierASTNode *externalId, bool isLabeless ) { prepare_node_of(ParameterASTNode, NODE_PARAMETER) - node->id = id; + node->internalId = internalId; node->type = type; node->initializer = initializer; - node->externalName = externalName; + node->externalId = externalId; node->isLabeless = isLabeless; return node; } @@ -153,11 +153,11 @@ FunctionCallASTNode * new_FunctionCallASTNode( } PatternASTNode * new_PatternASTNode( - IdentifierASTNode *name, + IdentifierASTNode *id, TypeReferenceASTNode *type ) { prepare_node_of(PatternASTNode, NODE_PATTERN) - node->name = name; + node->id = id; node->type = type; return node; } From dcb24c0c53426f6ca1c65ad72095b3ee455d4f53 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 21:48:00 +0100 Subject: [PATCH 34/69] Renamed `name`s in function declarations --- include/compiler/parser/ASTNodes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index 2693998..f165060 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -234,7 +234,7 @@ VariableDeclarationASTNode* new_VariableDeclarationASTNode(VariableDeclarationLi VariableDeclaratorASTNode* new_VariableDeclaratorASTNode(PatternASTNode *pattern, ExpressionASTNode *initializer); VariableDeclarationListASTNode* new_VariableDeclarationListASTNode(Array *declarators); ReturnStatementASTNode* new_ReturnStatementASTNode(ExpressionASTNode *expression); -ParameterASTNode* new_ParameterASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type, ExpressionASTNode *initializer, IdentifierASTNode *externalName, bool isLabeless); +ParameterASTNode* 
new_ParameterASTNode(IdentifierASTNode *internalId, TypeReferenceASTNode *type, ExpressionASTNode *initializer, IdentifierASTNode *externalId, bool isLabeless); ParameterListASTNode* new_ParameterListASTNode(Array *parameters); FunctionDeclarationASTNode* new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); BinaryExpressionASTNode* new_BinaryExpressionASTNode(ExpressionASTNode *left, ExpressionASTNode *right, enum OperatorType operator); @@ -243,7 +243,7 @@ LiteralExpressionASTNode* new_LiteralExpressionASTNode(union TokenValue value); ArgumentASTNode* new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierASTNode *label); ArgumentListASTNode* new_ArgumentListASTNode(Array *arguments); FunctionCallASTNode* new_FunctionCallASTNode(IdentifierASTNode *id, ArgumentListASTNode *argumentList); -PatternASTNode* new_PatternASTNode(IdentifierASTNode *name, TypeReferenceASTNode *type); +PatternASTNode* new_PatternASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type); OptionalBindingConditionASTNode* new_OptionalBindingConditionASTNode(PatternASTNode *pattern, ExpressionASTNode *initializer, bool isConstant); ConditionASTNode* new_ConditionASTNode(ExpressionASTNode *expression, OptionalBindingConditionASTNode *optionalBindingCondition); ElseClauseASTNode* new_ElseClauseASTNode(IfStatementASTNode *ifStatement, BlockASTNode *body, bool isElseIf); From 401dc44b9aaa637131bb7e92b271c8e7d340c6f1 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 21:55:53 +0100 Subject: [PATCH 35/69] Renamed `enum OperatorType` to just `OperatorType` cuz vs code didn't like it --- include/compiler/parser/ASTNodes.h | 13 +++++++------ src/compiler/parser/ASTNodes.c | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index f165060..1eded25 100644 --- a/include/compiler/parser/ASTNodes.h +++ 
b/include/compiler/parser/ASTNodes.h @@ -36,7 +36,7 @@ enum ASTNodeType { NODE_ASSIGNMENT_STATEMENT }; -enum OperatorType { +typedef enum OperatorType { OPERATOR_DEFAULT = 0, OPERATOR_PLUS, OPERATOR_MINUS, @@ -50,7 +50,8 @@ enum OperatorType { OPERATOR_GREATER, OPERATOR_LESS_EQUAL, OPERATOR_GREATER_EQUAL -}; +} OperatorType; + /* Definition of AST nodes */ @@ -161,13 +162,13 @@ typedef struct BinaryExpressionASTNode { enum ASTNodeType _type; ExpressionASTNode *left; ExpressionASTNode *right; - enum OperatorType operator; + OperatorType operator; } BinaryExpressionASTNode; typedef struct UnaryExpressionASTNode { enum ASTNodeType _type; ExpressionASTNode *argument; - enum OperatorType operator; + OperatorType operator; // bool isPrefix; } UnaryExpressionASTNode; @@ -237,8 +238,8 @@ ReturnStatementASTNode* new_ReturnStatementASTNode(ExpressionASTNode *expression ParameterASTNode* new_ParameterASTNode(IdentifierASTNode *internalId, TypeReferenceASTNode *type, ExpressionASTNode *initializer, IdentifierASTNode *externalId, bool isLabeless); ParameterListASTNode* new_ParameterListASTNode(Array *parameters); FunctionDeclarationASTNode* new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); -BinaryExpressionASTNode* new_BinaryExpressionASTNode(ExpressionASTNode *left, ExpressionASTNode *right, enum OperatorType operator); -UnaryExpressionASTNode* new_UnaryExpressionASTNode(ExpressionASTNode *argument, enum OperatorType operator /*, bool isPrefix*/); +BinaryExpressionASTNode* new_BinaryExpressionASTNode(ExpressionASTNode *left, ExpressionASTNode *right, OperatorType operator); +UnaryExpressionASTNode* new_UnaryExpressionASTNode(ExpressionASTNode *argument, OperatorType operator /*, bool isPrefix*/); LiteralExpressionASTNode* new_LiteralExpressionASTNode(union TokenValue value); ArgumentASTNode* new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierASTNode *label); 
ArgumentListASTNode* new_ArgumentListASTNode(Array *arguments); diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index 769069a..bcebcec 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -231,7 +231,7 @@ AssignmentStatementASTNode * new_AssignmentStatementASTNode( BinaryExpressionASTNode* new_BinaryExpressionASTNode( ExpressionASTNode *left, ExpressionASTNode *right, - enum OperatorType operator + OperatorType operator ) { prepare_node_of(BinaryExpressionASTNode, NODE_BINARY_EXPRESSION) node->left = left; @@ -242,7 +242,7 @@ BinaryExpressionASTNode* new_BinaryExpressionASTNode( UnaryExpressionASTNode* new_UnaryExpressionASTNode( ExpressionASTNode *argument, - enum OperatorType operator + OperatorType operator //bool IsPrefix ) { prepare_node_of(UnaryExpressionASTNode, NODE_UNARY_EXPRESSION) From 5ce26d7d37b80db6389d6f2e38037a9325f8faf6 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 21:56:54 +0100 Subject: [PATCH 36/69] Added `LiteralType` info to `LiteralExpressionASTNode` --- include/compiler/lexer/Token.h | 3 +++ include/compiler/parser/ASTNodes.h | 11 ++++++++++- src/compiler/parser/ASTNodes.c | 2 ++ src/compiler/parser/ExpressionParser.c | 2 +- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/include/compiler/lexer/Token.h b/include/compiler/lexer/Token.h index 9f8667c..2c6bacb 100644 --- a/include/compiler/lexer/Token.h +++ b/include/compiler/lexer/Token.h @@ -21,6 +21,9 @@ enum TokenKind { TOKEN_DEFAULT = 0, // Literals + // This has to be at this position in the enum + // because it's being converted into another + // enum with the same values. 
TOKEN_STRING, TOKEN_INTEGER, TOKEN_FLOATING, diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index 1eded25..afc7102 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -52,6 +52,14 @@ typedef enum OperatorType { OPERATOR_GREATER_EQUAL } OperatorType; +typedef enum LiteralType { + LITERAL_INVALID = 0, + LITERAL_STRING, + LITERAL_INTEGER, + LITERAL_FLOATING, + LITERAL_BOOLEAN, + LITERAL_NIL +} LiteralType; /* Definition of AST nodes */ @@ -174,6 +182,7 @@ typedef struct UnaryExpressionASTNode { typedef struct LiteralExpressionASTNode { enum ASTNodeType _type; + LiteralType type; union TokenValue value; } LiteralExpressionASTNode; @@ -240,7 +249,7 @@ ParameterListASTNode* new_ParameterListASTNode(Array *parameters); FunctionDeclarationASTNode* new_FunctionDeclarationASTNode(IdentifierASTNode *id, ParameterListASTNode *parameterList, TypeReferenceASTNode *returnType, BlockASTNode *body); BinaryExpressionASTNode* new_BinaryExpressionASTNode(ExpressionASTNode *left, ExpressionASTNode *right, OperatorType operator); UnaryExpressionASTNode* new_UnaryExpressionASTNode(ExpressionASTNode *argument, OperatorType operator /*, bool isPrefix*/); -LiteralExpressionASTNode* new_LiteralExpressionASTNode(union TokenValue value); +LiteralExpressionASTNode* new_LiteralExpressionASTNode(LiteralType type, union TokenValue value); ArgumentASTNode* new_ArgumentASTNode(ExpressionASTNode *expression, IdentifierASTNode *label); ArgumentListASTNode* new_ArgumentListASTNode(Array *arguments); FunctionCallASTNode* new_FunctionCallASTNode(IdentifierASTNode *id, ArgumentListASTNode *argumentList); diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index bcebcec..a2d54f6 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -253,9 +253,11 @@ UnaryExpressionASTNode* new_UnaryExpressionASTNode( } LiteralExpressionASTNode* new_LiteralExpressionASTNode( + LiteralType 
type, union TokenValue value ) { prepare_node_of(LiteralExpressionASTNode, NODE_LITERAL_EXPRESSION) + node->type = type; node->value = value; return node; } diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index 707ee0a..a682e78 100644 --- a/src/compiler/parser/ExpressionParser.c +++ b/src/compiler/parser/ExpressionParser.c @@ -114,7 +114,7 @@ StackItem* Expr_performReduction(Array *stack) { if(id->Stype == S_TERMINAL) { if(id->token->type == TOKEN_LITERAL) { - LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode(id->token->value); + LiteralExpressionASTNode *literalE = new_LiteralExpressionASTNode((LiteralType)id->token->kind, id->token->value); id->node = (ExpressionASTNode*)literalE; id->Stype = S_NONTERMINAL; From ffc500ebba23ea3ae9f4cca31e786ff24e879214 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 21:58:31 +0100 Subject: [PATCH 37/69] Fixed typo --- src/compiler/parser/ExpressionParser.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index a682e78..34052d8 100644 --- a/src/compiler/parser/ExpressionParser.c +++ b/src/compiler/parser/ExpressionParser.c @@ -257,7 +257,7 @@ ParserResult __Parser_parseExpression(Parser *parser) { bottom->token = NULL; Array_push(stack, bottom); - bool reductionSucces; + bool reductionSuccess; int offset = 1; LexerResult current = Lexer_peekToken(parser->lexer, offset); LexerResult removeFromTokenStream; @@ -301,8 +301,8 @@ ParserResult __Parser_parseExpression(Parser *parser) { } break; case R: { - reductionSucces = Expr_Reduce(stack, currentToken); - if(!reductionSucces) { + reductionSuccess = Expr_Reduce(stack, currentToken); + if(!reductionSuccess) { return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); } } break; @@ -319,8 +319,8 @@ ParserResult __Parser_parseExpression(Parser *parser) { } 
break; case X: { - reductionSucces = Expr_Reduce(stack, currentToken); - if(!reductionSucces) { + reductionSuccess = Expr_Reduce(stack, currentToken); + if(!reductionSuccess) { return ParserError(String_fromFormat("Syntax error in expression"), Array_fromArgs(1, current.token)); } } break; From 1f9f2bcf25b14a8f7e6e5e35809d92672d740c9e Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 22:25:26 +0100 Subject: [PATCH 38/69] Added/fixed include guards --- include/compiler/Result.h | 4 ++-- include/compiler/parser/Parser.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/compiler/Result.h b/include/compiler/Result.h index cb35684..b279af2 100644 --- a/include/compiler/Result.h +++ b/include/compiler/Result.h @@ -2,8 +2,8 @@ #include "internal/String.h" #include "internal/Array.h" -#ifndef PARSER_H -#define PARSER_H +#ifndef RESULT_H +#define RESULT_H enum Severity { SEVERITY_NONE = 0, diff --git a/include/compiler/parser/Parser.h b/include/compiler/parser/Parser.h index 3515353..396aa6b 100644 --- a/include/compiler/parser/Parser.h +++ b/include/compiler/parser/Parser.h @@ -4,6 +4,9 @@ #include "compiler/lexer/Lexer.h" #include "compiler/lexer/Token.h" +#ifndef PARSER_H +#define PARSER_H + // TODO: Symbol table management typedef struct Parser { Lexer *lexer; @@ -14,3 +17,5 @@ void Parser_constructor(Parser *parser, Lexer *lexer); void Parser_destructor(Parser *parser); void Parser_setLexer(Parser *parser, Lexer *lexer); ParserResult Parser_parse(Parser *parser); + +#endif From 5cdf62eac91c451b347b44db642c5213346d6665 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 22:25:43 +0100 Subject: [PATCH 39/69] Sorted includes --- src/compiler/parser/ExpressionParser.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index 34052d8..b6fe747 100644 --- a/src/compiler/parser/ExpressionParser.c +++ 
b/src/compiler/parser/ExpressionParser.c @@ -1,13 +1,13 @@ -#include "compiler/parser/Parser.h" +#include "compiler/parser/ExpressionParser.h" #include #include "assertf.h" // #include "compiler/lexer/Lexer.h" -#include "compiler/parser/ExpressionParser.h" -#include "compiler/lexer/Token.h" -#include "internal/Array.h" #include "allocator/MemoryAllocator.h" +#include "internal/Array.h" +#include "compiler/lexer/Token.h" +#include "compiler/parser/Parser.h" #include "compiler/parser/ASTNodes.h" #define TABLE_SIZE 9 From d11c5956e7b031e6b32ef33514d7db752870871c Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 22:27:28 +0100 Subject: [PATCH 40/69] Added helper macros for testing parser --- test/compiler/parser/parser_assertions.h | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 test/compiler/parser/parser_assertions.h diff --git a/test/compiler/parser/parser_assertions.h b/test/compiler/parser/parser_assertions.h new file mode 100644 index 0000000..d15eddd --- /dev/null +++ b/test/compiler/parser/parser_assertions.h @@ -0,0 +1,41 @@ +#include "compiler/parser/Parser.h" +#include "unit.h" + +#ifndef PARSER_ASSERTIONS_H +#define PARSER_ASSERTIONS_H + +/** + * Validates program body and looks for a single statemtnt of a given type. + * This creates `_program`, `_block`, `_statements` and `statement` variables to use. + */ +#define EXPECT_STATEMENT(node, type) EXPECT_NOT_NULL(node); \ + \ + ProgramASTNode *_program = (ProgramASTNode*)node; \ + EXPECT_NOT_NULL(_program->block); \ + \ + BlockASTNode *_block = _program->block; \ + EXPECT_NOT_NULL(_block->statements); \ + \ + Array *_statements = _block->statements; \ + EXPECT_EQUAL_INT(_statements->size, 1); \ + \ + StatementASTNode *statement = Array_get(_statements, 0); \ + EXPECT_NOT_NULL(statement); \ + EXPECT_TRUE(statement->_type == type); + +/** + * Validates program body and looks for a statement list of a given size. 
+ * This creates `_program`, `_block` and `statements` variables to use. + */ +#define EXPECT_STATEMENTS(node, count) EXPECT_NOT_NULL(node); \ + \ + ProgramASTNode *_program = (ProgramASTNode*)node; \ + EXPECT_NOT_NULL(_program->block); \ + \ + BlockASTNode *_block = _program->block; \ + EXPECT_NOT_NULL(_block->statements); \ + \ + Array *statements = _block->statements; \ + EXPECT_EQUAL_INT(statementsArr->size, count); + +#endif From 1b55518c6555a75de99c36bf14240a652d062fb8 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 22:27:46 +0100 Subject: [PATCH 41/69] Finished example parser test --- test/compiler/parser/Parser.test.c | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index df67605..2275664 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -1,8 +1,11 @@ #include #include "unit.h" +#include "parser_assertions.h" + #include "compiler/lexer/Lexer.h" #include "compiler/parser/Parser.h" +#include "compiler/parser/ASTNodes.h" DESCRIBE(variable_declaration, "Variable declaration parsing") { @@ -19,5 +22,33 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { result = Parser_parse(&parser); EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_VARIABLE_DECLARATION); + + VariableDeclarationASTNode *declaration = (VariableDeclarationASTNode*)statement; + EXPECT_TRUE(declaration->isConstant); + EXPECT_NOT_NULL(declaration->declaratorList); + + VariableDeclarationListASTNode *list = declaration->declaratorList; + EXPECT_NOT_NULL(list->declarators); + + Array *arr = list->declarators; + EXPECT_EQUAL_INT(arr->size, 1); + + VariableDeclaratorASTNode *declarator = Array_get(arr, 0); + EXPECT_NOT_NULL(declarator); + + PatternASTNode *pattern = declarator->pattern; + EXPECT_NOT_NULL(pattern); + EXPECT_NULL(pattern->type); + + IdentifierASTNode *id = pattern->id; + EXPECT_NOT_NULL(id); 
+ EXPECT_TRUE(String_equals(id->name, "a")); + + LiteralExpressionASTNode *initializer = (LiteralExpressionASTNode*)declarator->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_INTEGER); + EXPECT_EQUAL_INT(initializer->value.integer, 7); } TEST_END(); } From 7e062020becbddacc9d28d78a78958c4551c270c Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 23:40:06 +0100 Subject: [PATCH 42/69] Fixed `__Parser_parseCondition` consuming `(` without actually making sure --- src/compiler/parser/Parser.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index bf4eeee..2bb7478 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -456,13 +456,21 @@ ParserResult __Parser_parseCondition(Parser *parser) { ExpressionASTNode *expression = NULL; OptionalBindingConditionASTNode *bindingCondition = NULL; - // consume '(' - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - + // consume '(' optionally + bool hasOptionalParen = false; LexerResult peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); + if(peek.token->kind == TOKEN_LEFT_PAREN) { + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + hasOptionalParen = true; + } + + peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + if(peek.token->kind == TOKEN_LET || peek.token->kind == TOKEN_VAR) { ParserResult bindingConditionResult = __Parser_parseOptionalBindingCondition(parser); if(!bindingConditionResult.success) return bindingConditionResult; @@ -474,10 +482,16 @@ ParserResult __Parser_parseCondition(Parser *parser) { expression = (ExpressionASTNode*)expressionResult.node; } - // 
consume ')' - result = Lexer_nextToken(parser->lexer); + // consume ')' if we consumed '(' + LexerResult result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); + if(hasOptionalParen && result.token->kind != TOKEN_RIGHT_PAREN) { + return ParserError( + String_fromFormat("expected ')' in condition"), + Array_fromArgs(1, result.token)); + } + ConditionASTNode *condition = new_ConditionASTNode(expression, bindingCondition); return ParserSuccess(condition); From b831cdcc728eb476a35860bdc3391c4f1674b78e Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Tue, 14 Nov 2023 23:49:31 +0100 Subject: [PATCH 43/69] Fixed `if(let a)` being a valid statement --- src/compiler/parser/Parser.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 2bb7478..7e14350 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -472,6 +472,12 @@ ParserResult __Parser_parseCondition(Parser *parser) { if(!peek.success) return LexerToParserError(peek); if(peek.token->kind == TOKEN_LET || peek.token->kind == TOKEN_VAR) { + if(hasOptionalParen) { + return ParserError( + String_fromFormat("cannot use optional binding in condition with parentheses"), + Array_fromArgs(1, peek.token)); + } + ParserResult bindingConditionResult = __Parser_parseOptionalBindingCondition(parser); if(!bindingConditionResult.success) return bindingConditionResult; bindingCondition = (OptionalBindingConditionASTNode*)bindingConditionResult.node; From 819e775139e7b295cf3385b4cf988c06891f246c Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Wed, 15 Nov 2023 11:32:41 +0100 Subject: [PATCH 44/69] Fixed `LexerToParserError` improperly propagating result type and severity --- include/compiler/parser/ParserResult.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/compiler/parser/ParserResult.h b/include/compiler/parser/ParserResult.h index 845bcfc..564eb86 100644 --- 
a/include/compiler/parser/ParserResult.h +++ b/include/compiler/parser/ParserResult.h @@ -36,6 +36,6 @@ ParserResult ParserResult_construct( #define ParserSuccess(node) ParserResult_construct(RESULT_SUCCESS, SEVERITY_NONE, NULL, NULL, (ASTNode*)node) #define ParserNoMatch() ParserResult_construct(RESULT_NO_MATCH, SEVERITY_NONE, NULL, NULL, NULL) #define ParserError(message, markers) ParserResult_construct(RESULT_ERROR_STATIC_SYNTACTIC_ANALYSIS, SEVERITY_ERROR, message, markers, NULL) -#define LexerToParserError(lexerResult) ParserResult_construct(RESULT_ERROR_STATIC_SYNTACTIC_ANALYSIS, SEVERITY_ERROR, lexerResult.message, lexerResult.markers, NULL) +#define LexerToParserError(lexerResult) ParserResult_construct(lexerResult.type, lexerResult.severity, lexerResult.message, lexerResult.markers, NULL) #endif From 1603e0f4255c474323a3e5c383b7c6f24443815f Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Wed, 15 Nov 2023 11:39:59 +0100 Subject: [PATCH 45/69] Added and renamed some of the `ResultType`s --- include/compiler/Result.h | 12 +++++++----- include/compiler/lexer/LexerResult.h | 2 +- include/compiler/parser/ParserResult.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/compiler/Result.h b/include/compiler/Result.h index b279af2..cee73bc 100644 --- a/include/compiler/Result.h +++ b/include/compiler/Result.h @@ -34,11 +34,13 @@ enum ResultType { RESULT_NO_MATCH = -2, RESULT_ASSERTION = -1, RESULT_SUCCESS = 0, - RESULT_ERROR_STATIC_LEXICAL_ANALYSIS = 1, - RESULT_ERROR_STATIC_SYNTACTIC_ANALYSIS = 2, - // TODO: Add more error types - RESULT_ERROR_SEMANTIC_FUNCTION = 3, - RESULT_ERROR_RUNTIME_UNDEFINED_VARIABLE = 5, + RESULT_ERROR_LEXICAL_ANALYSIS = 1, + RESULT_ERROR_SYNTACTIC_ANALYSIS = 2, + RESULT_ERROR_SEMANTIC_FUNCTION_DEFINITION = 3, + RESULT_ERROR_SEMANTIC_INVALID_FUNCTION_CALL = 4, + RESULT_ERROR_SEMANTIC_UNDEFINED_VARIABLE = 5, + RESULT_ERROR_SEMANTIC_INVALID_RETURN = 6, + RESULT_ERROR_SEMANTIC_INVALID_TYPE = 7, 
RESULT_ERROR_SEMANTIC_OTHER = 8, RESULT_ERROR_INTERNAL = 99 }; diff --git a/include/compiler/lexer/LexerResult.h b/include/compiler/lexer/LexerResult.h index 8a0a301..36539da 100644 --- a/include/compiler/lexer/LexerResult.h +++ b/include/compiler/lexer/LexerResult.h @@ -35,6 +35,6 @@ LexerResult LexerResult_construct( #define LexerSuccess() LexerResult_construct(RESULT_SUCCESS, SEVERITY_NONE, NULL, NULL, NULL) #define LexerNoMatch() LexerResult_construct(RESULT_NO_MATCH, SEVERITY_NONE, NULL, NULL, NULL) -#define LexerError(message, markers) LexerResult_construct(RESULT_ERROR_STATIC_LEXICAL_ANALYSIS, SEVERITY_ERROR, message, markers, NULL) +#define LexerError(message, markers) LexerResult_construct(RESULT_ERROR_LEXICAL_ANALYSIS, SEVERITY_ERROR, message, markers, NULL) #endif diff --git a/include/compiler/parser/ParserResult.h b/include/compiler/parser/ParserResult.h index 564eb86..4502d90 100644 --- a/include/compiler/parser/ParserResult.h +++ b/include/compiler/parser/ParserResult.h @@ -35,7 +35,7 @@ ParserResult ParserResult_construct( #define ParserSuccess(node) ParserResult_construct(RESULT_SUCCESS, SEVERITY_NONE, NULL, NULL, (ASTNode*)node) #define ParserNoMatch() ParserResult_construct(RESULT_NO_MATCH, SEVERITY_NONE, NULL, NULL, NULL) -#define ParserError(message, markers) ParserResult_construct(RESULT_ERROR_STATIC_SYNTACTIC_ANALYSIS, SEVERITY_ERROR, message, markers, NULL) +#define ParserError(message, markers) ParserResult_construct(RESULT_ERROR_SYNTACTIC_ANALYSIS, SEVERITY_ERROR, message, markers, NULL) #define LexerToParserError(lexerResult) ParserResult_construct(lexerResult.type, lexerResult.severity, lexerResult.message, lexerResult.markers, NULL) #endif From 7088827e77c1fe3a742ef430e86edfd78c655764 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 15 Nov 2023 17:45:57 +0100 Subject: [PATCH 46/69] Add variable declaration tests --- test/compiler/parser/Parser.test.c | 133 ++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 
deletion(-) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index 2275664..14c6c7a 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -17,7 +17,7 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { ParserResult result; - TEST_BEGIN("Example use of parser") { + TEST_BEGIN("Constant without type annotation") { Lexer_setSource(&lexer, "let a = 7"); result = Parser_parse(&parser); @@ -51,4 +51,135 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { EXPECT_TRUE(initializer->type == LITERAL_INTEGER); EXPECT_EQUAL_INT(initializer->value.integer, 7); } TEST_END(); + + + TEST_BEGIN("Constant with type annotation") { + Lexer_setSource(&lexer, "let hello_string_variable: String = \"hello\""); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_VARIABLE_DECLARATION); + + VariableDeclarationASTNode *declaration = (VariableDeclarationASTNode*)statement; + EXPECT_TRUE(declaration->isConstant); + EXPECT_NOT_NULL(declaration->declaratorList); + + VariableDeclarationListASTNode *list = declaration->declaratorList; + EXPECT_NOT_NULL(list->declarators); + + Array *arr = list->declarators; + EXPECT_EQUAL_INT(arr->size, 1); + + VariableDeclaratorASTNode *declarator = Array_get(arr, 0); + EXPECT_NOT_NULL(declarator); + + PatternASTNode *pattern = declarator->pattern; + EXPECT_NOT_NULL(pattern); + EXPECT_NOT_NULL(pattern->type); + EXPECT_TRUE(pattern->_type == NODE_PATTERN); + EXPECT_TRUE(pattern->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(pattern->type->id->name,"String")); + + IdentifierASTNode *id = pattern->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "hello_string_variable")); + + LiteralExpressionASTNode *initializer = (LiteralExpressionASTNode*)declarator->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + 
EXPECT_TRUE(initializer->type == LITERAL_STRING); + EXPECT_TRUE(String_equals(initializer->value.string, "hello")); + + } TEST_END(); + + TEST_BEGIN("More variables declaration") { + Lexer_setSource(&lexer, "var a = \"hello\", b: Int = 20, c = 10.12"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_VARIABLE_DECLARATION); + + VariableDeclarationASTNode *declaration = (VariableDeclarationASTNode*)statement; + EXPECT_FALSE(declaration->isConstant); + EXPECT_NOT_NULL(declaration->declaratorList); + + VariableDeclarationListASTNode *list = declaration->declaratorList; + EXPECT_NOT_NULL(list->declarators); + + Array *arr = list->declarators; + EXPECT_EQUAL_INT(arr->size, 3); + + // first declarator a = "hello" + VariableDeclaratorASTNode *declarator = Array_get(arr, 0); + EXPECT_NOT_NULL(declarator); + + PatternASTNode *pattern = declarator->pattern; + EXPECT_NOT_NULL(pattern); + EXPECT_NULL(pattern->type); + + IdentifierASTNode *id = pattern->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "a")); + + LiteralExpressionASTNode *initializer = (LiteralExpressionASTNode*)declarator->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_STRING); + EXPECT_TRUE(String_equals(initializer->value.string, "hello")); + + // second declarator b = 20 + declarator = Array_get(arr, 1); + EXPECT_NOT_NULL(declarator); + + pattern = declarator->pattern; + EXPECT_NOT_NULL(pattern); + EXPECT_NOT_NULL(pattern->type); + EXPECT_TRUE(pattern->_type == NODE_PATTERN); + EXPECT_TRUE(pattern->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(pattern->type->id->name,"Int")); + + id = pattern->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "b")); + + initializer = (LiteralExpressionASTNode*)declarator->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == 
NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_INTEGER); + EXPECT_EQUAL_INT(initializer->value.integer, 20); + + // third declarator c = 10.12 + declarator = Array_get(arr, 2); + EXPECT_NOT_NULL(declarator); + + pattern = declarator->pattern; + EXPECT_NOT_NULL(pattern); + EXPECT_NULL(pattern->type); + + id = pattern->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "c")); + + initializer = (LiteralExpressionASTNode*)declarator->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_FLOATING); + EXPECT_EQUAL_FLOAT(initializer->value.floating, 10.12); + + } TEST_END(); + + TEST_BEGIN("Missing colon") { + Lexer_setSource(&lexer, "let hello_string_variable String = \"hello\""); + result = Parser_parse(&parser); + + EXPECT_FALSE(result.success); + EXPECT_NULL(result.node); + + EXPECT_TRUE(result.type == RESULT_ERROR_SYNTACTIC_ANALYSIS); + EXPECT_TRUE(result.severity == SEVERITY_ERROR); + // prbbly later add message check also + + } TEST_END(); + +} } From ae9235ded74fc7f34c868f2cff85b958d0133b37 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 15 Nov 2023 17:46:49 +0100 Subject: [PATCH 47/69] Refactor variable declaration tests --- test/compiler/parser/Parser.test.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index 14c6c7a..15723e2 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -54,7 +54,7 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { TEST_BEGIN("Constant with type annotation") { - Lexer_setSource(&lexer, "let hello_string_variable: String = \"hello\""); + Lexer_setSource(&lexer, "let hello_string_variable: String = \"hello\""); result = Parser_parse(&parser); EXPECT_TRUE(result.success); @@ -78,7 +78,7 @@ DESCRIBE(variable_declaration, "Variable 
declaration parsing") { EXPECT_NOT_NULL(pattern->type); EXPECT_TRUE(pattern->_type == NODE_PATTERN); EXPECT_TRUE(pattern->type->_type == NODE_TYPE_REFERENCE); - EXPECT_TRUE(String_equals(pattern->type->id->name,"String")); + EXPECT_TRUE(String_equals(pattern->type->id->name, "String")); IdentifierASTNode *id = pattern->id; EXPECT_NOT_NULL(id); @@ -93,7 +93,7 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { } TEST_END(); TEST_BEGIN("More variables declaration") { - Lexer_setSource(&lexer, "var a = \"hello\", b: Int = 20, c = 10.12"); + Lexer_setSource(&lexer, "var a = \"hello\", b: Int = 20, c = 10.12"); result = Parser_parse(&parser); EXPECT_TRUE(result.success); @@ -109,7 +109,7 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { Array *arr = list->declarators; EXPECT_EQUAL_INT(arr->size, 3); - // first declarator a = "hello" + // first declarator a = "hello" VariableDeclaratorASTNode *declarator = Array_get(arr, 0); EXPECT_NOT_NULL(declarator); @@ -127,8 +127,8 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { EXPECT_TRUE(initializer->type == LITERAL_STRING); EXPECT_TRUE(String_equals(initializer->value.string, "hello")); - // second declarator b = 20 - declarator = Array_get(arr, 1); + // second declarator b = 20 + declarator = Array_get(arr, 1); EXPECT_NOT_NULL(declarator); pattern = declarator->pattern; @@ -136,7 +136,7 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { EXPECT_NOT_NULL(pattern->type); EXPECT_TRUE(pattern->_type == NODE_PATTERN); EXPECT_TRUE(pattern->type->_type == NODE_TYPE_REFERENCE); - EXPECT_TRUE(String_equals(pattern->type->id->name,"Int")); + EXPECT_TRUE(String_equals(pattern->type->id->name, "Int")); id = pattern->id; EXPECT_NOT_NULL(id); @@ -148,8 +148,8 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { EXPECT_TRUE(initializer->type == LITERAL_INTEGER); EXPECT_EQUAL_INT(initializer->value.integer, 20); - // third declarator c = 10.12 - declarator = 
Array_get(arr, 2); + // third declarator c = 10.12 + declarator = Array_get(arr, 2); EXPECT_NOT_NULL(declarator); pattern = declarator->pattern; @@ -169,15 +169,15 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { } TEST_END(); TEST_BEGIN("Missing colon") { - Lexer_setSource(&lexer, "let hello_string_variable String = \"hello\""); + Lexer_setSource(&lexer, "let hello_string_variable String = \"hello\""); result = Parser_parse(&parser); EXPECT_FALSE(result.success); EXPECT_NULL(result.node); - EXPECT_TRUE(result.type == RESULT_ERROR_SYNTACTIC_ANALYSIS); - EXPECT_TRUE(result.severity == SEVERITY_ERROR); - // prbbly later add message check also + EXPECT_TRUE(result.type == RESULT_ERROR_SYNTACTIC_ANALYSIS); + EXPECT_TRUE(result.severity == SEVERITY_ERROR); + // prbbly later add message check also } TEST_END(); From 2ff690f3d9a3de704faaeee9101ccd093585982a Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 15 Nov 2023 17:46:59 +0100 Subject: [PATCH 48/69] Add function declaration tests --- test/compiler/parser/Parser.test.c | 303 +++++++++++++++++++++++++++++ 1 file changed, 303 insertions(+) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index 15723e2..8eb82a7 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -182,4 +182,307 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { } TEST_END(); } + +DESCRIBE(function_declaration, "Function declaration parsing") { + Lexer lexer; + Lexer_constructor(&lexer); + + Parser parser; + Parser_constructor(&parser, &lexer); + + ParserResult result; + + TEST_BEGIN("No parameters empty body") { + Lexer_setSource(&lexer, "func empty_function(){}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_FUNCTION_DECLARATION); + + FunctionDeclarationASTNode *declaration = (FunctionDeclarationASTNode*)statement; + + IdentifierASTNode *id = declaration->id; + 
EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "empty_function")); + + // return type + EXPECT_NULL(declaration->returnType); + + // parameters + ParameterListASTNode *list = declaration->parameterList; + EXPECT_NOT_NULL(list->parameters); + + Array *arr = list->parameters; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + // body + BlockASTNode *body = declaration->body; + EXPECT_NOT_NULL(body->statements); + arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } TEST_END(); + + + TEST_BEGIN("With simple parameters empty body") { + Lexer_setSource(&lexer, "func parameters_function(a: Int, b: Int = 10, c: String = \"hello\"){}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_FUNCTION_DECLARATION); + + FunctionDeclarationASTNode *declaration = (FunctionDeclarationASTNode*)statement; + + IdentifierASTNode *id = declaration->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "parameters_function")); + + // return type + EXPECT_NULL(declaration->returnType); + + // parameters + ParameterListASTNode *list = declaration->parameterList; + EXPECT_NOT_NULL(list->parameters); + + Array *arr = list->parameters; + EXPECT_NOT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 3); + + // first parameter a: Int + ParameterASTNode *parameter = Array_get(arr, 0); + EXPECT_NOT_NULL(parameter); + EXPECT_FALSE(parameter->isLabeless); + + EXPECT_NULL(parameter->externalId); + EXPECT_TRUE(String_equals(parameter->internalId->name, "a")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(parameter->type->id->name, "Int")); + + EXPECT_NULL(parameter->initializer); + + // second parameter b: Int = 10 + parameter = Array_get(arr, 1); + EXPECT_NOT_NULL(parameter); + EXPECT_FALSE(parameter->isLabeless); + + EXPECT_NULL(parameter->externalId); + 
EXPECT_TRUE(String_equals(parameter->internalId->name, "b")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(parameter->type->id->name, "Int")); + + EXPECT_NOT_NULL(parameter->initializer); + + LiteralExpressionASTNode *initializer = (LiteralExpressionASTNode*)parameter->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_INTEGER); + EXPECT_EQUAL_INT(initializer->value.integer, 10); + + // third parameter c = "hello" + parameter = Array_get(arr, 2); + EXPECT_NOT_NULL(parameter); + EXPECT_FALSE(parameter->isLabeless); + + EXPECT_NULL(parameter->externalId); + EXPECT_TRUE(String_equals(parameter->internalId->name, "c")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(parameter->type->id->name, "String")); + + EXPECT_NOT_NULL(parameter->initializer); + + initializer = (LiteralExpressionASTNode*)parameter->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_STRING); + EXPECT_TRUE(String_equals(initializer->value.string, "hello")); + + // body + BlockASTNode *body = declaration->body; + EXPECT_NOT_NULL(body->statements); + arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } TEST_END(); + + TEST_BEGIN("With advanced parameters empty body") { + Lexer_setSource(&lexer, "func parameters_function(a_external a_internal: Int, _ b_internal: Int = 100, _ _: Double = 12.3){}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_FUNCTION_DECLARATION); + + FunctionDeclarationASTNode *declaration = (FunctionDeclarationASTNode*)statement; + + IdentifierASTNode *id = declaration->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, 
"parameters_function")); + + // return type + EXPECT_NULL(declaration->returnType); + + // parameters + ParameterListASTNode *list = declaration->parameterList; + EXPECT_NOT_NULL(list->parameters); + + Array *arr = list->parameters; + EXPECT_NOT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 3); + + // first parameter a_external a_internal: Int + ParameterASTNode *parameter = Array_get(arr, 0); + EXPECT_NOT_NULL(parameter); + EXPECT_FALSE(parameter->isLabeless); + + EXPECT_TRUE(String_equals(parameter->externalId->name, "a_external")); + EXPECT_TRUE(String_equals(parameter->internalId->name, "a_internal")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(parameter->type->id->name, "Int")); + + EXPECT_NULL(parameter->initializer); + + // second parameter _ b_internal: Int = 100 + parameter = Array_get(arr, 1); + EXPECT_NOT_NULL(parameter); + EXPECT_TRUE(parameter->isLabeless); + + EXPECT_TRUE(String_equals(parameter->externalId->name, "_")); + EXPECT_TRUE(String_equals(parameter->internalId->name, "b_internal")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(parameter->type->id->name, "Int")); + + EXPECT_NOT_NULL(parameter->initializer); + + LiteralExpressionASTNode *initializer = (LiteralExpressionASTNode*)parameter->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_INTEGER); + EXPECT_EQUAL_INT(initializer->value.integer, 100); + + // third parameter _ _: Double = 12.3 + parameter = Array_get(arr, 2); + EXPECT_NOT_NULL(parameter); + EXPECT_TRUE(parameter->isLabeless); + + EXPECT_TRUE(String_equals(parameter->externalId->name, "_")); + EXPECT_TRUE(String_equals(parameter->internalId->name, "_")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + 
EXPECT_TRUE(String_equals(parameter->type->id->name, "Double")); + + EXPECT_NOT_NULL(parameter->initializer); + + initializer = (LiteralExpressionASTNode*)parameter->initializer; + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(initializer->type == LITERAL_FLOATING); + EXPECT_EQUAL_INT(initializer->value.floating, 12.3); + + // body + BlockASTNode *body = declaration->body; + EXPECT_NOT_NULL(body->statements); + arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } TEST_END(); + + TEST_BEGIN("With parameters and simple block") { + Lexer_setSource(&lexer, "func isGreater(_ a:Int, _ b:Int) -> Bool { return a > b}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_FUNCTION_DECLARATION); + + FunctionDeclarationASTNode *declaration = (FunctionDeclarationASTNode*)statement; + + IdentifierASTNode *id = declaration->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "isGreater")); + + // return type + EXPECT_NOT_NULL(declaration->returnType); + EXPECT_TRUE(declaration->returnType->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(declaration->returnType->id->name, "Bool")); + + // parameters + ParameterListASTNode *list = declaration->parameterList; + EXPECT_NOT_NULL(list->parameters); + + Array *arr = list->parameters; + EXPECT_NOT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 2); + + // first parameter a: Int + ParameterASTNode *parameter = Array_get(arr, 0); + EXPECT_NOT_NULL(parameter); + EXPECT_TRUE(parameter->isLabeless); + + EXPECT_TRUE(String_equals(parameter->externalId->name, "_")); + EXPECT_TRUE(String_equals(parameter->internalId->name, "a")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(parameter->type->id->name, "Int")); + + EXPECT_NULL(parameter->initializer); + + // second parameter a: Int + parameter = 
Array_get(arr, 1); + EXPECT_NOT_NULL(parameter); + EXPECT_TRUE(parameter->isLabeless); + + EXPECT_TRUE(String_equals(parameter->externalId->name, "_")); + EXPECT_TRUE(String_equals(parameter->internalId->name, "b")); + + EXPECT_NOT_NULL(parameter->type); + EXPECT_TRUE(parameter->type->_type == NODE_TYPE_REFERENCE); + EXPECT_TRUE(String_equals(parameter->type->id->name, "Int")); + + EXPECT_NULL(parameter->initializer); + + // body + BlockASTNode *body = declaration->body; + EXPECT_NOT_NULL(body->statements); + + arr = body->statements; + EXPECT_NOT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 1); + + StatementASTNode *body_statement = Array_get(arr, 0); + EXPECT_TRUE(body_statement->_type == NODE_RETURN_STATEMENT); + ReturnStatementASTNode *return_statement = (ReturnStatementASTNode*)body_statement; + + BinaryExpressionASTNode *function_return = (BinaryExpressionASTNode*)return_statement->expression; + EXPECT_NOT_NULL(function_return); + EXPECT_TRUE(function_return->_type == NODE_BINARY_EXPRESSION); + + EXPECT_TRUE(function_return->left); + + IdentifierASTNode *left = (IdentifierASTNode*)function_return->left; + EXPECT_NOT_NULL(left); + EXPECT_TRUE(left->_type == NODE_IDENTIFIER); + EXPECT_TRUE(String_equals(left->name, "a")); + + IdentifierASTNode *right = (IdentifierASTNode*)function_return->right; + EXPECT_NOT_NULL(right); + EXPECT_TRUE(right->_type == NODE_IDENTIFIER); + EXPECT_TRUE(String_equals(right->name, "b")); + + EXPECT_TRUE(function_return->operator == OPERATOR_GREATER); + + } TEST_END(); } From 5752cc1f62e28ef7b8cc71372ebf5a9911e17376 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 15 Nov 2023 17:47:23 +0100 Subject: [PATCH 49/69] Fix optional parameter initializer --- src/compiler/parser/Parser.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 7e14350..55ad25e 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ 
-81,7 +81,6 @@ ParserResult __Parser_parseProgram(Parser *parser) { ParserResult __Parser_parseBlock(Parser *parser, bool requireBraces) { assertf(parser != NULL); - // Check for left brace if(requireBraces) { LexerResult result = Lexer_nextToken(parser->lexer); @@ -221,8 +220,6 @@ ParserResult __Parser_parseTypeReference(Parser *parser) { } ParserResult __Parser_parseParameter(Parser *parser) { - // TODO: Add logic to output correct error messages - // TODO: Add expression parsing assertf(parser != NULL); bool isLabeless = false; @@ -272,7 +269,6 @@ ParserResult __Parser_parseParameter(Parser *parser) { } // check for Type - ParserResult typeResult = __Parser_parseTypeReference(parser); if(!typeResult.success) return typeResult; @@ -280,12 +276,14 @@ ParserResult __Parser_parseParameter(Parser *parser) { // check for initializer peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); - if(peek.token->kind == TOKEN_EQUAL) { - // TODO: Add expression parsing - // : Add constructor for ExpressionASTNode - // : Expression until , or ) - initializer = NULL; + // Skip the '=' token + LexerResult tmp = Lexer_nextToken(parser->lexer); + if(!tmp.success) return LexerToParserError(result); + + ParserResult initializerResult = __Parser_parseExpression(parser); + if(!initializerResult.success) return initializerResult; + initializer = (ExpressionASTNode*)initializerResult.node; } ParameterASTNode *paramNode = new_ParameterASTNode(paramLocalId, (TypeReferenceASTNode*)typeResult.node, initializer, paramExternalId, isLabeless); @@ -328,7 +326,7 @@ ParserResult __Parser_parseParameterList(Parser *parser) { } - // skip ')' + // consume ')' result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); @@ -625,7 +623,6 @@ ParserResult __Parser_parseReturnStatement(Parser *parser) { return ParserSuccess(returnStatement); } - ParserResult __Parser_parseVariableDeclarator(Parser *parser) { assertf(parser != NULL); 
From 3b354f61433040ae463fb5da42ee7f995d2b5fe8 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Wed, 15 Nov 2023 18:17:47 +0100 Subject: [PATCH 50/69] Added strings tokenizations + interpolated string support + tests --- include/compiler/lexer/Token.h | 1 + src/compiler/lexer/Lexer.c | 307 +++++++++++++++-- test/compiler/lexer/Lexer.test.c | 572 ++++++++++++++++++++++++++++++- 3 files changed, 855 insertions(+), 25 deletions(-) diff --git a/include/compiler/lexer/Token.h b/include/compiler/lexer/Token.h index 2c6bacb..e25e4ff 100644 --- a/include/compiler/lexer/Token.h +++ b/include/compiler/lexer/Token.h @@ -9,6 +9,7 @@ enum TokenType { TOKEN_EOF = 1, TOKEN_CONTROL, // Forgot what this is for :( TOKEN_MARKER, // Marker in the source code (for error messages) + TOKEN_STRING_INTERPOLATION_MARKER, TOKEN_LITERAL, TOKEN_IDENTIFIER, diff --git a/src/compiler/lexer/Lexer.c b/src/compiler/lexer/Lexer.c index a8016d0..094fde8 100644 --- a/src/compiler/lexer/Lexer.c +++ b/src/compiler/lexer/Lexer.c @@ -7,7 +7,7 @@ #include "inspector.h" #include "assertf.h" -char __Lexer_resolveEscapedChar(char ch); +bool __Lexer_resolveEscapedChar(char ch, char *out); LexerResult __Lexer_tokenizeWhitespace(Lexer *lexer); LexerResult __Lexer_tokenizeSpace(Lexer *lexer); @@ -357,6 +357,7 @@ LexerResult Lexer_tokenizeNextToken(Lexer *lexer) { return LexerSuccess(); } // Match string literals + // TODO: Allow only double-quoted string literals else if(ch == '"' || ch == '\'') { return __Lexer_tokenizeString(lexer); } @@ -496,24 +497,180 @@ LexerResult Lexer_tokenize(Lexer *lexer, char *source) { return LexerSuccess(); } -char __Lexer_resolveEscapedChar(char ch) { +LexerResult __Lexer_tokenizeUntilStringInterpolationTerminator(Lexer *lexer) { + if(!lexer) return LexerNoMatch(); + if(!lexer->currentChar) return LexerNoMatch(); + + // Keep track of the depth in case of nested parentheses + // 1 - the initial string interpolation parenthesis to match with the closing one + size_t depth = 1; + + 
// While there are tokens to process + LexerResult result = LexerSuccess(); + // while((result = Lexer_tokenizeNextToken(lexer)).token && result.token->type != TOKEN_EOF) { + // if(result.token->kind == TOKEN_LEFT_PAREN) { + // depth++; + // } else if(result.token->kind == TOKEN_RIGHT_PAREN) { + // depth--; + + // if(depth == 0) break; + // } + // } + + do { + result = Lexer_tokenizeNextToken(lexer); + if(!result.success) return result; + + // Get the token at the top of the token stream + Token *token = Array_get(lexer->tokens, -1); + + // If the token is an EOF, the interpolation is not terminated + if(token->type == TOKEN_EOF) return LexerError( + String_fromFormat("cannot find ')' to match opening '(' in string interpolation"), + Array_fromArgs( + 1, + Token_alloc( + TOKEN_MARKER, + TOKEN_CARET, + WHITESPACE_NONE, + TextRange_construct( + lexer->currentChar - 1, + lexer->currentChar, + lexer->line, + lexer->column + ), + (union TokenValue){0} + ) + ) + ); + + // Sort out the parentheses + // We must NOT consume the closing parentheses, because of potentionally consuming + // whitespace directly after the closing parenthesis which is part of the string, + // so instead we will just check for the current char + if(*lexer->currentChar == '(') { + depth++; + } else if(*lexer->currentChar == ')') { + depth--; + } + } while(depth != 0); + + // Consume the closing parenthesis + Lexer_advance(lexer); + + return LexerSuccess(); +} + +bool __Lexer_resolveEscapedChar(char ch, char *out) { switch(ch) { - case '0': return '\0'; - case 'a': return '\a'; - case 'b': return '\b'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case 't': return '\t'; - case 'v': return '\v'; - case '\\': return '\\'; - case '\'': return '\''; - case '"': return '"'; - case 'd': return 'd'; - case 'x': return 'x'; - case 'u': return 'u'; - default: return '\0'; + case '0': *out = '\0'; break; + case 'n': *out = '\n'; break; + case 'r': *out = '\r'; break; + case 't': 
*out = '\t'; break; + case '\\': *out = '\\'; break; + case '\'': *out = '\''; break; + case '"': *out = '"'; break; + default: return false; } + + return true; +} + +LexerResult __Lexer_parseUnicodeEscapeSequence(Lexer *lexer, char *out) { + // Consume the opening brace + if(!Lexer_match(lexer, "{")) return LexerError( + String_fromFormat("expected hexadecimal code in braces after unicode escape"), + Array_fromArgs( + 1, + Token_alloc( + TOKEN_MARKER, + TOKEN_CARET, + WHITESPACE_NONE, + TextRange_construct( + lexer->currentChar, + lexer->currentChar + 1, + lexer->line, + lexer->column + ), + (union TokenValue){0} + ) + ) + ); + + // Parse the hexadecimal code + char *start = lexer->currentChar; + char *end = NULL; + long code = strtol(lexer->currentChar, &end, 16); + + // Check if the code is valid + if(!end || code < 0 || code > 0x10FFFF) return LexerError( + String_fromFormat("invalid unicode scalar '%d'", code), + Array_fromArgs( + 1, + Token_alloc( + TOKEN_MARKER, + TOKEN_CARET, + WHITESPACE_NONE, + TextRange_construct( + start, + end, + lexer->line, + lexer->column + ), + (union TokenValue){0} + ) + ) + ); + + // Check for length of the code + size_t length = end - lexer->currentChar; + if(length < 1 || length > 8) return LexerError( + String_fromFormat("\\u{...} escape sequence expects between 1 and 8 hex digits"), + Array_fromArgs( + 1, + Token_alloc( + TOKEN_MARKER, + TOKEN_CARET, + WHITESPACE_NONE, + TextRange_construct( + start, + end, + lexer->line, + lexer->column + ), + (union TokenValue){0} + ) + ) + ); + + // Consume the hexadecimal code + lexer->currentChar = end; + + // Check for the closing brace + if(!Lexer_match(lexer, "}")) return LexerError( + String_fromFormat("expected closing brace '}' after unicode escape"), + Array_fromArgs( + 1, + Token_alloc( + TOKEN_MARKER, + TOKEN_CARET, + WHITESPACE_NONE, + TextRange_construct( + lexer->currentChar, + lexer->currentChar + 1, + lexer->line, + lexer->column + ), + (union TokenValue){0} + ) + ) + ); + 
+ // Write the code to the output + // NOTICE: This will only work for values in range 0-255 + *out = code; + + return LexerSuccess(); } // Swift multiline string literals @@ -563,7 +720,7 @@ LexerResult __Lexer_tokenizeString(Lexer *lexer) { // Match string while(ch != quote) { // Handle unterminated string literals - if(ch == '\0') return LexerError( + if(ch == '\0' || ch == '\n') return LexerError( String_fromFormat("unterminated string literal"), Array_fromArgs( 1, @@ -582,15 +739,119 @@ LexerResult __Lexer_tokenizeString(Lexer *lexer) { ) ); + // TODO: Handle multiline string literals + // This consumes two characters, so in case of `\`, both backslash // and the quote are consumed and therefore the loop will not terminate if(ch == '\\') { - char escaped = Lexer_advance(lexer); - // TODO: Add support for unicode and hex escapes according to the language specification - ch = __Lexer_resolveEscapedChar(escaped); - } + // Get the character to escape (consume the backslash) + char toEscape = Lexer_advance(lexer); + + // Pick the escaping strategy + if(toEscape == 'u') { + // TODO: Add support for unicode and hex escapes according to the language specification + // Consume the 'u' + Lexer_advance(lexer); + + char escaped = '\0'; + LexerResult res = __Lexer_parseUnicodeEscapeSequence(lexer, &escaped); + + if(!res.success) return res; + + String_appendChar(string, escaped); + lexer->currentChar--; // Go back one character + } else if(toEscape == '(') { + // TODO: Add escape sequences for interpolation + + // Finish the current string literal + { + TextRange range; + TextRange_constructor(&range, start, lexer->currentChar, lexer->line, lexer->column); + + fetch_next_whitespace(lexer); + + // Create a token + Token *token = Token_alloc(TOKEN_LITERAL, TOKEN_STRING, __wh_bit, range, (union TokenValue){.string = string}); + assertf(token != NULL); + + // Add the token to the array + Array_push(lexer->tokens, token); + } + + // Add string interpolation marker + { + 
TextRange range; + TextRange_constructor(&range, lexer->currentChar, lexer->currentChar, lexer->line, lexer->column); + + fetch_next_whitespace(lexer); + + // Create a token + Token *token = Token_alloc(TOKEN_STRING_INTERPOLATION_MARKER, TOKEN_DEFAULT, __wh_bit, range, (union TokenValue){0}); + assertf(token != NULL); + + // Add the token to the array + Array_push(lexer->tokens, token); + } + + // Tokenize the interpolated expression + { + // Consume the opening paren + Lexer_advance(lexer); + + LexerResult res = __Lexer_tokenizeUntilStringInterpolationTerminator(lexer); + if(!res.success) return res; + + lexer->currentChar--; // Go back one character + } + + // Add string interpolation marker + { + TextRange range; + TextRange_constructor(&range, lexer->currentChar, lexer->currentChar, lexer->line, lexer->column); + + fetch_next_whitespace(lexer); + + // Create a token + Token *token = Token_alloc(TOKEN_STRING_INTERPOLATION_MARKER, TOKEN_DEFAULT, __wh_bit, range, (union TokenValue){0}); + assertf(token != NULL); + + // Add the token to the array + Array_push(lexer->tokens, token); + } + + // Start a new string literal + { + start = lexer->currentChar + 1; // +1 to fix the text range + string = String_alloc(""); + } + } else { + char escaped = '\0'; + bool res = __Lexer_resolveEscapedChar(toEscape, &escaped); + + if(!res) return LexerError( + String_fromFormat("invalid escape sequence '\\%s' in literal", format_char(toEscape)), + Array_fromArgs( + 1, + Token_alloc( + TOKEN_MARKER, + TOKEN_CARET, + WHITESPACE_NONE, + TextRange_construct( + lexer->currentChar - 1, + lexer->currentChar, + lexer->line, + lexer->column + ), + (union TokenValue){0} + ) + ) + ); - String_appendChar(string, ch); + String_appendChar(string, escaped); + } + } else { + String_appendChar(string, ch); + } ch = Lexer_advance(lexer); } diff --git a/test/compiler/lexer/Lexer.test.c b/test/compiler/lexer/Lexer.test.c index b2d2032..b452bf2 100644 --- a/test/compiler/lexer/Lexer.test.c +++ 
b/test/compiler/lexer/Lexer.test.c @@ -1299,8 +1299,576 @@ DESCRIBE(string_tokenization, "String literals tokenization") { LexerResult result; Token *token; - (void)token; - (void)result; + TEST("Empty string", { + result = Lexer_tokenize(&lexer, "\"\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "")); + EXPECT_EQUAL_INT(token->value.string->length, 0); + }) + + TEST("Single character string", { + result = Lexer_tokenize(&lexer, "\"a\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "a")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Multicharacter string", { + result = Lexer_tokenize(&lexer, "\"abc\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "abc")); + EXPECT_EQUAL_INT(token->value.string->length, 3); + }) +} + +DESCRIBE(string_invalid_tokeniz, "Invalid string literals tokenization") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + + TEST("Unterminated string", { + result = Lexer_tokenize(&lexer, "\""); + EXPECT_FALSE(result.success); + }) + + TEST("Unescaped quote", { + result = Lexer_tokenize(&lexer, "\"Hello \" World\""); + EXPECT_FALSE(result.success); + }) +} + +DESCRIBE(string_quotes_escape, "String literals tokenization with escaped quotes") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + Token *token; + + TEST("Single escaped double quote", { + result = Lexer_tokenize(&lexer, "\"\\\"\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = 
(Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\"")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Multiple escaped double quotes", { + result = Lexer_tokenize(&lexer, "\"\\\"\\\"\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\"\"")); + EXPECT_EQUAL_INT(token->value.string->length, 2); + }) + + TEST("Multiple escaped double quotes with text", { + result = Lexer_tokenize(&lexer, "\"pre \\\"in\\\" post\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "pre \"in\" post")); + EXPECT_EQUAL_INT(token->value.string->length, 13); + }) +} + +DESCRIBE(string_backslash_escape, "String literals tokenization with escaped backslashes") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + Token *token; + + TEST("Single escaped backslash", { + result = Lexer_tokenize(&lexer, "\"\\\\\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\\")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Multiple escaped backslashes", { + result = Lexer_tokenize(&lexer, "\"\\\\\\\\\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\\\\")); + EXPECT_EQUAL_INT(token->value.string->length, 2); + }) + + TEST("Multiple escaped backslashes with text", { + result = 
Lexer_tokenize(&lexer, "\"pre \\\\in\\\\ post\""); + EXPECT_TRUE(result.success); + EXPECT_EQUAL_INT(lexer.tokens->size, 2); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "pre \\in\\ post")); + EXPECT_EQUAL_INT(token->value.string->length, 13); + }) +} + +DESCRIBE(string_whitespace_escape, "String literals tokenization with escaped whitespace") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + Token *token; + + TEST("Single line feed", { + result = Lexer_tokenize(&lexer, "\"\\n\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\n")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Multiple line feeds", { + result = Lexer_tokenize(&lexer, "\"\\n\\n\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\n\n")); + EXPECT_EQUAL_INT(token->value.string->length, 2); + }) + + + TEST("Single carriage return", { + result = Lexer_tokenize(&lexer, "\"\\r\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\r")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Multiple carriage returns", { + result = Lexer_tokenize(&lexer, "\"\\r\\r\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\r\r")); + EXPECT_EQUAL_INT(token->value.string->length, 2); + }) + + + TEST("Single tab", { + result = Lexer_tokenize(&lexer, "\"\\t\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + 
EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\t")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Multiple tabs", { + result = Lexer_tokenize(&lexer, "\"\\t\\t\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\t\t")); + EXPECT_EQUAL_INT(token->value.string->length, 2); + }) +} + +DESCRIBE(string_unicode_escape, "String literals tokenization with escaped unicode sequences") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + Token *token; + + TEST("Single unicode character", { + result = Lexer_tokenize(&lexer, "\"\\u{61}\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "a")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Single prefixed unicode character", { + result = Lexer_tokenize(&lexer, "\"\\u{0061}\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "a")); + EXPECT_EQUAL_INT(token->value.string->length, 1); + }) + + TEST("Multiple unicode characters", { + result = Lexer_tokenize(&lexer, "\"\\u{61}\\u{62}\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "ab")); + EXPECT_EQUAL_INT(token->value.string->length, 2); + }) + + TEST("Multiple unicode characters in text", { + result = Lexer_tokenize(&lexer, "\"pre \\u{61} in \\u{62} post\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "pre a in b post")); + 
EXPECT_EQUAL_INT(token->value.string->length, 15); + }) +} + +DESCRIBE(str_invalid_escape, "Invalid escape sequences in string literals tokenization") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + + TEST("Invalid escape sequence", { + result = Lexer_tokenize(&lexer, "\"\\a\""); + EXPECT_FALSE(result.success); + }) + + TEST("Incomplete unicode sequence 1", { + result = Lexer_tokenize(&lexer, "\"\\u\""); + EXPECT_FALSE(result.success); + }) + + TEST("Incomplete unicode sequence 2", { + result = Lexer_tokenize(&lexer, "\"\\u{\""); + EXPECT_FALSE(result.success); + }) + + TEST("Incomplete unicode sequence 3", { + result = Lexer_tokenize(&lexer, "\"\\u{61\""); + EXPECT_FALSE(result.success); + }) + + TEST("Too short unicode sequence", { + result = Lexer_tokenize(&lexer, "\"\\u{}\""); + EXPECT_FALSE(result.success); + }) + + TEST("Too long unicode sequence", { + result = Lexer_tokenize(&lexer, "\"\\u{123456789}\""); + EXPECT_FALSE(result.success); + }) + + TEST("Out of the range unicode sequence", { + result = Lexer_tokenize(&lexer, "\"\\u{FFFFFFFF}\""); + EXPECT_FALSE(result.success); + }) + + TEST("Invalid characters in unicode sequence", { + result = Lexer_tokenize(&lexer, "\"\\u{FFXF}\""); + EXPECT_FALSE(result.success); + }) +} + +DESCRIBE(string_escape_sequences, "String literals tokenization with escaped sequences") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + Token *token; + + TEST("Combination of all supported escape sequences", { + result = Lexer_tokenize(&lexer, "\"\\\"\\\\\\n\\r\\t\\u{61}\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "\"\\\n\r\ta")); + EXPECT_EQUAL_INT(token->value.string->length, 6); + }) +} + +DESCRIBE(string_interpolation, "Interpolated string literal tokenization") { + Lexer lexer; + Lexer_constructor(&lexer); + + LexerResult result; + Token *token; + + 
TEST("Simple interpolated string", { + result = Lexer_tokenize(&lexer, "\"Hello \\(name)!\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "Hello ")); + + token = (Token*)Array_get(lexer.tokens, 1); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 2); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "name")); + + token = (Token*)Array_get(lexer.tokens, 3); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 4); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "!")); + }) + + TEST_BEGIN("Interpolated string starts with expression") { + result = Lexer_tokenize(&lexer, "\"\\(expr) post\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "")); + + token = (Token*)Array_get(lexer.tokens, 1); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 2); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "expr")); + + token = (Token*)Array_get(lexer.tokens, 3); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 4); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, " post")); + } TEST_END() + + TEST("Interpolated string ends with expression", { + result = Lexer_tokenize(&lexer, "\"pre \\(expr)\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "pre ")); + + token = 
(Token*)Array_get(lexer.tokens, 1); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 2); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "expr")); + + token = (Token*)Array_get(lexer.tokens, 3); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 4); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "")); + }) + + TEST("Interpolated string starts & ends with expression", { + result = Lexer_tokenize(&lexer, "\"\\(expr)\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "")); + + token = (Token*)Array_get(lexer.tokens, 1); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 2); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "expr")); + + token = (Token*)Array_get(lexer.tokens, 3); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 4); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "")); + }) + + TEST("Interpolated string contining parentheses in expression", { + result = Lexer_tokenize(&lexer, "\"pre \\(expr * (a + b)) post\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "pre ")); + + token = (Token*)Array_get(lexer.tokens, 1); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 2); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "expr")); + + token = (Token*)Array_get(lexer.tokens, 3); + 
EXPECT_TRUE(token->type == TOKEN_OPERATOR); + EXPECT_TRUE(token->kind == TOKEN_STAR); + + token = (Token*)Array_get(lexer.tokens, 4); + EXPECT_TRUE(token->type == TOKEN_PUNCTUATOR); + EXPECT_TRUE(token->kind == TOKEN_LEFT_PAREN); + + token = (Token*)Array_get(lexer.tokens, 5); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "a")); + + token = (Token*)Array_get(lexer.tokens, 6); + EXPECT_TRUE(token->type == TOKEN_OPERATOR); + EXPECT_TRUE(token->kind == TOKEN_PLUS); + + token = (Token*)Array_get(lexer.tokens, 7); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "b")); + + token = (Token*)Array_get(lexer.tokens, 8); + EXPECT_TRUE(token->type == TOKEN_PUNCTUATOR); + EXPECT_TRUE(token->kind == TOKEN_RIGHT_PAREN); + + token = (Token*)Array_get(lexer.tokens, 9); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 10); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, " post")); + }) + + TEST("Interpolated string contining string as teh expression", { + result = Lexer_tokenize(&lexer, "\"pre \\(\"in\") post\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "pre ")); + + token = (Token*)Array_get(lexer.tokens, 1); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 2); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "in")); + + token = (Token*)Array_get(lexer.tokens, 3); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 4); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, " post")); + }) + + TEST("Nested interpolated strings", { + result = 
Lexer_tokenize(&lexer, "\"pre \\(\"in_pre \\(expr) in_post\") post\""); + EXPECT_TRUE(result.success); + + token = (Token*)Array_get(lexer.tokens, 0); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "pre ")); + + token = (Token*)Array_get(lexer.tokens, 1); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 2); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, "in_pre ")); + + token = (Token*)Array_get(lexer.tokens, 3); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 4); + EXPECT_TRUE(token->type == TOKEN_IDENTIFIER); + EXPECT_TRUE(String_equals(token->value.identifier, "expr")); + + token = (Token*)Array_get(lexer.tokens, 5); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 6); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, " in_post")); + + token = (Token*)Array_get(lexer.tokens, 7); + EXPECT_TRUE(token->type == TOKEN_STRING_INTERPOLATION_MARKER); + + token = (Token*)Array_get(lexer.tokens, 8); + EXPECT_TRUE(token->kind == TOKEN_STRING); + EXPECT_TRUE(String_equals(token->value.string, " post")); + }) + + TEST("Invalid use interpolated strings", { + result = Lexer_tokenize(&lexer, "\"\\(\""); + EXPECT_FALSE(result.success); + + result = Lexer_tokenize(&lexer, "\"pre \\(\""); + EXPECT_FALSE(result.success); + + result = Lexer_tokenize(&lexer, "\"pre \\( post\""); + EXPECT_FALSE(result.success); + + result = Lexer_tokenize(&lexer, "\"\\( post\""); + EXPECT_FALSE(result.success); + + result = Lexer_tokenize(&lexer, "\"pre \\(\"\"\""); + EXPECT_FALSE(result.success); + + result = Lexer_tokenize(&lexer, "\"pre \\(\"\" post\""); + EXPECT_FALSE(result.success); + + result = Lexer_tokenize(&lexer, "\"pre \\(\"in\" post\""); + EXPECT_FALSE(result.success); + + 
result = Lexer_tokenize(&lexer, "\"pre \\(\"in_pre \\(expr)\" post\""); + EXPECT_FALSE(result.success); + + result = Lexer_tokenize(&lexer, "\"pre \\(\"in_pre \\(expr\") post\""); + EXPECT_FALSE(result.success); + }) } DESCRIBE(nextToken, "Token stream (nextToken)") { From ed325a9b57d60c3c9a3905b488b095456df5bc34 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Wed, 15 Nov 2023 20:57:47 +0100 Subject: [PATCH 51/69] Add basic if statement parsing --- test/compiler/parser/Parser.test.c | 169 ++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 2 deletions(-) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index 8eb82a7..1aa9561 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -52,7 +52,6 @@ DESCRIBE(variable_declaration, "Variable declaration parsing") { EXPECT_EQUAL_INT(initializer->value.integer, 7); } TEST_END(); - TEST_BEGIN("Constant with type annotation") { Lexer_setSource(&lexer, "let hello_string_variable: String = \"hello\""); result = Parser_parse(&parser); @@ -225,7 +224,6 @@ DESCRIBE(function_declaration, "Function declaration parsing") { } TEST_END(); - TEST_BEGIN("With simple parameters empty body") { Lexer_setSource(&lexer, "func parameters_function(a: Int, b: Int = 10, c: String = \"hello\"){}"); result = Parser_parse(&parser); @@ -486,3 +484,170 @@ DESCRIBE(function_declaration, "Function declaration parsing") { } TEST_END(); } + +DESCRIBE(if_statement, "If statement parsing") { + Lexer lexer; + Lexer_constructor(&lexer); + + Parser parser; + Parser_constructor(&parser, &lexer); + + ParserResult result; + + TEST_BEGIN("Simple condition no body, no else") { + Lexer_setSource(&lexer, "if (true) {}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_IF_STATEMENT); + + IfStatementASTNode *if_statement = (IfStatementASTNode*)statement; + + EXPECT_NOT_NULL(if_statement->condition); + 
EXPECT_TRUE(if_statement->condition->_type == NODE_CONDITION); + + LiteralExpressionASTNode *condition_expression = (LiteralExpressionASTNode*)if_statement->condition->expression; + EXPECT_NOT_NULL(condition_expression); + EXPECT_TRUE(condition_expression->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(condition_expression->type == LITERAL_BOOLEAN); + EXPECT_TRUE(condition_expression->value.boolean); + + // if body + BlockASTNode *body = if_statement->body; + EXPECT_NOT_NULL(body->statements); + Array *arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + // else + EXPECT_NULL(if_statement->elseClause); + + } TEST_END(); + + TEST_BEGIN("Simple condition no parens, no body") { + Lexer_setSource(&lexer, "if (true) {}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_IF_STATEMENT); + + IfStatementASTNode *if_statement = (IfStatementASTNode*)statement; + + EXPECT_NOT_NULL(if_statement->condition); + EXPECT_TRUE(if_statement->condition->_type == NODE_CONDITION); + + LiteralExpressionASTNode *condition_expression = (LiteralExpressionASTNode*)if_statement->condition->expression; + EXPECT_NOT_NULL(condition_expression); + EXPECT_TRUE(condition_expression->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(condition_expression->type == LITERAL_BOOLEAN); + EXPECT_TRUE(condition_expression->value.boolean); + + // if body + BlockASTNode *body = if_statement->body; + EXPECT_NOT_NULL(body->statements); + Array *arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + // else + EXPECT_NOT_NULL(if_statement->elseClause); + EXPECT_FALSE(if_statement->elseClause->isElseIf); + EXPECT_NULL(if_statement->elseClause->ifStatement); + EXPECT_NOT_NULL(if_statement->elseClause->body); + + // else body + body = if_statement->elseClause->body; + EXPECT_NOT_NULL(body->statements); + arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } 
TEST_END(); + + TEST_BEGIN("Else if condition, no body") { + Lexer_setSource(&lexer, "if a > 10 {} else if (a < 10) {} else {}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_IF_STATEMENT); + + // if + IfStatementASTNode *if_statement = (IfStatementASTNode*)statement; + + EXPECT_NOT_NULL(if_statement->condition); + EXPECT_TRUE(if_statement->condition->_type == NODE_CONDITION); + + BinaryExpressionASTNode *condition_expression = (BinaryExpressionASTNode*)if_statement->condition->expression; + EXPECT_NOT_NULL(condition_expression); + EXPECT_TRUE(condition_expression->_type == NODE_BINARY_EXPRESSION); + + IdentifierASTNode *left = (IdentifierASTNode*)condition_expression->left; + EXPECT_NOT_NULL(left); + EXPECT_TRUE(left->_type == NODE_IDENTIFIER); + EXPECT_TRUE(String_equals(left->name, "a")); + + LiteralExpressionASTNode *right = (LiteralExpressionASTNode*)condition_expression->right; + EXPECT_NOT_NULL(right); + EXPECT_TRUE(right->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(right->type == LITERAL_INTEGER); + EXPECT_EQUAL_INT(right->value.integer, 10); + + EXPECT_TRUE(condition_expression->operator == OPERATOR_GREATER); + + // if body + BlockASTNode *body = if_statement->body; + EXPECT_NOT_NULL(body->statements); + Array *arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + // else if + EXPECT_NOT_NULL(if_statement->elseClause); + EXPECT_TRUE(if_statement->elseClause->isElseIf); + EXPECT_NOT_NULL(if_statement->elseClause->ifStatement); + + IfStatementASTNode *elseif = if_statement->elseClause->ifStatement; + + EXPECT_NOT_NULL(elseif->condition); + EXPECT_TRUE(elseif->condition->_type == NODE_CONDITION); + + condition_expression = (BinaryExpressionASTNode*)elseif->condition->expression; + EXPECT_NOT_NULL(condition_expression); + EXPECT_TRUE(condition_expression->_type == NODE_BINARY_EXPRESSION); + + left = (IdentifierASTNode*)condition_expression->left; + 
EXPECT_NOT_NULL(left); + EXPECT_TRUE(left->_type == NODE_IDENTIFIER); + EXPECT_TRUE(String_equals(left->name, "a")); + + right = (LiteralExpressionASTNode*)condition_expression->right; + EXPECT_NOT_NULL(right); + EXPECT_TRUE(right->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(right->type == LITERAL_INTEGER); + EXPECT_EQUAL_INT(right->value.integer, 10); + + EXPECT_TRUE(condition_expression->operator == OPERATOR_LESS); + + // else if body + body = elseif->body; + EXPECT_NOT_NULL(body->statements); + arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + // else + EXPECT_NOT_NULL(elseif->elseClause); + EXPECT_FALSE(elseif->elseClause->isElseIf); + EXPECT_NULL(elseif->elseClause->ifStatement); + EXPECT_NOT_NULL(elseif->elseClause->body); + + // else body + body = elseif->elseClause->body; + EXPECT_NOT_NULL(body->statements); + arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } TEST_END(); +} + From 6b7e66d27b7ec6e3e73b68a3e5c1c8bdc954b178 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Thu, 16 Nov 2023 15:07:19 +0100 Subject: [PATCH 52/69] Updated some comments --- include/compiler/lexer/Token.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/compiler/lexer/Token.h b/include/compiler/lexer/Token.h index e25e4ff..cc83f21 100644 --- a/include/compiler/lexer/Token.h +++ b/include/compiler/lexer/Token.h @@ -71,13 +71,13 @@ enum TokenKind { enum WhitespaceType { WHITESPACE_NONE = 0, // No whitespace - // WHITESPACE_LEFT_LIMIT = 1 << 0, // BOF or EOF - WHITESPACE_LEFT_SPACE = 1 << 1, // Space, tab, or vertical tab - WHITESPACE_LEFT_NEWLINE = 1 << 2, // Line feed or carriage return + // WHITESPACE_LEFT_LIMIT = 1 << 0, // BOF or EOF + WHITESPACE_LEFT_SPACE = 1 << 1, // Space, tab, vertical tab or multi-line comment on a single line + WHITESPACE_LEFT_NEWLINE = 1 << 2, // Line feed, carriage return, multi-line comment on multiple lines or single-line comment - 
// WHITESPACE_RIGHT_LIMIT = 1 << 4, // BOF or EOF - WHITESPACE_RIGHT_SPACE = 1 << 5, // Space, tab, or vertical tab - WHITESPACE_RIGHT_NEWLINE = 1 << 6, // Line feed or carriage return + // WHITESPACE_RIGHT_LIMIT = 1 << 4, // BOF or EOF + WHITESPACE_RIGHT_SPACE = 1 << 5, // Space, tab, vertical tab or multi-line comment on a single line + WHITESPACE_RIGHT_NEWLINE = 1 << 6, // Line feed, carriage return, multi-line comment on multiple lines or single-line comment WHITESPACE_LEFT = WHITESPACE_LEFT_SPACE | WHITESPACE_LEFT_NEWLINE, WHITESPACE_RIGHT = WHITESPACE_RIGHT_SPACE | WHITESPACE_RIGHT_NEWLINE, From 0fc69f2a38f9aeb8a7b71ac5fb8fc18c25e15856 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Thu, 16 Nov 2023 15:11:26 +0100 Subject: [PATCH 53/69] Added more macros for matching whitespace --- include/compiler/lexer/Token.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/compiler/lexer/Token.h b/include/compiler/lexer/Token.h index cc83f21..db6c3f7 100644 --- a/include/compiler/lexer/Token.h +++ b/include/compiler/lexer/Token.h @@ -87,7 +87,11 @@ enum WhitespaceType { WHITESPACE_MASK_RIGHT = /*WHITESPACE_RIGHT_LIMIT |*/ WHITESPACE_RIGHT_SPACE | WHITESPACE_RIGHT_NEWLINE }; -#define whitespace_both(whitespace) (((whitespace) & WHITESPACE_LEFT) && ((whitespace) & WHITESPACE_RIGHT)) +#define whitespace_left(whitespace) ((whitespace) & WHITESPACE_LEFT) +#define whitespace_right(whitespace) ((whitespace) & WHITESPACE_RIGHT) +#define whitespace_both(whitespace) (whitespace_left(whitespace) && whitespace_right(whitespace)) +#define whitespace_none(whitespace) ((whitespace) == WHITESPACE_NONE) +#define whitespace_consistent(whitespace) (whitespace_left(whitespace) && whitespace_right(whitespace) || whitespace_none(whitespace)) #define right_to_left_whitespace(whitespace) (((whitespace) & WHITESPACE_RIGHT) >> 4) #define left_to_right_whitespace(whitespace) (((whitespace) & WHITESPACE_LEFT) << 4) From 238be77a77d5d903c58a538b84d1eb3259a55d91 Mon 
Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Thu, 16 Nov 2023 15:15:17 +0100 Subject: [PATCH 54/69] Simplified the `whitespace_consistent` macro --- include/compiler/lexer/Token.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/compiler/lexer/Token.h b/include/compiler/lexer/Token.h index db6c3f7..add7dfd 100644 --- a/include/compiler/lexer/Token.h +++ b/include/compiler/lexer/Token.h @@ -91,7 +91,7 @@ enum WhitespaceType { #define whitespace_right(whitespace) ((whitespace) & WHITESPACE_RIGHT) #define whitespace_both(whitespace) (whitespace_left(whitespace) && whitespace_right(whitespace)) #define whitespace_none(whitespace) ((whitespace) == WHITESPACE_NONE) -#define whitespace_consistent(whitespace) (whitespace_left(whitespace) && whitespace_right(whitespace) || whitespace_none(whitespace)) +#define whitespace_consistent(whitespace) (whitespace_both(whitespace) || whitespace_none(whitespace)) #define right_to_left_whitespace(whitespace) (((whitespace) & WHITESPACE_RIGHT) >> 4) #define left_to_right_whitespace(whitespace) (((whitespace) & WHITESPACE_LEFT) << 4) From 2ba9d63fdcea11243e43e60a0961dbbdc3884141 Mon Sep 17 00:00:00 2001 From: Korunka1 Date: Thu, 16 Nov 2023 16:00:54 +0100 Subject: [PATCH 55/69] 'if (true)' bug fix --- include/compiler/parser/ExpressionParser.h | 4 ++-- src/compiler/parser/ExpressionParser.c | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/compiler/parser/ExpressionParser.h b/include/compiler/parser/ExpressionParser.h index ace4745..2021ff1 100644 --- a/include/compiler/parser/ExpressionParser.h +++ b/include/compiler/parser/ExpressionParser.h @@ -18,8 +18,8 @@ enum PrecTableIndex { I_NIL_COALES, I_REL_OP, I_ID, - I_LEFT_BRAC, - I_RIGHT_BRAC, + I_LEFT_PAREN, + I_RIGHT_PAREN, I_DOLLAR }; diff --git a/src/compiler/parser/ExpressionParser.c b/src/compiler/parser/ExpressionParser.c index b6fe747..5ff9f5f 100644 --- a/src/compiler/parser/ExpressionParser.c +++ 
b/src/compiler/parser/ExpressionParser.c @@ -14,7 +14,7 @@ #define STACK_SIZE 20 int precedence_table[TABLE_SIZE][TABLE_SIZE] = { // [stack top terminal][input token] - // +-|*/| ! |??|r |i |( |) |$ + // +-|*/| ! |??|r |i |( |) |$ {R, S, S, R, R, S, S, R, R}, // +- {R, R, S, R, R, S, S, R, R}, // */ {R, R, X, R, R, X, X, R, R}, // ! @@ -53,11 +53,11 @@ int Expr_getPrecTbIndex(Token *token) { case TOKEN_GREATER_EQUAL: return I_REL_OP; - case TOKEN_LEFT_BRACE: - return I_LEFT_BRAC; + case TOKEN_LEFT_PAREN: + return I_LEFT_PAREN; - case TOKEN_RIGHT_BRACE: - return I_RIGHT_BRAC; + case TOKEN_RIGHT_PAREN: + return I_RIGHT_PAREN; case TOKEN_DEFAULT: if(token->type == TOKEN_IDENTIFIER) { @@ -69,6 +69,7 @@ int Expr_getPrecTbIndex(Token *token) { case TOKEN_INTEGER: case TOKEN_FLOATING: case TOKEN_NIL: + case TOKEN_BOOLEAN: return I_ID; default: @@ -236,6 +237,8 @@ bool Expr_Reduce(Array *stack, StackItem *currentToken) { // Perform reduction and push result on stack (nonterminal) currentToken = Expr_performReduction(reduceStack); + Array_free(reduceStack); + if(currentToken != NULL) { Array_push(stack, currentToken); return true; From b6605d24f62ab286e6942cae555567c12ee2c4af Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Thu, 16 Nov 2023 16:55:00 +0100 Subject: [PATCH 56/69] Change test lexer source --- test/compiler/parser/Parser.test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index 1aa9561..6e9bca9 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -525,7 +525,7 @@ DESCRIBE(if_statement, "If statement parsing") { } TEST_END(); TEST_BEGIN("Simple condition no parens, no body") { - Lexer_setSource(&lexer, "if (true) {}"); + Lexer_setSource(&lexer, "if true {} else {}"); result = Parser_parse(&parser); EXPECT_TRUE(result.success); From 31004380580e55567862651ede92ee1490f6bf19 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Thu, 16 Nov 
2023 16:55:44 +0100 Subject: [PATCH 57/69] Fix consuming optinal closing param --- src/compiler/parser/Parser.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 55ad25e..1e84561 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -486,14 +486,19 @@ ParserResult __Parser_parseCondition(Parser *parser) { expression = (ExpressionASTNode*)expressionResult.node; } - // consume ')' if we consumed '(' - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); + peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); - if(hasOptionalParen && result.token->kind != TOKEN_RIGHT_PAREN) { - return ParserError( - String_fromFormat("expected ')' in condition"), - Array_fromArgs(1, result.token)); + // consume ')' if we consumed '(' + if(hasOptionalParen) { + if(peek.token->kind == TOKEN_RIGHT_PAREN) { + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + } else { + return ParserError( + String_fromFormat("expected ')' in condition"), + Array_fromArgs(1, peek.token)); + } } ConditionASTNode *condition = new_ConditionASTNode(expression, bindingCondition); From 3cd444f78059e70666f0162cb2d244d78b549dc5 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Thu, 16 Nov 2023 17:00:42 +0100 Subject: [PATCH 58/69] Fix not consuming if keyword in else if --- src/compiler/parser/Parser.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 1e84561..3bd735f 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -513,7 +513,6 @@ ParserResult __Parser_parseElseClause(Parser *parser) { LexerResult result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); - LexerResult peek = 
Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); @@ -522,6 +521,10 @@ ParserResult __Parser_parseElseClause(Parser *parser) { BlockASTNode *body = NULL; if(peek.token->kind == TOKEN_IF) { + // consume if keyword + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + ParserResult ifStatementResult = __Parser_parseIfStatement(parser); if(!ifStatementResult.success) return ifStatementResult; ifStatement = (IfStatementASTNode*)ifStatementResult.node; From 9e9a283d6b9a34be9f75c75f7e8e6f6dd005ace7 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Fri, 17 Nov 2023 01:04:42 +0100 Subject: [PATCH 59/69] Fix array name in statements macro --- test/compiler/parser/parser_assertions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/compiler/parser/parser_assertions.h b/test/compiler/parser/parser_assertions.h index d15eddd..bdd0c45 100644 --- a/test/compiler/parser/parser_assertions.h +++ b/test/compiler/parser/parser_assertions.h @@ -36,6 +36,6 @@ EXPECT_NOT_NULL(_block->statements); \ \ Array *statements = _block->statements; \ - EXPECT_EQUAL_INT(statementsArr->size, count); + EXPECT_EQUAL_INT(statements->size, count); #endif From b4f95595414ee520f4cbb375ca33099508e6b564 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Fri, 17 Nov 2023 01:06:12 +0100 Subject: [PATCH 60/69] Add tests for binding condition --- test/compiler/parser/Parser.test.c | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index 6e9bca9..d76988f 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -649,5 +649,64 @@ DESCRIBE(if_statement, "If statement parsing") { EXPECT_EQUAL_INT(arr->size, 0); } TEST_END(); + + TEST_BEGIN("Binding condition with parantheses") { + Lexer_setSource(&lexer, "if (let b = a) {}"); + result = Parser_parse(&parser); + + 
EXPECT_FALSE(result.success); + EXPECT_NULL(result.node); + + EXPECT_TRUE(result.type == RESULT_ERROR_SYNTACTIC_ANALYSIS); + EXPECT_TRUE(result.severity == SEVERITY_ERROR); + // prbbly later add message check also + + } TEST_END(); + + TEST_BEGIN("Binding condition no body, no else") { + Lexer_setSource(&lexer, "if let b = a {}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_IF_STATEMENT); + + IfStatementASTNode *if_statement = (IfStatementASTNode*)statement; + + EXPECT_NOT_NULL(if_statement->condition); + EXPECT_TRUE(if_statement->condition->_type == NODE_CONDITION); + + EXPECT_NULL(if_statement->condition->expression); + + OptionalBindingConditionASTNode *binding_condition = (OptionalBindingConditionASTNode*)if_statement->condition->optionalBindingCondition; + EXPECT_NOT_NULL(binding_condition); + EXPECT_TRUE(binding_condition->isConstant); + EXPECT_TRUE(binding_condition->_type == NODE_OPTIONAL_BINDING_CONDITION); + + PatternASTNode *pattern = binding_condition->pattern; + EXPECT_NOT_NULL(pattern); + EXPECT_NULL(pattern->type); + + IdentifierASTNode *id = pattern->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "b")); + + IdentifierASTNode *initializer = (IdentifierASTNode*)binding_condition->initializer; + + EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_IDENTIFIER); + EXPECT_NOT_NULL(initializer->name); + EXPECT_TRUE(String_equals(initializer->name, "a")); + + // if body + BlockASTNode *body = if_statement->body; + EXPECT_NOT_NULL(body->statements); + Array *arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + // else + EXPECT_NULL(if_statement->elseClause); + + } TEST_END(); } From d40b2b012cdc6d13b92b609d7d842fcfe752f966 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Fri, 17 Nov 2023 01:06:22 +0100 Subject: [PATCH 61/69] Add simple tests for while statement --- test/compiler/parser/Parser.test.c | 119 
+++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index d76988f..83d5c6c 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -710,3 +710,122 @@ DESCRIBE(if_statement, "If statement parsing") { } TEST_END(); } +DESCRIBE(while_statement, "While statement parsing") { + Lexer lexer; + Lexer_constructor(&lexer); + + Parser parser; + Parser_constructor(&parser, &lexer); + + ParserResult result; + + TEST_BEGIN("Simple condition no body") { + Lexer_setSource(&lexer, "while (true) {}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_WHILE_STATEMENT); + + WhileStatementASTNode *while_statement = (WhileStatementASTNode*)statement; + + EXPECT_NOT_NULL(while_statement->condition); + EXPECT_TRUE(while_statement->condition->_type == NODE_CONDITION); + + LiteralExpressionASTNode *condition_expression = (LiteralExpressionASTNode*)while_statement->condition->expression; + EXPECT_NOT_NULL(condition_expression); + EXPECT_TRUE(condition_expression->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(condition_expression->type == LITERAL_BOOLEAN); + EXPECT_TRUE(condition_expression->value.boolean); + + // while body + BlockASTNode *body = while_statement->body; + EXPECT_NOT_NULL(body->statements); + Array *arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } TEST_END(); + + TEST_BEGIN("Simple condition no parens, no body") { + Lexer_setSource(&lexer, "while true {}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_WHILE_STATEMENT); + + WhileStatementASTNode *while_statement = (WhileStatementASTNode*)statement; + + EXPECT_NOT_NULL(while_statement->condition); + EXPECT_TRUE(while_statement->condition->_type == NODE_CONDITION); + + LiteralExpressionASTNode *condition_expression = 
(LiteralExpressionASTNode*)while_statement->condition->expression; + EXPECT_NOT_NULL(condition_expression); + EXPECT_TRUE(condition_expression->_type == NODE_LITERAL_EXPRESSION); + EXPECT_TRUE(condition_expression->type == LITERAL_BOOLEAN); + EXPECT_TRUE(condition_expression->value.boolean); + + // while body + BlockASTNode *body = while_statement->body; + EXPECT_NOT_NULL(body->statements); + Array *arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } TEST_END(); + + TEST_BEGIN("Binding condition with parantheses") { + Lexer_setSource(&lexer, "while (let hello = world) {}"); + result = Parser_parse(&parser); + + EXPECT_FALSE(result.success); + EXPECT_NULL(result.node); + + EXPECT_TRUE(result.type == RESULT_ERROR_SYNTACTIC_ANALYSIS); + EXPECT_TRUE(result.severity == SEVERITY_ERROR); + // prbbly later add message check also + + } TEST_END(); + + TEST_BEGIN("Binding condition no body") { + Lexer_setSource(&lexer, "while let hello = world {}"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_WHILE_STATEMENT); + + WhileStatementASTNode *while_statement = (WhileStatementASTNode*)statement; + + EXPECT_NOT_NULL(while_statement->condition); + EXPECT_TRUE(while_statement->condition->_type == NODE_CONDITION); + + EXPECT_NULL(while_statement->condition->expression); + + OptionalBindingConditionASTNode *binding_condition = (OptionalBindingConditionASTNode*)while_statement->condition->optionalBindingCondition; + EXPECT_NOT_NULL(binding_condition); + EXPECT_TRUE(binding_condition->isConstant); + EXPECT_TRUE(binding_condition->_type == NODE_OPTIONAL_BINDING_CONDITION); + + PatternASTNode *pattern = binding_condition->pattern; + EXPECT_NOT_NULL(pattern); + EXPECT_NULL(pattern->type); + + IdentifierASTNode *id = pattern->id; + EXPECT_NOT_NULL(id); + EXPECT_TRUE(String_equals(id->name, "hello")); + + IdentifierASTNode *initializer = (IdentifierASTNode*)binding_condition->initializer; + + 
EXPECT_NOT_NULL(initializer); + EXPECT_TRUE(initializer->_type == NODE_IDENTIFIER); + EXPECT_NOT_NULL(initializer->name); + EXPECT_TRUE(String_equals(initializer->name, "world")); + + // while body + BlockASTNode *body = while_statement->body; + EXPECT_NOT_NULL(body->statements); + Array *arr = body->statements; + EXPECT_NULL(arr->data); + EXPECT_EQUAL_INT(arr->size, 0); + + } TEST_END(); +} From c022367c713d6c1bb42c02f360fddd9794f05004 Mon Sep 17 00:00:00 2001 From: Radim Mifka Date: Fri, 17 Nov 2023 02:06:44 +0100 Subject: [PATCH 62/69] Replace unnecessary ElseClauseASTNode --- include/compiler/parser/ASTNodes.h | 13 +----- src/compiler/parser/ASTNodes.c | 20 ++-------- src/compiler/parser/Parser.c | 63 +++++++++++------------------- test/compiler/parser/Parser.test.c | 30 +++++--------- 4 files changed, 39 insertions(+), 87 deletions(-) diff --git a/include/compiler/parser/ASTNodes.h b/include/compiler/parser/ASTNodes.h index afc7102..ace7516 100644 --- a/include/compiler/parser/ASTNodes.h +++ b/include/compiler/parser/ASTNodes.h @@ -28,7 +28,6 @@ enum ASTNodeType { NODE_ARGUMENT_LIST, NODE_FUNCTION_CALL, NODE_IF_STATEMENT, - NODE_ELSE_CLAUSE, NODE_PATTERN, NODE_CONDITION, NODE_OPTIONAL_BINDING_CONDITION, @@ -205,18 +204,11 @@ typedef struct ConditionASTNode { OptionalBindingConditionASTNode *optionalBindingCondition; } ConditionASTNode; -typedef struct ElseClauseASTNode { - enum ASTNodeType _type; - struct IfStatementASTNode *ifStatement; - BlockASTNode *body; - bool isElseIf; -} ElseClauseASTNode; - typedef struct IfStatementASTNode { enum ASTNodeType _type; ConditionASTNode *condition; BlockASTNode *body; - ElseClauseASTNode *elseClause; + ASTNode /* BlockASTNode | IfStatementASTNode | null */ *alternate; } IfStatementASTNode; typedef struct WhileStatementASTNode { @@ -256,8 +248,7 @@ FunctionCallASTNode* new_FunctionCallASTNode(IdentifierASTNode *id, ArgumentList PatternASTNode* new_PatternASTNode(IdentifierASTNode *id, TypeReferenceASTNode *type); 
OptionalBindingConditionASTNode* new_OptionalBindingConditionASTNode(PatternASTNode *pattern, ExpressionASTNode *initializer, bool isConstant); ConditionASTNode* new_ConditionASTNode(ExpressionASTNode *expression, OptionalBindingConditionASTNode *optionalBindingCondition); -ElseClauseASTNode* new_ElseClauseASTNode(IfStatementASTNode *ifStatement, BlockASTNode *body, bool isElseIf); -IfStatementASTNode* new_IfStatementASTNode(ConditionASTNode *condition, BlockASTNode *body, ElseClauseASTNode *elseClause); +IfStatementASTNode* new_IfStatementASTNode(ConditionASTNode *condition, BlockASTNode *body, ASTNode *alternate); WhileStatementASTNode* new_WhileStatementASTNode(ConditionASTNode *condition, BlockASTNode *body); AssignmentStatementASTNode* new_AssignmentStatementASTNode(IdentifierASTNode *id, ExpressionASTNode *assignment); diff --git a/src/compiler/parser/ASTNodes.c b/src/compiler/parser/ASTNodes.c index a2d54f6..02441d7 100644 --- a/src/compiler/parser/ASTNodes.c +++ b/src/compiler/parser/ASTNodes.c @@ -184,27 +184,15 @@ ConditionASTNode * new_ConditionASTNode( return node; } -ElseClauseASTNode * new_ElseClauseASTNode( - IfStatementASTNode *ifStatement, - BlockASTNode *body, - bool isElseIf -) { - prepare_node_of(ElseClauseASTNode, NODE_ELSE_CLAUSE) - node->ifStatement = ifStatement; - node->body = body; - node->isElseIf = isElseIf; - return node; -} - IfStatementASTNode * new_IfStatementASTNode( ConditionASTNode *condition, BlockASTNode *body, - ElseClauseASTNode *elseClause + ASTNode *alternate ) { prepare_node_of(IfStatementASTNode, NODE_IF_STATEMENT) node->condition = condition; node->body = body; - node->elseClause = elseClause; + node->alternate = alternate; return node; } @@ -243,12 +231,12 @@ BinaryExpressionASTNode* new_BinaryExpressionASTNode( UnaryExpressionASTNode* new_UnaryExpressionASTNode( ExpressionASTNode *argument, OperatorType operator - //bool IsPrefix + // bool IsPrefix ) { prepare_node_of(UnaryExpressionASTNode, NODE_UNARY_EXPRESSION) 
node->argument = argument; node->operator = operator; - //node->isPrefix = isPrefix; + // node->isPrefix = isPrefix; return node; } diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 3bd735f..3855edd 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -21,7 +21,6 @@ ParserResult __Parser_parseFuncStatement(Parser *parser); ParserResult __Parser_parsePattern(Parser *parser); ParserResult __Parser_parseOptionalBindingCondition(Parser *parser); ParserResult __Parser_parseCondition(Parser *parser); -ParserResult __Parser_parseElseClause(Parser *parser); ParserResult __Parser_parseIfStatement(Parser *parser); ParserResult __Parser_parseWhileStatement(Parser *parser); ParserResult __Parser_parseReturnStatement(Parser *parser); @@ -506,40 +505,6 @@ ParserResult __Parser_parseCondition(Parser *parser) { return ParserSuccess(condition); } -ParserResult __Parser_parseElseClause(Parser *parser) { - assertf(parser != NULL); - - // skip else keyword - LexerResult result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - - LexerResult peek = Lexer_peekToken(parser->lexer, 1); - if(!peek.success) return LexerToParserError(peek); - - IfStatementASTNode *ifStatement = NULL; - bool isElseIf = false; - BlockASTNode *body = NULL; - - if(peek.token->kind == TOKEN_IF) { - // consume if keyword - result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - - ParserResult ifStatementResult = __Parser_parseIfStatement(parser); - if(!ifStatementResult.success) return ifStatementResult; - ifStatement = (IfStatementASTNode*)ifStatementResult.node; - isElseIf = true; - } else { - ParserResult blockResult = __Parser_parseBlock(parser, true); - if(!blockResult.success) return blockResult; - body = (BlockASTNode*)blockResult.node; - } - - ElseClauseASTNode *elseClause = new_ElseClauseASTNode(ifStatement, body, isElseIf); - - return ParserSuccess(elseClause); -} 
- ParserResult __Parser_parseIfStatement(Parser *parser) { assertf(parser != NULL); @@ -569,15 +534,33 @@ ParserResult __Parser_parseIfStatement(Parser *parser) { peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); - ElseClauseASTNode *elseClause = NULL; + ASTNode *alternate = NULL; if(peek.token->kind == TOKEN_ELSE) { - ParserResult elseClauseResult = __Parser_parseElseClause(parser); - if(!elseClauseResult.success) return elseClauseResult; - elseClause = (ElseClauseASTNode*)elseClauseResult.node; + // skip else keyword + LexerResult result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + LexerResult peek = Lexer_peekToken(parser->lexer, 1); + if(!peek.success) return LexerToParserError(peek); + + if(peek.token->kind == TOKEN_IF) { + // consume if keyword + result = Lexer_nextToken(parser->lexer); + if(!result.success) return LexerToParserError(result); + + ParserResult ifStatementResult = __Parser_parseIfStatement(parser); + if(!ifStatementResult.success) return ifStatementResult; + alternate = (ASTNode*)ifStatementResult.node; + + } else { + ParserResult blockResult = __Parser_parseBlock(parser, true); + if(!blockResult.success) return blockResult; + alternate = (ASTNode*)blockResult.node; + } } - IfStatementASTNode *ifStatement = new_IfStatementASTNode((ConditionASTNode*)conditionResult.node, (BlockASTNode*)blockResult.node, (ElseClauseASTNode*)elseClause); + IfStatementASTNode *ifStatement = new_IfStatementASTNode((ConditionASTNode*)conditionResult.node, (BlockASTNode*)blockResult.node, (ASTNode*)alternate); return ParserSuccess(ifStatement); } diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index 83d5c6c..a9fc298 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -520,7 +520,7 @@ DESCRIBE(if_statement, "If statement parsing") { EXPECT_EQUAL_INT(arr->size, 0); // else - 
EXPECT_NULL(if_statement->elseClause); + EXPECT_NULL(if_statement->alternate); } TEST_END(); @@ -549,14 +549,9 @@ DESCRIBE(if_statement, "If statement parsing") { EXPECT_NULL(arr->data); EXPECT_EQUAL_INT(arr->size, 0); - // else - EXPECT_NOT_NULL(if_statement->elseClause); - EXPECT_FALSE(if_statement->elseClause->isElseIf); - EXPECT_NULL(if_statement->elseClause->ifStatement); - EXPECT_NOT_NULL(if_statement->elseClause->body); - // else body - body = if_statement->elseClause->body; + EXPECT_TRUE(if_statement->alternate->_type == NODE_BLOCK); + body = (BlockASTNode*)if_statement->alternate; EXPECT_NOT_NULL(body->statements); arr = body->statements; EXPECT_NULL(arr->data); @@ -602,11 +597,11 @@ DESCRIBE(if_statement, "If statement parsing") { EXPECT_EQUAL_INT(arr->size, 0); // else if - EXPECT_NOT_NULL(if_statement->elseClause); - EXPECT_TRUE(if_statement->elseClause->isElseIf); - EXPECT_NOT_NULL(if_statement->elseClause->ifStatement); + EXPECT_NOT_NULL(if_statement->alternate); + + EXPECT_TRUE(if_statement->alternate->_type == NODE_IF_STATEMENT); - IfStatementASTNode *elseif = if_statement->elseClause->ifStatement; + IfStatementASTNode *elseif = (IfStatementASTNode*)if_statement->alternate; EXPECT_NOT_NULL(elseif->condition); EXPECT_TRUE(elseif->condition->_type == NODE_CONDITION); @@ -635,14 +630,9 @@ DESCRIBE(if_statement, "If statement parsing") { EXPECT_NULL(arr->data); EXPECT_EQUAL_INT(arr->size, 0); - // else - EXPECT_NOT_NULL(elseif->elseClause); - EXPECT_FALSE(elseif->elseClause->isElseIf); - EXPECT_NULL(elseif->elseClause->ifStatement); - EXPECT_NOT_NULL(elseif->elseClause->body); - // else body - body = elseif->elseClause->body; + EXPECT_TRUE(elseif->alternate->_type == NODE_BLOCK); + body = (BlockASTNode*)elseif->alternate; EXPECT_NOT_NULL(body->statements); arr = body->statements; EXPECT_NULL(arr->data); @@ -705,7 +695,7 @@ DESCRIBE(if_statement, "If statement parsing") { EXPECT_EQUAL_INT(arr->size, 0); // else - EXPECT_NULL(if_statement->elseClause); + 
EXPECT_NULL(if_statement->alternate); } TEST_END(); } From 083fab9c1e8783fbeaeb738738d7d0b50f62e02f Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sat, 18 Nov 2023 02:48:16 +0100 Subject: [PATCH 63/69] Moved error descriptions inline to each enum item --- include/compiler/Result.h | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/include/compiler/Result.h b/include/compiler/Result.h index cee73bc..4de2427 100644 --- a/include/compiler/Result.h +++ b/include/compiler/Result.h @@ -12,37 +12,20 @@ enum Severity { SEVERITY_INFO }; - -/* - - • 1 - chyba v programu v rámci lexikální analýzy (chybná struktura aktuálního lexému). - • 2 - chyba v programu v rámci syntaktické analýzy (chybná syntaxe programu, chybějící hlavička, atp.). - • 3 - sémantická chyba v programu – nedefinovaná funkce, pokus o redefinice funkce. - • 4 - sémantická/běhová chyba v programu – špatný počet/typ parametrů u volání funkce či typ návratové hodnoty z funkce. - • 5 - sémantická chyba v programu – použití nedefinované proměnné. - • 6 - sémantická/běhová chyba v programu – chybějící/přebývající výraz v příkazu návratu z funkce. - • 7 - sémantická/běhová chyba typové kompatibility v aritmetických, řetězcových a - relačních výrazech. - • 8 - ostatní sémantické chyby. - • 99 - interní chyba překladače tj. neovlivněná vstupním programem (např. chyba alokace paměti atd.). 
- - - - */ enum ResultType { RESULT_INVALID = -3, RESULT_NO_MATCH = -2, RESULT_ASSERTION = -1, RESULT_SUCCESS = 0, - RESULT_ERROR_LEXICAL_ANALYSIS = 1, - RESULT_ERROR_SYNTACTIC_ANALYSIS = 2, - RESULT_ERROR_SEMANTIC_FUNCTION_DEFINITION = 3, - RESULT_ERROR_SEMANTIC_INVALID_FUNCTION_CALL = 4, - RESULT_ERROR_SEMANTIC_UNDEFINED_VARIABLE = 5, - RESULT_ERROR_SEMANTIC_INVALID_RETURN = 6, - RESULT_ERROR_SEMANTIC_INVALID_TYPE = 7, - RESULT_ERROR_SEMANTIC_OTHER = 8, - RESULT_ERROR_INTERNAL = 99 + RESULT_ERROR_LEXICAL_ANALYSIS = 1, // Chybná struktura aktuálního lexému + RESULT_ERROR_SYNTACTIC_ANALYSIS = 2, // Chybná syntaxe programu, chybějící hlavička, atp. + RESULT_ERROR_SEMANTIC_FUNCTION_DEFINITION = 3, // Nedefinovaná funkce, pokus o redefinice funkce + RESULT_ERROR_SEMANTIC_INVALID_FUNCTION_CALL = 4, // Špatný počet/typ parametrů u volání funkce či typ návratové hodnoty z funkce + RESULT_ERROR_SEMANTIC_UNDEFINED_VARIABLE = 5, // Použití nedefinované proměnné + RESULT_ERROR_SEMANTIC_INVALID_RETURN = 6, // Chybějící/přebývající výraz v příkazu návratu z funkce + RESULT_ERROR_SEMANTIC_INVALID_TYPE = 7, // Chyba typové kompatibility v aritmetických, řetězcových a relačních výrazech + RESULT_ERROR_SEMANTIC_OTHER = 8, // Ostatní sémantické chyby + RESULT_ERROR_INTERNAL = 99 // Interní chyba překladače tj. neovlivněná vstupním programem (např. chyba alokace paměti atd.) 
}; typedef struct Result { From 346fdc647d7e11e1a06e10eeb2abdaeda81604e6 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sat, 18 Nov 2023 02:49:19 +0100 Subject: [PATCH 64/69] Added `LF` macro + fixed formatting --- test/compiler/parser/parser_assertions.h | 46 ++++++++++++------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/test/compiler/parser/parser_assertions.h b/test/compiler/parser/parser_assertions.h index bdd0c45..cef597a 100644 --- a/test/compiler/parser/parser_assertions.h +++ b/test/compiler/parser/parser_assertions.h @@ -4,38 +4,40 @@ #ifndef PARSER_ASSERTIONS_H #define PARSER_ASSERTIONS_H +#define LF "\n" + /** * Validates program body and looks for a single statemtnt of a given type. * This creates `_program`, `_block`, `_statements` and `statement` variables to use. */ -#define EXPECT_STATEMENT(node, type) EXPECT_NOT_NULL(node); \ - \ - ProgramASTNode *_program = (ProgramASTNode*)node; \ - EXPECT_NOT_NULL(_program->block); \ - \ - BlockASTNode *_block = _program->block; \ - EXPECT_NOT_NULL(_block->statements); \ - \ - Array *_statements = _block->statements; \ - EXPECT_EQUAL_INT(_statements->size, 1); \ - \ - StatementASTNode *statement = Array_get(_statements, 0); \ - EXPECT_NOT_NULL(statement); \ +#define EXPECT_STATEMENT(node, type) EXPECT_NOT_NULL(node); \ + \ + ProgramASTNode *_program = (ProgramASTNode*)node; \ + EXPECT_NOT_NULL(_program->block); \ + \ + BlockASTNode *_block = _program->block; \ + EXPECT_NOT_NULL(_block->statements); \ + \ + Array *_statements = _block->statements; \ + EXPECT_EQUAL_INT(_statements->size, 1); \ + \ + StatementASTNode *statement = Array_get(_statements, 0); \ + EXPECT_NOT_NULL(statement); \ EXPECT_TRUE(statement->_type == type); /** * Validates program body and looks for a statement list of a given size. * This creates `_program`, `_block` and `statements` variables to use. 
*/ -#define EXPECT_STATEMENTS(node, count) EXPECT_NOT_NULL(node); \ - \ - ProgramASTNode *_program = (ProgramASTNode*)node; \ - EXPECT_NOT_NULL(_program->block); \ - \ - BlockASTNode *_block = _program->block; \ - EXPECT_NOT_NULL(_block->statements); \ - \ - Array *statements = _block->statements; \ +#define EXPECT_STATEMENTS(node, count) EXPECT_NOT_NULL(node); \ + \ + ProgramASTNode *_program = (ProgramASTNode*)node; \ + EXPECT_NOT_NULL(_program->block); \ + \ + BlockASTNode *_block = _program->block; \ + EXPECT_NOT_NULL(_block->statements); \ + \ + Array *statements = _block->statements; \ EXPECT_EQUAL_INT(statements->size, count); #endif From d15ad2e6d3fedac08b6b52e4931d82461b741979 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sat, 18 Nov 2023 02:51:28 +0100 Subject: [PATCH 65/69] Fixed bug when parsing two variable declarations right after another --- src/compiler/parser/Parser.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index 3855edd..b69a337 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -645,12 +645,11 @@ ParserResult __Parser_parseVariableDeclarationList(Parser *parser) { LexerResult peek; LexerResult result; - Array *declarators = Array_alloc(0); peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); - while(peek.token->type != TOKEN_EOF) { + while(true) { ParserResult declaratorResult = __Parser_parseVariableDeclarator(parser); if(!declaratorResult.success) return declaratorResult; @@ -659,23 +658,15 @@ ParserResult __Parser_parseVariableDeclarationList(Parser *parser) { peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); - + // Consume the `,` token if(peek.token->kind == TOKEN_COMMA) { result = Lexer_nextToken(parser->lexer); if(!result.success) return LexerToParserError(result); - } else if(peek.token->type != TOKEN_EOF) { - return 
ParserError( - String_fromFormat("found an unexpected second identifier in constant declaration; is there an accidental break?"), - Array_fromArgs(1, peek.token)); + } else { + break; } - - peek = Lexer_peekToken(parser->lexer, 1); - if(!peek.success) return LexerToParserError(peek); } - result = Lexer_nextToken(parser->lexer); - if(!result.success) return LexerToParserError(result); - VariableDeclarationListASTNode *variableDeclarationList = new_VariableDeclarationListASTNode(declarators); return ParserSuccess(variableDeclarationList); From 949093a344ede83fc7ca21760aa7d943cf658269 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sat, 18 Nov 2023 02:52:18 +0100 Subject: [PATCH 66/69] Added check for statement separation + very light tests --- src/compiler/parser/Parser.c | 17 +++++++++++++ test/compiler/parser/Parser.test.c | 41 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/compiler/parser/Parser.c b/src/compiler/parser/Parser.c index b69a337..2fbb433 100644 --- a/src/compiler/parser/Parser.c +++ b/src/compiler/parser/Parser.c @@ -105,6 +105,23 @@ ParserResult __Parser_parseBlock(Parser *parser, bool requireBraces) { Array_push(statements, result.node); + // Check for delimiter after statement + peek = Lexer_peekToken(parser->lexer, 0); + if(!peek.success) return LexerToParserError(peek); + + // They don't want us to have semicolons :( + if(peek.token->kind == TOKEN_SEMICOLON) { + return ParserError( + String_fromFormat("';' is not supported after statement, use new line instead"), + Array_fromArgs(1, peek.token) + ); + } else if(!(peek.token->whitespace & WHITESPACE_RIGHT_NEWLINE) && !Lexer_isAtEnd(parser->lexer)) { + return ParserError( + String_fromFormat("expected new line after statement"), + Array_fromArgs(1, peek.token) + ); + } + if(requireBraces) { peek = Lexer_peekToken(parser->lexer, 1); if(!peek.success) return LexerToParserError(peek); diff --git a/test/compiler/parser/Parser.test.c 
b/test/compiler/parser/Parser.test.c index a9fc298..c225541 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -819,3 +819,44 @@ DESCRIBE(while_statement, "While statement parsing") { } TEST_END(); } + +DESCRIBE(statement_separation, "Validity of statement separation") { + Lexer lexer; + Lexer_constructor(&lexer); + + Parser parser; + Parser_constructor(&parser, &lexer); + + ParserResult result; + + TEST_BEGIN("Single statement") { + Lexer_setSource(&lexer, "var a = 10"); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENT(result.node, NODE_VARIABLE_DECLARATION); + } TEST_END(); + + TEST_BEGIN("Multiple statements on new lines") { + Lexer_setSource( + &lexer, + "var a = 10" LF + "var b = 20" LF + ); + result = Parser_parse(&parser); + + EXPECT_TRUE(result.success); + EXPECT_STATEMENTS(result.node, 2); + } TEST_END(); + + TEST_BEGIN("Multiple statements on same line") { + Lexer_setSource( + &lexer, + "var a = 10 var b = 20" LF + ); + result = Parser_parse(&parser); + + EXPECT_FALSE(result.success); + } TEST_END(); + +} From 46001098d0321380b559bb2d69f8af6db585cb17 Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sat, 18 Nov 2023 16:09:53 +0100 Subject: [PATCH 67/69] Introduced new memory allocation function `mem_recalloc` --- include/allocator/MemoryAllocator.h | 2 ++ src/allocator/MemoryAllocator.c | 48 +++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/include/allocator/MemoryAllocator.h b/include/allocator/MemoryAllocator.h index b4aa578..d2fdecc 100644 --- a/include/allocator/MemoryAllocator.h +++ b/include/allocator/MemoryAllocator.h @@ -9,6 +9,7 @@ enum AllocatorAction { MEMORY_ALLOC, MEMORY_CALLOC, MEMORY_REALLOC, + MEMORY_RECALLOC, MEMORY_FREE, MEMORY_CLEANUP }; @@ -16,6 +17,7 @@ enum AllocatorAction { void* mem_alloc(size_t size); void* mem_calloc(size_t nitems, size_t size); void* mem_realloc(void *ptr, size_t size); +void* 
mem_recalloc(void *ptr, size_t oldNitems, size_t nitems, size_t size); void mem_free(void *ptr); void* safe_malloc(size_t size); diff --git a/src/allocator/MemoryAllocator.c b/src/allocator/MemoryAllocator.c index 88a4baa..c2ce89c 100644 --- a/src/allocator/MemoryAllocator.c +++ b/src/allocator/MemoryAllocator.c @@ -37,7 +37,7 @@ void safe_free(void *ptr) { // Private -void* Allocator_memoryAction(void *ptr, size_t nitems, size_t size, enum AllocatorAction action) { +void* Allocator_memoryAction(void *ptr, size_t oldNitems, size_t nitems, size_t size, enum AllocatorAction action) { // Use static variable to avoid globals (has the same effect as a global variable tho, but not mentioned in the instructions ;) ) static PointerSet *set = NULL; @@ -103,6 +103,38 @@ void* Allocator_memoryAction(void *ptr, size_t nitems, size_t size, enum Allocat return newPtr; } break; + case MEMORY_RECALLOC: { + // assert(ptr != NULL); // realloc with NULL pointer acts like a malloc + assertf(size > 0, PREFIX "recalloc: Cannot allocate 0 bytes"); + + #ifdef ALLOCATOR_USE_DEFAULT + assertf(size < 0, PREFIX "recalloc: There's no recalloc implementation in the default allocator"); + #endif + + ptr && assertf(PointerSet_has(set, ptr), PREFIX "recalloc: Provided pointer has not been allocated by this allocator (Maybe used 'malloc()' instead of 'mem_alloc()'?)"); + + // Remove the pointer from the set + PointerSet_remove(set, ptr); + + // Allocate new zero-initialized memory + void *newPtr = safe_calloc(nitems, size); + + // Copy the data when the old pointer is not NULL + if(ptr) { + // Copy the data from the old pointer to the new one + memcpy(newPtr, ptr, (oldNitems > nitems ? 
nitems : oldNitems) * size); + + // Free the old pointer + safe_free(ptr); + } + + // Add the pointer to the set + PointerSet_add(set, newPtr); + + // Return the new pointer + return newPtr; + } break; + case MEMORY_FREE: { assertf(ptr != NULL, PREFIX "free: Cannot free NULL pointer"); @@ -136,23 +168,27 @@ void* Allocator_memoryAction(void *ptr, size_t nitems, size_t size, enum Allocat void Allocator_cleanup() { - Allocator_memoryAction(NULL, 0, 0, MEMORY_CLEANUP); + Allocator_memoryAction(NULL, 0, 0, 0, MEMORY_CLEANUP); } void* mem_alloc(size_t size) { - return Allocator_memoryAction(NULL, 0, size, MEMORY_ALLOC); + return Allocator_memoryAction(NULL, 0, 0, size, MEMORY_ALLOC); } void* mem_calloc(size_t nitems, size_t size) { - return Allocator_memoryAction(NULL, nitems, size, MEMORY_CALLOC); + return Allocator_memoryAction(NULL, 0, nitems, size, MEMORY_CALLOC); } void* mem_realloc(void *ptr, size_t size) { - return Allocator_memoryAction(ptr, 0, size, MEMORY_REALLOC); + return Allocator_memoryAction(ptr, 0, 0, size, MEMORY_REALLOC); +} + +void* mem_recalloc(void *ptr, size_t oldNitems, size_t nitems, size_t size) { + return Allocator_memoryAction(ptr, oldNitems, nitems, size, MEMORY_RECALLOC); } void mem_free(void *ptr) { - Allocator_memoryAction(ptr, 0, 0, MEMORY_FREE); + Allocator_memoryAction(ptr, 0, 0, 0, MEMORY_FREE); } #undef PREFIX From c5d4a452b2f8422764084ce7d8ad9518ce6dc2bf Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sat, 18 Nov 2023 16:11:30 +0100 Subject: [PATCH 68/69] Fixed array resizing allocation bug --- src/internal/Array.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/internal/Array.c b/src/internal/Array.c index 18f9b47..76fdb73 100644 --- a/src/internal/Array.c +++ b/src/internal/Array.c @@ -67,10 +67,16 @@ void Array_set(Array *array, int index, void *value) { index = __Array_resolveIndex(array, index); - // If size is not enough to fit the index, resize the array - if((size_t)index >= 
array->size) { - Array_resize(array, array->capacity + (index - array->size) + 1); - array->size = index + 1; + size_t size = index + 1; + + // If there's not enough capacity to fit the index, resize the array + if(size > array->capacity) { + Array_resize(array, size); + } + + // If the index is past the end of the array, set the size to the index + if(size > array->size) { + array->size = size; } array->data[index] = value; @@ -108,16 +114,16 @@ void Array_clear(Array *array) { void Array_resize(Array *array, size_t capacity) { if(!array) return; - array->capacity = capacity; - if(capacity) { // Non-zero capacity => reallocate the array - array->data = mem_realloc(array->data, array->capacity * sizeof(void*)); + array->data = mem_recalloc(array->data, array->capacity, capacity, sizeof(void*)); } else { // Zero capacity => free the array and set it to NULL if(array->data) mem_free(array->data); array->data = NULL; } + + array->capacity = capacity; } void Array_reserve(Array *array, size_t capacity) { From 6236fad59b7f1981afbcffc4973bf5310278cf6d Mon Sep 17 00:00:00 2001 From: Jaroslav Louma Date: Sat, 18 Nov 2023 16:15:40 +0100 Subject: [PATCH 69/69] Added ability to specify a priority of the test file --- test/compiler/lexer/Lexer.test.c | 2 ++ test/compiler/parser/ExpressionParser.test.c | 1 + test/compiler/parser/Parser.test.c | 1 + test/internal/Array.test.c | 2 ++ test/internal/HashMap.test.c | 2 ++ test/internal/String.test.c | 2 ++ test/register_tests.js | 11 ++++++++++- 7 files changed, 20 insertions(+), 1 deletion(-) diff --git a/test/compiler/lexer/Lexer.test.c b/test/compiler/lexer/Lexer.test.c index b452bf2..be7ec60 100644 --- a/test/compiler/lexer/Lexer.test.c +++ b/test/compiler/lexer/Lexer.test.c @@ -2,6 +2,8 @@ #include "unit.h" #include +#define TEST_PRIORITY 90 + DESCRIBE(comment_stripping, "Comments stripping") { Lexer lexer; Lexer_constructor(&lexer); diff --git a/test/compiler/parser/ExpressionParser.test.c 
b/test/compiler/parser/ExpressionParser.test.c index a1f82d0..8fad9ff 100644 --- a/test/compiler/parser/ExpressionParser.test.c +++ b/test/compiler/parser/ExpressionParser.test.c @@ -4,5 +4,6 @@ #include "compiler/lexer/Lexer.h" #include "compiler/parser/Parser.h" +#define TEST_PRIORITY 80 // TODO: Tests for ExpressionParser internals here... diff --git a/test/compiler/parser/Parser.test.c b/test/compiler/parser/Parser.test.c index c225541..8b57a2b 100644 --- a/test/compiler/parser/Parser.test.c +++ b/test/compiler/parser/Parser.test.c @@ -7,6 +7,7 @@ #include "compiler/parser/Parser.h" #include "compiler/parser/ASTNodes.h" +#define TEST_PRIORITY 80 DESCRIBE(variable_declaration, "Variable declaration parsing") { Lexer lexer; diff --git a/test/internal/Array.test.c b/test/internal/Array.test.c index 459faba..b8f7b05 100644 --- a/test/internal/Array.test.c +++ b/test/internal/Array.test.c @@ -2,6 +2,8 @@ #include "unit.h" #include +#define TEST_PRIORITY 100 + DESCRIBE(get, "Array_get") { Array *arr = NULL; diff --git a/test/internal/HashMap.test.c b/test/internal/HashMap.test.c index 9fdb6e8..6c68de7 100644 --- a/test/internal/HashMap.test.c +++ b/test/internal/HashMap.test.c @@ -2,6 +2,8 @@ #include "unit.h" #include +#define TEST_PRIORITY 100 + DESCRIBE(map_alloc, "HashMap_alloc/HashMap_resize") { HashMap *map = NULL; diff --git a/test/internal/String.test.c b/test/internal/String.test.c index 1b863f4..2c04a48 100644 --- a/test/internal/String.test.c +++ b/test/internal/String.test.c @@ -2,6 +2,8 @@ #include "unit.h" #include +#define TEST_PRIORITY 100 + DESCRIBE(equals, "String_equals") { String *str = NULL; diff --git a/test/register_tests.js b/test/register_tests.js index b15ea2e..346fd43 100644 --- a/test/register_tests.js +++ b/test/register_tests.js @@ -18,7 +18,8 @@ const MAX_ID_LENGTH = C_MAX_ID_LENGTH - TEST_SUIT_PREFIX.length - 1; path: file, relative: path.relative(root, file), base: path.basename(file), - functions: /**@type {{name: string, 
description: string, line: number}[]}*/([]) + functions: /**@type {{name: string, description: string, line: number}[]}*/([]), + priority: 0 })); const testNames = new Set(); @@ -27,6 +28,11 @@ const MAX_ID_LENGTH = C_MAX_ID_LENGTH - TEST_SUIT_PREFIX.length - 1; const content = fs.readFileSync(file.path, "utf8"); const regex = /DESCRIBE\s*\(([_a-zA-Z][_a-zA-Z0-9]*)\s*,\s*("(?:\\"|.)*?")\)/g; // 32 - 1 - "unit__".length (6) = 25 + // Get priority of test file + const [m, priority] = content.match(/^\s*#define\s+TEST_PRIORITY\s+([+-]?[0-9]+)/m) || [null, "0"]; + file.priority = parseInt(priority); + + // Get all test functions let match; while((match = regex.exec(content)) !== null) { const [m, name, description] = match; @@ -44,6 +50,9 @@ const MAX_ID_LENGTH = C_MAX_ID_LENGTH - TEST_SUIT_PREFIX.length - 1; } } + // Sort files by priority + files.sort((a, b) => b.priority - a.priority); + fs.writeFileSync(path.join(__dirname, MAIN_FILENAME), `/** * @file test/${MAIN_FILENAME} * @author test/register_tests.js