From 338e3d66abc443fb7200e0f301752f8ea5e7ab8a Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Wed, 14 Aug 2024 01:20:25 +0100 Subject: [PATCH] feat: Add compound statements --- .gitignore | 6 +-- samples/compound.c | 15 ++++++ samples/conditional.c | 8 +++ src/ast.rs | 78 ++++++++++++++++++--------- src/grammar.txt | 8 ++- src/ir.rs | 35 +++++++----- src/lib.rs | 2 + src/semantic.rs | 121 ++++++++++++++++++++++++++++++------------ 8 files changed, 195 insertions(+), 78 deletions(-) create mode 100644 samples/compound.c create mode 100644 samples/conditional.c diff --git a/.gitignore b/.gitignore index 0bc3e5c..f875195 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,9 @@ # Ignore all samples/* - # Unignore all with extensions -samples/!*.* - +!samples/*.* # Unignore all dirs -samples/!*/ +!samples/*/ .vim/ .zed/ diff --git a/samples/compound.c b/samples/compound.c new file mode 100644 index 0000000..880bd60 --- /dev/null +++ b/samples/compound.c @@ -0,0 +1,15 @@ +int main(void) { + int a = 0; + if (a) { + int b = 2; + return b; + } else { + int c = 3; + if (a < c) { + return !a; + } else { + return 5; + } + } + return a; +} \ No newline at end of file diff --git a/samples/conditional.c b/samples/conditional.c new file mode 100644 index 0000000..ddf5dfd --- /dev/null +++ b/samples/conditional.c @@ -0,0 +1,8 @@ +int main(void) { + int a = 4; + int b = (a == 4) ? 8 : 2; + if (b == 6) + return 9; + else + return 3; +} diff --git a/src/ast.rs b/src/ast.rs index 1e5a584..b54fb27 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -5,6 +5,7 @@ use std::fmt; use std::mem::discriminant; use strum_macros::{Display, EnumIs}; +#[inline(always)] fn expect_token(expected: TokenKind, tokens: &mut VecDeque) -> TokenKind { let exp = discriminant(&expected); let actual = discriminant(&tokens[0]); @@ -42,30 +43,13 @@ impl Program { } } -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone, DisplayTree)] #[allow(dead_code)] pub struct Function { pub name: Identifier, pub return_type: String, - pub body: Vec, -} - -impl DisplayTree for Function { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>, style: display_tree::Style) -> std::fmt::Result { - writeln!(f, "{} {}", self.return_type, self.name)?; - for block in &self.body { - writeln!( - f, - "{}{} {}", - style.char_set.connector, - std::iter::repeat(style.char_set.horizontal) - .take(style.indentation as usize) - .collect::(), - display_tree::format_tree!(*block) - )?; - } - Ok(()) - } + #[tree] + pub body: Block, } impl Function { @@ -77,19 +61,52 @@ impl Function { expect_token(TokenKind::ParenOpen, tokens); expect_token(TokenKind::Void, tokens); expect_token(TokenKind::ParenClose, tokens); + + let body = Block::parse(tokens); + + Function { + name, + return_type: return_type.to_string(), + body, + } + } +} + +#[derive(Debug, PartialEq, Clone)] +#[allow(dead_code)] +pub struct Block { + pub body: Vec, +} + +impl Block { + fn parse(tokens: &mut VecDeque) -> Self { expect_token(TokenKind::BraceOpen, tokens); let mut body = vec![]; while !tokens.front().unwrap().to_owned().is_brace_close() { body.push(BlockItem::parse(tokens)); } + expect_token(TokenKind::BraceClose, tokens); - Function { - name, - return_type: return_type.to_string(), - body, + Block { body } + } +} + +impl DisplayTree for Block { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>, style: display_tree::Style) -> std::fmt::Result { + for basic_block in &self.body { + writeln!( + f, + "{}{} {}", + style.char_set.connector, + std::iter::repeat(style.char_set.horizontal) + .take(style.indentation as usize) + .collect::(), + display_tree::format_tree!(*basic_block) + )?; } + Ok(()) } } @@ -170,6 +187,7 @@ pub enum Statement { // TODO: manually implement DisplayTree #[ignore_field] Option>, ), + Compound(#[tree] Block), Null, } @@ -205,6 +223,10 @@ impl Statement { }; Self::If(cond, then_stmt, else_stmt) } + TokenKind::BraceOpen => { + let block = Block::parse(tokens); + Self::Compound(block) + } _ => { let exp = Self::Exp(Expression::parse(tokens, 0)); expect_token(TokenKind::Semicolon, tokens); @@ -430,7 +452,9 @@ mod tests { let function_expected = Function { name: Identifier::new("main"), return_type: "Int".to_owned(), - body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(7)))], + body: Block { + body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(7)))], + }, }; let program_expected = Program { @@ -458,7 +482,9 @@ mod tests { let function_expected = Function { name: Identifier::new("main"), return_type: "Int".to_owned(), - body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(6)))], + body: Block { + body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(6)))], + }, }; assert_eq!(Function::parse(&mut tokens), function_expected); diff --git a/src/grammar.txt b/src/grammar.txt index 8c9f4a2..54c301e 100644 --- a/src/grammar.txt +++ b/src/grammar.txt @@ -3,12 +3,14 @@ AST Definition ========================== program = Program(function_definition) -function_definition = Function(identifier name, block_item* body) +function_definition = Function(identifier name, block body) +block = Block(block_item*) block_item = S(statement) | D(declaration) declaration = Declaration(identifier name, exp? init) statement = Return(exp) | Expression(exp) | If(exp condition, statement then, statement? else) + | Compound(block) | Null exp = Constant(int) | Var(identifier) @@ -25,12 +27,14 @@ Grammar ========================== ::= - ::= "int" "(" "void" ")" "{" {} "}" + ::= "int" "(" "void" ")" + ::= "{" {} "}" ::= | ::= "int" ["=" ] ";" ::= "return" ";" | ";" | "if" "(" ")" ["else" ] + | | ";" ::= | | "?" ":" ::= | | | "(" ")" diff --git a/src/ir.rs b/src/ir.rs index 6450674..f719460 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -76,10 +76,8 @@ impl Function { Function { name: Identifier::generate(function.name), return_type: function.return_type, - instructions: function - .body + instructions: Instruction::generate_from_block(function.body, ctx) .into_iter() - .flat_map(|block| Instruction::generate_from_block(block, ctx)) // Implicit return 0 at the end of each function .chain([Instruction::Return(Val::Constant(0))]) .collect(), @@ -101,8 +99,16 @@ pub enum Instruction { } impl Instruction { - pub fn generate_from_block(block: ast::BlockItem, ctx: &mut IrCtx) -> Vec { - match block { + pub fn generate_from_block(block: ast::Block, ctx: &mut IrCtx) -> Vec { + block + .body + .into_iter() + .flat_map(|block| Instruction::generate_from_basic_block(block, ctx)) + .collect() + } + + pub fn generate_from_basic_block(basic_block: ast::BlockItem, ctx: &mut IrCtx) -> Vec { + match basic_block { ast::BlockItem::Stmt(statement) => Self::generate_from_statement(statement, ctx), ast::BlockItem::Decl(declaration) => Self::generate_from_declaration(declaration, ctx), } @@ -145,6 +151,7 @@ impl Instruction { } instructions } + Statement::Compound(block) => Self::generate_from_block(block, ctx), Statement::Null => vec![], // _ => todo!(), } @@ -417,12 +424,14 @@ mod tests { body: ast::Function { name: ast::Identifier::new("main"), return_type: "Int".to_owned(), - body: vec![ast::BlockItem::Stmt(ast::Statement::Return( - ast::Expression::Unary( - ast::UnaryOperator::Negation, - Box::new(ast::Expression::Constant(5)), - ), - ))], + body: ast::Block { + body: vec![ast::BlockItem::Stmt(ast::Statement::Return( + ast::Expression::Unary( + ast::UnaryOperator::Negation, + Box::new(ast::Expression::Constant(5)), + ), + ))], + }, }, }; @@ -446,7 +455,9 @@ mod tests { let ast_fn = ast::Function { name: ast::Identifier::new("main"), return_type: "Int".to_owned(), - body: vec![ast::BlockItem::Stmt(stmt.clone())], + body: ast::Block { + body: vec![ast::BlockItem::Stmt(stmt.clone())], + }, }; let expected = Function { diff --git a/src/lib.rs b/src/lib.rs index 58836ef..944978d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(let_chains)] + use display_tree::format_tree; use std::fs; use std::io::{Error, Write}; diff --git a/src/semantic.rs b/src/semantic.rs index 951de39..a080933 100644 --- a/src/semantic.rs +++ b/src/semantic.rs @@ -3,17 +3,15 @@ use std::collections::HashMap; static PANIC_STAGE: &str = "Semantic Analysis Error"; +type VariableMap = HashMap; + pub struct SemanticCtx { unique_var_id: u64, - variables: HashMap, } impl SemanticCtx { pub fn new() -> Self { - Self { - unique_var_id: 0, - variables: HashMap::new(), - } + Self { unique_var_id: 0 } } pub fn gen_unique_ident(&mut self, ident: &ast::Identifier) -> ast::Identifier { @@ -22,9 +20,13 @@ impl SemanticCtx { ast::Identifier::new(format!("{}.{}", ident, id).as_str()) } - pub fn get_unique_ident(&self, ident: &ast::Identifier) -> ast::Identifier { - if let Some(unique_ident) = self.variables.get(&ident.to_string()) { - ast::Identifier::new(unique_ident) + pub fn get_unique_ident( + &self, + ident: &ast::Identifier, + variables: &mut VariableMap, + ) -> ast::Identifier { + if let Some(unique_ident_entry) = variables.get(&ident.to_string()) { + ast::Identifier::new(&unique_ident_entry.name) } else { log::error!("Use of undeclared variable {}", ident); panic!("{}", PANIC_STAGE) @@ -32,6 +34,25 @@ impl SemanticCtx { } } +#[derive(Debug, PartialEq, Hash, Clone)] +pub struct VariableMapEntry { + name: String, + from_current_block: bool, +} + +impl VariableMapEntry { + pub fn new(name: String) -> Self { + Self { + name, + from_current_block: true, + } + } + + pub fn unset_from_current_block(&mut self) { + self.from_current_block = false; + } +} + impl ast::Program { pub fn validate(self) -> Self { let mut ctx = SemanticCtx::new(); @@ -43,35 +64,55 @@ impl ast::Program { impl ast::Function { pub fn validate(self, ctx: &mut SemanticCtx) -> Self { + let mut variables: VariableMap = HashMap::new(); Self { name: self.name, return_type: self.return_type, - body: self.body.into_iter().map(|b| b.validate(ctx)).collect(), + body: self.body.validate(ctx, &mut variables), + } + } +} + +impl ast::Block { + pub fn validate(self, ctx: &mut SemanticCtx, variables: &mut VariableMap) -> Self { + Self { + body: self + .body + .into_iter() + .map(|b| b.validate(ctx, variables)) + .collect(), } } } impl ast::BlockItem { - pub fn validate(self, ctx: &mut SemanticCtx) -> Self { + pub fn validate(self, ctx: &mut SemanticCtx, variables: &mut VariableMap) -> Self { match self { - Self::Decl(declaration) => Self::Decl(declaration.validate(ctx)), - Self::Stmt(statement) => Self::Stmt(statement.validate(ctx)), + Self::Decl(declaration) => Self::Decl(declaration.validate(ctx, variables)), + Self::Stmt(statement) => Self::Stmt(statement.validate(ctx, variables)), } } } impl ast::Declaration { - pub fn validate(self, ctx: &mut SemanticCtx) -> Self { - if ctx.variables.contains_key(&self.name.to_string()) { - panic!("Duplicate variable declaration"); + pub fn validate(self, ctx: &mut SemanticCtx, variables: &mut VariableMap) -> Self { + let existing = variables.get(&self.name.to_string()); + + if let Some(entry) = existing + && entry.from_current_block + { + panic!("Duplicate variable declaration, {:?}", entry); } + let name = ctx.gen_unique_ident(&self.name); - ctx.variables - .insert(self.name.to_string(), name.to_string()); + variables.insert( + self.name.to_string(), + VariableMapEntry::new(name.to_string()), + ); let init = if let Some(exp) = self.init { - Some(exp.validate(ctx)) + Some(exp.validate(ctx, variables)) } else { None }; @@ -81,19 +122,26 @@ impl ast::Declaration { } impl ast::Statement { - pub fn validate(self, ctx: &mut SemanticCtx) -> Self { + pub fn validate(self, ctx: &mut SemanticCtx, variables: &mut VariableMap) -> Self { match self { - Self::Return(exp) => Self::Return(exp.validate(ctx)), - Self::Exp(exp) => Self::Exp(exp.validate(ctx)), + Self::Return(exp) => Self::Return(exp.validate(ctx, variables)), + Self::Exp(exp) => Self::Exp(exp.validate(ctx, variables)), Self::If(cond, then_stmt, else_stmt) => Self::If( - cond.validate(ctx), - Box::new(then_stmt.validate(ctx)), + cond.validate(ctx, variables), + Box::new(then_stmt.validate(ctx, variables)), if let Some(stmt) = else_stmt { - Some(Box::new(stmt.validate(ctx))) + Some(Box::new(stmt.validate(ctx, variables))) } else { None }, ), + Self::Compound(block) => { + let mut new_variables: VariableMap = variables.clone(); + for (_, v) in new_variables.iter_mut() { + v.unset_from_current_block(); + } + Self::Compound(block.validate(ctx, &mut new_variables)) + } Self::Null => self, // _ => todo!(), } @@ -101,25 +149,30 @@ impl ast::Statement { } impl ast::Expression { - pub fn validate(self, ctx: &mut SemanticCtx) -> Self { + pub fn validate(self, ctx: &mut SemanticCtx, variables: &mut VariableMap) -> Self { match self { Self::Assignment(left, right) => { if let Self::Var(_) = &*left { - Self::Assignment(Box::new(left.validate(ctx)), Box::new(right.validate(ctx))) + Self::Assignment( + Box::new(left.validate(ctx, variables)), + Box::new(right.validate(ctx, variables)), + ) } else { log::error!("Invalid assignment lvalue {:?}", left); panic!("{}", PANIC_STAGE) } } - Self::Var(ident) => Self::Var(ctx.get_unique_ident(&ident)), - Self::Unary(op, expr) => Self::Unary(op, Box::new(expr.validate(ctx))), - Self::Binary(op, e1, e2) => { - Self::Binary(op, Box::new(e1.validate(ctx)), Box::new(e2.validate(ctx))) - } + Self::Var(ident) => Self::Var(ctx.get_unique_ident(&ident, variables)), + Self::Unary(op, expr) => Self::Unary(op, Box::new(expr.validate(ctx, variables))), + Self::Binary(op, e1, e2) => Self::Binary( + op, + Box::new(e1.validate(ctx, variables)), + Box::new(e2.validate(ctx, variables)), + ), Self::Conditional(cond, then_exp, else_exp) => Self::Conditional( - Box::new(cond.validate(ctx)), - Box::new(then_exp.validate(ctx)), - Box::new(else_exp.validate(ctx)), + Box::new(cond.validate(ctx, variables)), + Box::new(then_exp.validate(ctx, variables)), + Box::new(else_exp.validate(ctx, variables)), ), Self::Constant(_) => self, // _ => todo!(),