From 8304f03d4a27387db735c189096626a002f1e206 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Sun, 15 Sep 2024 19:44:08 +0100 Subject: [PATCH] structure: Restructure lexer & parser files --- {src => doc}/grammar.txt | 0 src/ast/mod.rs | 3 - src/error.rs | 2 +- src/ir.rs | 22 ++--- src/{ => lexer}/lexer.rs | 99 ------------------- src/lexer/mod.rs | 4 + src/lexer/tests.rs | 95 ++++++++++++++++++ src/lib.rs | 3 +- src/{ast/tree.rs => parser/ast.rs} | 152 ++--------------------------- src/parser/mod.rs | 4 + src/{ast => parser}/print.rs | 2 +- src/parser/tests.rs | 135 +++++++++++++++++++++++++ src/semantic.rs | 8 +- src/typecheck.rs | 2 +- 14 files changed, 265 insertions(+), 266 deletions(-) rename {src => doc}/grammar.txt (100%) delete mode 100644 src/ast/mod.rs rename src/{ => lexer}/lexer.rs (81%) create mode 100644 src/lexer/mod.rs create mode 100644 src/lexer/tests.rs rename src/{ast/tree.rs => parser/ast.rs} (80%) create mode 100644 src/parser/mod.rs rename src/{ast => parser}/print.rs (99%) create mode 100644 src/parser/tests.rs diff --git a/src/grammar.txt b/doc/grammar.txt similarity index 100% rename from src/grammar.txt rename to doc/grammar.txt diff --git a/src/ast/mod.rs b/src/ast/mod.rs deleted file mode 100644 index a054078..0000000 --- a/src/ast/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod print; -pub mod tree; -pub use tree::*; diff --git a/src/error.rs b/src/error.rs index f976e3e..65896d2 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,6 @@ -use crate::ast::ParserError; use crate::codegen::CodegenError; use crate::lexer::LexerError; +use crate::parser::ast::ParserError; use crate::semantic::SemanticError; use crate::typecheck::TypeCheckError; use thiserror::Error; diff --git a/src/ir.rs b/src/ir.rs index 15faf81..f721a80 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -2,7 +2,7 @@ use std::fmt; use strum::EnumIs; -use crate::ast::{self, Statement}; +use crate::parser::ast; #[derive(Debug)] #[allow(dead_code)] @@ -162,13 +162,13 @@ impl Instruction { pub fn generate_from_statement(statement: ast::Statement, ctx: &mut IrCtx) -> Vec { match statement { - Statement::Return(expr) => { + ast::Statement::Return(expr) => { let (mut instructions, val) = Self::generate_from_expr(expr, ctx); instructions.push(Instruction::Return(val)); instructions } - Statement::Exp(expr) => Self::generate_from_expr(expr, ctx).0, - Statement::If(cond, then_stmt, else_statement) => { + ast::Statement::Exp(expr) => Self::generate_from_expr(expr, ctx).0, + ast::Statement::If(cond, then_stmt, else_statement) => { let end_label = ctx.label("jump_end"); let (mut instructions, cond_val) = Self::generate_from_expr(cond, ctx); @@ -187,20 +187,20 @@ impl Instruction { } instructions } - Statement::Compound(block) => Self::generate_from_block(block, ctx), - Statement::Break(label) => { + ast::Statement::Compound(block) => Self::generate_from_block(block, ctx), + ast::Statement::Break(label) => { vec![Instruction::Jump(Identifier::new(&format!( "break_{}", label.unwrap() )))] } - Statement::Continue(label) => { + ast::Statement::Continue(label) => { vec![Instruction::Jump(Identifier::new(&format!( "continue_{}", label.unwrap() )))] } - Statement::While(cond, body, label) => { + ast::Statement::While(cond, body, label) => { let start_label = Identifier::new(&format!("continue_{}", label.as_ref().unwrap())); let end_label = Identifier::new(&format!("break_{}", label.as_ref().unwrap())); @@ -215,7 +215,7 @@ impl Instruction { log::trace!("Emitting IR for while -> {:?}", instructions); instructions } - Statement::DoWhile(body, cond, label) => { + ast::Statement::DoWhile(body, cond, label) => { let start_label = Identifier::new(&format!("start_{}", label.as_ref().unwrap())); let break_label = Identifier::new(&format!("break_{}", label.as_ref().unwrap())); let continue_label = @@ -232,7 +232,7 @@ impl Instruction { log::trace!("Emitting IR for do-while -> {:?}", instructions); instructions } - Statement::For(init, cond, post, body, label) => { + ast::Statement::For(init, cond, post, body, label) => { let start_label = Identifier::new(&format!("start_{}", label.as_ref().unwrap())); let continue_label = Identifier::new(&format!("continue_{}", label.as_ref().unwrap())); @@ -256,7 +256,7 @@ impl Instruction { instructions.push(Instruction::Label(end_label)); instructions } - Statement::Null => vec![], + ast::Statement::Null => vec![], // _ => todo!(), } } diff --git a/src/lexer.rs b/src/lexer/lexer.rs similarity index 81% rename from src/lexer.rs rename to src/lexer/lexer.rs index 3b65045..6ee05af 100644 --- a/src/lexer.rs +++ b/src/lexer/lexer.rs @@ -306,102 +306,3 @@ pub fn run_lexer(source: String) -> LexerResult> { let mut lexer = Lexer::new(&source.trim()); lexer.tokenize() } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tokenize_identifier() { - assert_eq!( - TokenKind::from_str("test"), - Some(TokenKind::Identifier("test".to_owned())) - ); - assert_eq!( - TokenKind::from_str("main"), - Some(TokenKind::Identifier("main".to_owned())) - ); - assert_eq!( - TokenKind::from_str("ma_n"), - Some(TokenKind::Identifier("ma_n".to_owned())) - ); - assert_eq!(TokenKind::from_str("53main"), None); - assert_eq!(TokenKind::from_str("ma.in"), None); - } - - #[test] - fn tokenize_constant() { - assert_eq!(TokenKind::from_str("66"), Some(TokenKind::Constant(66))); - assert_eq!(TokenKind::from_str("32"), Some(TokenKind::Constant(32))); - } - - #[test] - fn tokenize_void() { - assert_eq!(TokenKind::from_str("void").unwrap(), TokenKind::Void); - } - - #[test] - fn tokenize_return() { - assert_eq!(TokenKind::from_str("return").unwrap(), TokenKind::Return); - } - - #[test] - fn tokenize_decrement() { - assert_eq!(TokenKind::from_str("--").unwrap(), TokenKind::Decrement); - } - - #[test] - fn tokenize_complement() { - assert_eq!(TokenKind::from_str("~").unwrap(), TokenKind::Complement); - } - - #[test] - fn tokenize_semicolon() { - assert_eq!(TokenKind::from_str(";").unwrap(), TokenKind::Semicolon); - } - - #[test] - fn tokenize_brace_close() { - assert_eq!(TokenKind::from_str("}").unwrap(), TokenKind::BraceClose); - } - - #[test] - fn tokenize_brace_open() { - assert_eq!(TokenKind::from_str("{").unwrap(), TokenKind::BraceOpen); - } - - #[test] - fn tokenize_paren_close() { - assert_eq!(TokenKind::from_str(")").unwrap(), TokenKind::ParenClose); - } - - #[test] - fn tokenize_paren_open() { - assert_eq!(TokenKind::from_str("(").unwrap(), TokenKind::ParenOpen); - } - - #[test] - fn tokenize_minus() { - assert_eq!(TokenKind::from_str("-").unwrap(), TokenKind::Minus); - } - - #[test] - fn tokenize_plus() { - assert_eq!(TokenKind::from_str("+").unwrap(), TokenKind::Plus); - } - - #[test] - fn tokenize_asterisk() { - assert_eq!(TokenKind::from_str("*").unwrap(), TokenKind::Asterisk); - } - - #[test] - fn tokenize_slash() { - assert_eq!(TokenKind::from_str("/").unwrap(), TokenKind::Slash); - } - - #[test] - fn tokenize_percent() { - assert_eq!(TokenKind::from_str("%").unwrap(), TokenKind::Percent); - } -} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..c2a4eac --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,4 @@ +pub mod lexer; +pub use lexer::*; +#[cfg(test)] +mod tests; diff --git a/src/lexer/tests.rs b/src/lexer/tests.rs new file mode 100644 index 0000000..6b83baf --- /dev/null +++ b/src/lexer/tests.rs @@ -0,0 +1,95 @@ +use super::*; + +#[test] +fn tokenize_identifier() { + assert_eq!( + TokenKind::from_str("test"), + Some(TokenKind::Identifier("test".to_owned())) + ); + assert_eq!( + TokenKind::from_str("main"), + Some(TokenKind::Identifier("main".to_owned())) + ); + assert_eq!( + TokenKind::from_str("ma_n"), + Some(TokenKind::Identifier("ma_n".to_owned())) + ); + assert_eq!(TokenKind::from_str("53main"), None); + assert_eq!(TokenKind::from_str("ma.in"), None); +} + +#[test] +fn tokenize_constant() { + assert_eq!(TokenKind::from_str("66"), Some(TokenKind::Constant(66))); + assert_eq!(TokenKind::from_str("32"), Some(TokenKind::Constant(32))); +} + +#[test] +fn tokenize_void() { + assert_eq!(TokenKind::from_str("void").unwrap(), TokenKind::Void); +} + +#[test] +fn tokenize_return() { + assert_eq!(TokenKind::from_str("return").unwrap(), TokenKind::Return); +} + +#[test] +fn tokenize_decrement() { + assert_eq!(TokenKind::from_str("--").unwrap(), TokenKind::Decrement); +} + +#[test] +fn tokenize_complement() { + assert_eq!(TokenKind::from_str("~").unwrap(), TokenKind::Complement); +} + +#[test] +fn tokenize_semicolon() { + assert_eq!(TokenKind::from_str(";").unwrap(), TokenKind::Semicolon); +} + +#[test] +fn tokenize_brace_close() { + assert_eq!(TokenKind::from_str("}").unwrap(), TokenKind::BraceClose); +} + +#[test] +fn tokenize_brace_open() { + assert_eq!(TokenKind::from_str("{").unwrap(), TokenKind::BraceOpen); +} + +#[test] +fn tokenize_paren_close() { + assert_eq!(TokenKind::from_str(")").unwrap(), TokenKind::ParenClose); +} + +#[test] +fn tokenize_paren_open() { + assert_eq!(TokenKind::from_str("(").unwrap(), TokenKind::ParenOpen); +} + +#[test] +fn tokenize_minus() { + assert_eq!(TokenKind::from_str("-").unwrap(), TokenKind::Minus); +} + +#[test] +fn tokenize_plus() { + assert_eq!(TokenKind::from_str("+").unwrap(), TokenKind::Plus); +} + +#[test] +fn tokenize_asterisk() { + assert_eq!(TokenKind::from_str("*").unwrap(), TokenKind::Asterisk); +} + +#[test] +fn tokenize_slash() { + assert_eq!(TokenKind::from_str("/").unwrap(), TokenKind::Slash); +} + +#[test] +fn tokenize_percent() { + assert_eq!(TokenKind::from_str("%").unwrap(), TokenKind::Percent); +} diff --git a/src/lib.rs b/src/lib.rs index 8587482..fac1b3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,17 +8,18 @@ use std::process::Command; use strum::EnumIs; use synoptic; -pub mod ast; pub mod codegen; pub mod error; pub mod ir; pub mod lexer; #[cfg(feature = "llvm")] pub mod llvm_ir; +pub mod parser; pub mod semantic; pub mod typecheck; use cfg_if::cfg_if; +use parser::ast; use lexer::TokenKind; diff --git a/src/ast/tree.rs b/src/parser/ast.rs similarity index 80% rename from src/ast/tree.rs rename to src/parser/ast.rs index 42745f3..ce15686 100644 --- a/src/ast/tree.rs +++ b/src/parser/ast.rs @@ -63,7 +63,7 @@ pub struct FunctionDeclaration { } impl FunctionDeclaration { - fn parse(tokens: &mut VecDeque) -> ParserResult { + pub fn parse(tokens: &mut VecDeque) -> ParserResult { log_trace("parsing function from", tokens); let return_type = expect_token(TokenKind::Int, tokens)?; @@ -155,7 +155,7 @@ pub enum BlockItem { } impl BlockItem { - fn parse(tokens: &mut VecDeque) -> ParserResult { + pub fn parse(tokens: &mut VecDeque) -> ParserResult { log_trace("parsing block item from", tokens); let token = tokens.front().unwrap().to_owned(); @@ -252,7 +252,7 @@ pub enum Statement { } impl Statement { - fn parse(tokens: &mut VecDeque) -> ParserResult { + pub fn parse(tokens: &mut VecDeque) -> ParserResult { log_trace("Trying statement from", tokens); let token = tokens.front().unwrap().to_owned(); @@ -386,7 +386,7 @@ pub enum Expression { } impl Expression { - fn parse(tokens: &mut VecDeque, min_precedence: u32) -> ParserResult { + pub fn parse(tokens: &mut VecDeque, min_precedence: u32) -> ParserResult { log_trace("Trying expr from", tokens); if tokens.len() == 0 { @@ -420,7 +420,7 @@ impl Expression { Ok(left) } - fn parse_factor(tokens: &mut VecDeque) -> ParserResult { + pub fn parse_factor(tokens: &mut VecDeque) -> ParserResult { log_trace("Trying factor from", tokens); if tokens.len() == 0 { @@ -477,7 +477,7 @@ impl Expression { Err(ParserError::MalformedExpression) } - fn parse_optional(tokens: &mut VecDeque) -> ParserResult> { + pub fn parse_optional(tokens: &mut VecDeque) -> ParserResult> { let expr = Expression::parse(tokens, 0); if let Ok(expr) = expr { Ok(Some(expr)) @@ -549,7 +549,7 @@ pub enum UnaryOperator { } impl UnaryOperator { - fn parse(tokens: &mut VecDeque) -> ParserResult { + pub fn parse(tokens: &mut VecDeque) -> ParserResult { let token = tokens.pop_front().unwrap(); match token { @@ -609,141 +609,3 @@ fn expect_token(expected: TokenKind, tokens: &mut VecDeque) -> Parser Ok(tokens.pop_front().unwrap()) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_program() { - let tokens = vec![ - TokenKind::Int, - TokenKind::Identifier("main".to_owned()), - TokenKind::ParenOpen, - TokenKind::Void, - TokenKind::ParenClose, - TokenKind::BraceOpen, - TokenKind::Return, - TokenKind::Constant(7), - TokenKind::Semicolon, - TokenKind::BraceClose, - ]; - - let function_expected = FunctionDeclaration { - name: Identifier::new("main"), - params: vec![], - return_type: "Int".to_owned(), - body: Some(Block { - body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(7)))], - }), - }; - - let program_expected = Program { - body: vec![function_expected], - }; - - assert_eq!(Program::parse(tokens).unwrap(), program_expected); - } - - #[test] - fn parse_function() { - let mut tokens = VecDeque::from([ - TokenKind::Int, - TokenKind::Identifier("main".to_owned()), - TokenKind::ParenOpen, - TokenKind::Void, - TokenKind::ParenClose, - TokenKind::BraceOpen, - TokenKind::Return, - TokenKind::Constant(6), - TokenKind::Semicolon, - TokenKind::BraceClose, - ]); - - let function_expected = FunctionDeclaration { - name: Identifier::new("main"), - params: vec![], - return_type: "Int".to_owned(), - body: Some(Block { - body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(6)))], - }), - }; - - assert_eq!( - FunctionDeclaration::parse(&mut tokens).unwrap(), - function_expected - ); - assert!(tokens.is_empty()); - } - - #[test] - fn parse_statement_return() { - let mut tokens = VecDeque::from([ - TokenKind::Return, - TokenKind::Constant(6), - TokenKind::Semicolon, - ]); - assert_eq!( - Statement::parse(&mut tokens).unwrap(), - Statement::Return(Expression::Constant(6)) - ); - assert!(tokens.is_empty()); - } - - #[test] - fn parse_expression_factor_constant() { - let mut tokens = VecDeque::from([TokenKind::Constant(3)]); - assert_eq!( - Expression::parse_factor(&mut tokens).unwrap(), - Expression::Constant(3) - ); - assert!(tokens.is_empty()); - } - - #[test] - fn parse_expression_factor_unary() { - let mut tokens = VecDeque::from([TokenKind::Minus, TokenKind::Constant(2)]); - - let expr = Expression::parse_factor(&mut tokens).unwrap(); - let expected = - Expression::Unary(UnaryOperator::Negation, Box::new(Expression::Constant(2))); - assert_eq!(expr, expected); - assert!(tokens.is_empty()); - } - - #[test] - fn parse_expression_unary_nested() { - let mut tokens = VecDeque::from([ - TokenKind::Complement, - TokenKind::ParenOpen, - TokenKind::Minus, - TokenKind::Constant(4), - TokenKind::ParenClose, - ]); - - let expr = Expression::parse_factor(&mut tokens).unwrap(); - let expected = Expression::Unary( - UnaryOperator::Complement, - Box::new(Expression::Unary( - UnaryOperator::Negation, - Box::new(Expression::Constant(4)), - )), - ); - assert_eq!(expr, expected); - assert!(tokens.is_empty()); - } - - #[test] - fn parse_unary() { - let mut tokens = VecDeque::from([TokenKind::Complement, TokenKind::Minus]); - assert_eq!( - UnaryOperator::parse(&mut tokens).unwrap(), - UnaryOperator::Complement - ); - assert_eq!( - UnaryOperator::parse(&mut tokens).unwrap(), - UnaryOperator::Negation - ); - assert!(tokens.is_empty()); - } -} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..b2db0bc --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,4 @@ +pub mod ast; +pub mod print; +#[cfg(test)] +mod tests; diff --git a/src/ast/print.rs b/src/parser/print.rs similarity index 99% rename from src/ast/print.rs rename to src/parser/print.rs index e501aa0..0a1dfd8 100644 --- a/src/ast/print.rs +++ b/src/parser/print.rs @@ -1,4 +1,4 @@ -use super::tree::*; +use super::ast::*; use std::fmt; static INDENT: &str = " "; diff --git a/src/parser/tests.rs b/src/parser/tests.rs new file mode 100644 index 0000000..f478c27 --- /dev/null +++ b/src/parser/tests.rs @@ -0,0 +1,135 @@ +use super::ast::*; +use crate::lexer::*; +use std::collections::VecDeque; + +#[test] +fn parse_program() { + let tokens = vec![ + TokenKind::Int, + TokenKind::Identifier("main".to_owned()), + TokenKind::ParenOpen, + TokenKind::Void, + TokenKind::ParenClose, + TokenKind::BraceOpen, + TokenKind::Return, + TokenKind::Constant(7), + TokenKind::Semicolon, + TokenKind::BraceClose, + ]; + + let function_expected = FunctionDeclaration { + name: Identifier::new("main"), + params: vec![], + return_type: "Int".to_owned(), + body: Some(Block { + body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(7)))], + }), + }; + + let program_expected = Program { + body: vec![function_expected], + }; + + assert_eq!(Program::parse(tokens).unwrap(), program_expected); +} + +#[test] +fn parse_function() { + let mut tokens = VecDeque::from([ + TokenKind::Int, + TokenKind::Identifier("main".to_owned()), + TokenKind::ParenOpen, + TokenKind::Void, + TokenKind::ParenClose, + TokenKind::BraceOpen, + TokenKind::Return, + TokenKind::Constant(6), + TokenKind::Semicolon, + TokenKind::BraceClose, + ]); + + let function_expected = FunctionDeclaration { + name: Identifier::new("main"), + params: vec![], + return_type: "Int".to_owned(), + body: Some(Block { + body: vec![BlockItem::Stmt(Statement::Return(Expression::Constant(6)))], + }), + }; + + assert_eq!( + FunctionDeclaration::parse(&mut tokens).unwrap(), + function_expected + ); + assert!(tokens.is_empty()); +} + +#[test] +fn parse_statement_return() { + let mut tokens = VecDeque::from([ + TokenKind::Return, + TokenKind::Constant(6), + TokenKind::Semicolon, + ]); + assert_eq!( + Statement::parse(&mut tokens).unwrap(), + Statement::Return(Expression::Constant(6)) + ); + assert!(tokens.is_empty()); +} + +#[test] +fn parse_expression_factor_constant() { + let mut tokens = VecDeque::from([TokenKind::Constant(3)]); + assert_eq!( + Expression::parse_factor(&mut tokens).unwrap(), + Expression::Constant(3) + ); + assert!(tokens.is_empty()); +} + +#[test] +fn parse_expression_factor_unary() { + let mut tokens = VecDeque::from([TokenKind::Minus, TokenKind::Constant(2)]); + + let expr = Expression::parse_factor(&mut tokens).unwrap(); + let expected = Expression::Unary(UnaryOperator::Negation, Box::new(Expression::Constant(2))); + assert_eq!(expr, expected); + assert!(tokens.is_empty()); +} + +#[test] +fn parse_expression_unary_nested() { + let mut tokens = VecDeque::from([ + TokenKind::Complement, + TokenKind::ParenOpen, + TokenKind::Minus, + TokenKind::Constant(4), + TokenKind::ParenClose, + ]); + + let expr = Expression::parse_factor(&mut tokens).unwrap(); + let expected = Expression::Unary( + UnaryOperator::Complement, + Box::new(Expression::Unary( + UnaryOperator::Negation, + Box::new(Expression::Constant(4)), + )), + ); + assert_eq!(expr, expected); + assert!(tokens.is_empty()); +} + +#[test] +fn parse_unary() { + let mut tokens = VecDeque::from([TokenKind::Complement, TokenKind::Minus]); + assert_eq!( + UnaryOperator::parse(&mut tokens).unwrap(), + UnaryOperator::Complement + ); + assert_eq!( + UnaryOperator::parse(&mut tokens).unwrap(), + UnaryOperator::Negation + ); + assert!(tokens.is_empty()); +} diff --git a/src/semantic.rs b/src/semantic.rs index b566e74..cc3be6e 100644 --- a/src/semantic.rs +++ b/src/semantic.rs @@ -1,4 +1,4 @@ -use crate::ast::{self, *}; +use crate::parser::ast; use std::collections::HashMap; use strum_macros::EnumIs; use thiserror::Error; @@ -119,7 +119,7 @@ impl ast::Program { .body .into_iter() .map(|f| f.resolve(ctx, &mut identifiers)) - .collect::>>()?, + .collect::>>()?, }) } @@ -129,7 +129,7 @@ impl ast::Program { .body .into_iter() .map(|f| f.label(ctx)) - .collect::>>()?, + .collect::>>()?, }) } } @@ -485,7 +485,7 @@ impl ast::Expression { let args = args .into_iter() .map(|arg| arg.resolve(ctx, identifiers)) - .collect::>>()?; + .collect::>>()?; Self::FunctionCall(ident, args) } else { return Err(SemanticError::UndeclaredFunction(name.to_string())); diff --git a/src/typecheck.rs b/src/typecheck.rs index cc47923..e3ecede 100644 --- a/src/typecheck.rs +++ b/src/typecheck.rs @@ -1,4 +1,4 @@ -use crate::ast::*; +use crate::parser::ast::*; use std::collections::HashMap; use strum_macros::EnumIs; use thiserror::Error;