diff --git a/README.md b/README.md index 0daa022..4215999 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,11 @@ OPTIONS: ### Examples See the `examples` or `test` directory! The official [B reference manual](https://www.bell-labs.com/usr/dmr/www/bref.html) and [B tutorial](https://www.bell-labs.com/usr/dmr/www/btut.html) explain all the supported functionality. + +Use this command to run compiler tests: +``` +cargo run -- -r run_tests.b +``` ### Standard Library There are some standard library functions defined in `assets`. They aren't bundled with the release for now, so if you want to use them you'll have to download them. diff --git a/assets/memory.b b/assets/memory.b index 17732bb..65cf661 100644 --- a/assets/memory.b +++ b/assets/memory.b @@ -1,3 +1,6 @@ +/* Increase the heap size in increments of 32KiB */ +#define _HEAP_INCREMENT 077777; + _heapBegin 0; _heapEnd 0; @@ -9,7 +12,7 @@ malloc(count) { extrn _heapBegin, _heapEnd; if (_heapBegin == 0) { _heapBegin = syscall(12, 0); - _heapEnd = syscall(12, _heapBegin + 077777); + _heapEnd = syscall(12, _heapBegin + _HEAP_INCREMENT); /* Slot header: (size << 1) + occupiedBit */ /* size=0 and occupied=0 indicate the end of the chunk list */ *_heapBegin = 0; @@ -33,8 +36,7 @@ malloc(count) { ptrEnd = ptr + byteCount + 8; if (ptrEnd >= _heapEnd) { - /* Increase the heap size in increments of 32KiB */ - _heapEnd = syscall(12, (ptrEnd + 077777) & ~077777); + _heapEnd = syscall(12, (ptrEnd + _HEAP_INCREMENT) & ~_HEAP_INCREMENT); } /* Set a new null terminator */ *ptrEnd = 0; diff --git a/run_tests.b b/run_tests.b index 7f206f0..850718d 100644 --- a/run_tests.b +++ b/run_tests.b @@ -49,6 +49,7 @@ main() { it =+ reclen; } } + printf("Great success!*n"); } print_divider(len) { @@ -56,4 +57,4 @@ print_divider(len) { putchar('='); } putchar('*n'); -} \ No newline at end of file +} diff --git a/src/ast.rs b/src/ast.rs index 1be595e..49a6791 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -7,8 +7,8 @@ pub struct Pos { impl Pos 
{ pub fn new(offset: usize, file_id: usize) -> Pos { Pos { - offset: offset, - file_id: file_id, + offset, + file_id, } } } @@ -23,16 +23,14 @@ impl CompErr { pub fn err(pos: &Pos, message: String) -> Result { Err(CompErr { pos: Some(pos.clone()), - message: message, + message, }) } - pub fn from_io_res( - io_res: Result - ) -> Result { + pub fn from_io_res(io_res: Result) -> Result { match io_res { Ok(result) => Ok(result), - Err(err) => Err(CompErr { + Err(err) => Err(CompErr { pos: None, message: err.to_string(), }), @@ -54,10 +52,10 @@ pub struct RootStatements { impl RootStatements { pub fn new() -> RootStatements { RootStatements { - functions: vec!(), - variables: vec!(), - imports: vec!(), - defines: vec!(), + functions: vec![], + variables: vec![], + imports: vec![], + defines: vec![], } } } @@ -147,26 +145,26 @@ pub enum Expr { Reference(Pos, String), Dereference(Pos, Box), } - + impl GetPos for Expr { fn pos(&self) -> Pos { match self { - Expr::Id(pos, _) => pos.clone(), - Expr::Str(pos, _) => pos.clone(), - Expr::Call(pos, _, _) => pos.clone(), - Expr::Int(pos, _) => pos.clone(), - Expr::Assignment(pos, _, _) => pos.clone(), + Expr::Id(pos, _) => pos.clone(), + Expr::Str(pos, _) => pos.clone(), + Expr::Call(pos, _, _) => pos.clone(), + Expr::Int(pos, _) => pos.clone(), + Expr::Assignment(pos, _, _) => pos.clone(), Expr::DerefAssignment(pos, _, _) => pos.clone(), - Expr::UnaryOperator(pos, _, _) => pos.clone(), - Expr::BinOperator(pos, _, _, _) => pos.clone(), - Expr::Reference(pos, _) => pos.clone(), - Expr::Dereference(pos, _) => pos.clone(), - Expr::Cond(pos, _, _, _) => pos.clone(), + Expr::UnaryOperator(pos, _, _) => pos.clone(), + Expr::BinOperator(pos, _, _, _) => pos.clone(), + Expr::Reference(pos, _) => pos.clone(), + Expr::Dereference(pos, _) => pos.clone(), + Expr::Cond(pos, _, _, _) => pos.clone(), } } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub enum BinOp { Assign(Option>), // FIXME: This shouldn't need 
to be heap allocated Add, @@ -193,7 +191,7 @@ impl BinOp { } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub enum UnaryOp { PreIncrement, PreDecrement, @@ -206,14 +204,9 @@ pub enum UnaryOp { impl BinOp { #[allow(dead_code)] pub fn is_comparison(&self) -> bool { - match self { - BinOp::Eq => true, - BinOp::Ne => true, - BinOp::Le => true, - BinOp::Ge => true, - BinOp::Lt => true, - BinOp::Gt => true, - _ => false, - } + matches!( + self, + BinOp::Eq | BinOp::Ne | BinOp::Le | BinOp::Ge | BinOp::Lt | BinOp::Gt + ) } } diff --git a/src/codegen.rs b/src/codegen.rs index 5064525..596f685 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -1,8 +1,8 @@ -use std::sync::Condvar; use std::collections::HashMap; use std::collections::HashSet; use std::io::BufWriter; use std::io::Write; +use std::sync::Condvar; use std::sync::{Arc, Mutex}; use std::thread; @@ -57,7 +57,7 @@ impl FunContext<'_> { } fn new_scope(&mut self) { - self.block_vars.push(vec!()); + self.block_vars.push(vec![]); } fn drop_scope(&mut self) { @@ -67,12 +67,9 @@ impl FunContext<'_> { } } - fn add_to_scope( - &mut self, pos: &Pos, name: String, entry: ScopeEntry - ) -> Result<(), CompErr> { + fn add_to_scope(&mut self, pos: &Pos, name: String, entry: ScopeEntry) -> Result<(), CompErr> { if self.fun_scope.contains_key(&name) { - return CompErr::err(pos, format!( - "{} is already in defined in this scope", name)); + return CompErr::err(pos, format!("{} is already in defined in this scope", name)); } self.fun_scope.insert(name.clone(), entry); self.block_vars.last_mut().unwrap().push(name); @@ -86,7 +83,7 @@ impl FunContext<'_> { match self.global_scope.get(name) { // Only allow referencing global vars when users specify "extrn" Some(ScopeEntry::Var(Loc::Data(_))) => None, - other => other, + other => other, } } } @@ -101,10 +98,12 @@ fn label_for_string_id(file_id: usize, string_index: usize) -> String { * @param offset The positive offset from rbp (how much space came before 
this) */ fn prepass_gen( - c: &mut FunContext, instructions: &mut Vec, - body: &Statement, offset: i64 + c: &mut FunContext, + instructions: &mut Vec, + body: &Statement, + offset: i64, ) -> Result<(), CompErr> { - let mut stack = vec!(body); + let mut stack = vec![body]; let mut autos_size = 0; // DFS to find them all @@ -112,46 +111,48 @@ fn prepass_gen( match stack.pop().unwrap() { Statement::Label(pos, name) => { if c.labels.contains_key(name) { - return CompErr::err(pos, format!( - "Label {} already defined in this function", name)); + return CompErr::err( + pos, + format!("Label {} already defined in this function", name), + ); } let l = c.new_label(&format!("LAB_{}", name).to_string()); c.labels.insert(name.clone(), l); - }, + } Statement::Auto(pos, vars) => { for var in vars { let name = var.name(); if c.local_var_locs.contains_key(name) { - return CompErr::err(pos, format!( - "{} already defined in this function", name)); + return CompErr::err( + pos, + format!("{} already defined in this function", name), + ); } let size = match var { Var::Vec(_, vec_size, _) => 1 + vec_size, - Var::Single(_, _) => 1, + Var::Single(_, _) => 1, }; - c.local_var_locs.insert( - name.clone(), - Loc::Stack(-offset - autos_size) - ); + c.local_var_locs + .insert(name.clone(), Loc::Stack(-offset - autos_size)); autos_size += size; } - }, + } Statement::Block(statements) => { for s in statements { stack.push(s); } - }, + } Statement::If(_, if_body, Some(else_body)) => { stack.push(if_body); stack.push(else_body); - }, + } Statement::If(_, body, None) => stack.push(body), - Statement::While(_, body) => stack.push(body), - _ => {}, + Statement::While(_, body) => stack.push(body), + _ => {} } } @@ -164,10 +165,12 @@ fn prepass_gen( // Allocates the necessary args on the stack fn alloc_args( - c: &mut FunContext, instructions: &mut Vec, - pos: &Pos, args: &Vec + c: &mut FunContext, + instructions: &mut Vec, + pos: &Pos, + args: &[String], ) -> Result<(), CompErr> { - for i in 
0..args.len() { + for (i, arg) in args.iter().enumerate() { let loc = if i < 6 { let register = Reg::for_arg_num(i); instructions.push(format!("pushq %{}", register)); @@ -176,15 +179,17 @@ fn alloc_args( Loc::Stack((i as i64) - 4) }; - c.local_var_locs.insert(args[i].clone(), loc.clone()); - c.add_to_scope(pos, args[i].clone(), ScopeEntry::Var(loc))?; + c.local_var_locs.insert(arg.clone(), loc.clone()); + c.add_to_scope(pos, arg.clone(), ScopeEntry::Var(loc))?; } Ok(()) } fn gen_op_cmp( instructions: &mut Vec, - command: &str, lhs_loc: Loc, rhs_loc: Loc + command: &str, + lhs_loc: Loc, + rhs_loc: Loc, ) -> (Loc, RegSet) { instructions.push(format!("cmpq {},{}", rhs_loc, lhs_loc)); instructions.push(format!("movq $0,{}", lhs_loc)); @@ -200,7 +205,9 @@ fn gen_op_cmp( fn gen_op_single( instructions: &mut Vec, - command: &str, lhs_loc: Loc, rhs_loc: Loc + command: &str, + lhs_loc: Loc, + rhs_loc: Loc, ) -> (Loc, RegSet) { instructions.push(format!("{} {},{}", command, rhs_loc, lhs_loc)); (lhs_loc, RegSet::empty()) @@ -208,18 +215,19 @@ fn gen_op_single( fn gen_op_shift( instructions: &mut Vec, - command: &str, lhs_loc: Loc, rhs_loc: Loc + command: &str, + lhs_loc: Loc, + rhs_loc: Loc, ) -> (Loc, RegSet) { match rhs_loc { - rhs_loc @ Loc::Immediate(_) => - gen_op_single(instructions, command, lhs_loc, rhs_loc), + rhs_loc @ Loc::Immediate(_) => gen_op_single(instructions, command, lhs_loc, rhs_loc), _ => { // Required to use %cl register for non immediates during shifts instructions.push(format!("movq {},%rcx", rhs_loc)); instructions.push(format!("{} %cl,{}", command, lhs_loc)); let used_registers = RegSet::of(Reg::Rcx); (lhs_loc, used_registers) - }, + } } } @@ -229,30 +237,27 @@ fn gen_op_shift( */ fn gen_op_pre_float_op( instructions: &mut Vec, - lhs_loc: Loc, init_rhs_loc: Loc + lhs_loc: Loc, + init_rhs_loc: Loc, ) -> (Loc, RegSet) { // rax and rdx are always used for div or mod let mut used_registers = RegSet::of(Reg::Rax).with(Reg::Rdx); - let should_move_rhs = 
match init_rhs_loc { - Loc::Register(Reg::Rax) => true, - Loc::Register(Reg::Rdx) => true, - Loc::Immediate(_) => true, - _ => false, - }; + let should_move_rhs = matches!( + init_rhs_loc, + Loc::Register(Reg::Rax) | Loc::Register(Reg::Rdx) | Loc::Immediate(_) + ); // Move rhs to a new register if necessary let rhs_loc = if should_move_rhs { // Make sure we don't override LHS let dest_reg = match lhs_loc { Loc::Register(Reg::Rcx) => Reg::Rdi, - _ => Reg::Rcx, + _ => Reg::Rcx, }; used_registers = used_registers.with(dest_reg); - instructions.push(format!( - "movq {},%{}", init_rhs_loc, dest_reg - )); + instructions.push(format!("movq {},%{}", init_rhs_loc, dest_reg)); Loc::Register(dest_reg) } else { @@ -260,7 +265,7 @@ fn gen_op_pre_float_op( }; match lhs_loc { - Loc::Register(Reg::Rax) => {}, + Loc::Register(Reg::Rax) => {} lhs_loc => instructions.push(format!("movq {},%rax", lhs_loc)), }; @@ -271,12 +276,8 @@ fn gen_op_pre_float_op( * Generate instructions to diving (or mod) two numbers * As per x86 idivq, the results always go to rax and rdx */ -fn gen_op_pre_div( - instructions: &mut Vec, - lhs_loc: Loc, init_rhs_loc: Loc -) -> RegSet { - let (rhs_loc, used_registers) = - gen_op_pre_float_op(instructions, lhs_loc, init_rhs_loc); +fn gen_op_pre_div(instructions: &mut Vec, lhs_loc: Loc, init_rhs_loc: Loc) -> RegSet { + let (rhs_loc, used_registers) = gen_op_pre_float_op(instructions, lhs_loc, init_rhs_loc); instructions.push("movq $0,%rdx".to_string()); instructions.push(format!("idivq {}", rhs_loc)); @@ -284,28 +285,18 @@ fn gen_op_pre_div( used_registers } -fn gen_op_mod( - instructions: &mut Vec, - lhs_loc: Loc, rhs_loc: Loc -) -> (Loc, RegSet) { +fn gen_op_mod(instructions: &mut Vec, lhs_loc: Loc, rhs_loc: Loc) -> (Loc, RegSet) { let used_registers = gen_op_pre_div(instructions, lhs_loc, rhs_loc); (Loc::Register(Reg::Rdx), used_registers) } -fn gen_op_div( - instructions: &mut Vec, - lhs_loc: Loc, rhs_loc: Loc -) -> (Loc, RegSet) { +fn 
gen_op_div(instructions: &mut Vec, lhs_loc: Loc, rhs_loc: Loc) -> (Loc, RegSet) { let used_registers = gen_op_pre_div(instructions, lhs_loc, rhs_loc); (Loc::Register(Reg::Rax), used_registers) } -fn gen_op_mul( - instructions: &mut Vec, - lhs_loc: Loc, rhs_loc: Loc -) -> (Loc, RegSet) { - let (rhs_loc, used_registers) = - gen_op_pre_float_op(instructions, lhs_loc, rhs_loc); +fn gen_op_mul(instructions: &mut Vec, lhs_loc: Loc, rhs_loc: Loc) -> (Loc, RegSet) { + let (rhs_loc, used_registers) = gen_op_pre_float_op(instructions, lhs_loc, rhs_loc); instructions.push(format!("imulq {}", rhs_loc)); (Loc::Register(Reg::Rax), used_registers) } @@ -318,44 +309,43 @@ fn gen_op_mul( */ fn gen_op_command( instructions: &mut Vec, - op: &BinOp, lhs_loc: Loc, rhs_loc: Loc + op: &BinOp, + lhs_loc: Loc, + rhs_loc: Loc, ) -> (Loc, RegSet) { match op { - BinOp::Add => gen_op_single(instructions, "addq", lhs_loc, rhs_loc), - BinOp::Sub => gen_op_single(instructions, "subq", lhs_loc, rhs_loc), - BinOp::Mod => gen_op_mod(instructions, lhs_loc, rhs_loc), - BinOp::Div => gen_op_div(instructions, lhs_loc, rhs_loc), - BinOp::Mul => gen_op_mul(instructions, lhs_loc, rhs_loc), + BinOp::Add => gen_op_single(instructions, "addq", lhs_loc, rhs_loc), + BinOp::Sub => gen_op_single(instructions, "subq", lhs_loc, rhs_loc), + BinOp::Mod => gen_op_mod(instructions, lhs_loc, rhs_loc), + BinOp::Div => gen_op_div(instructions, lhs_loc, rhs_loc), + BinOp::Mul => gen_op_mul(instructions, lhs_loc, rhs_loc), BinOp::ShiftRight => gen_op_shift(instructions, "shrq", lhs_loc, rhs_loc), - BinOp::ShiftLeft => gen_op_shift(instructions, "shlq", lhs_loc, rhs_loc), - BinOp::And => gen_op_single(instructions, "andq", lhs_loc, rhs_loc), - BinOp::Or => gen_op_single(instructions, "orq", lhs_loc, rhs_loc), - BinOp::Xor => gen_op_single(instructions, "xorq", lhs_loc, rhs_loc), - BinOp::Eq => gen_op_cmp(instructions, "sete", lhs_loc, rhs_loc), - BinOp::Ne => gen_op_cmp(instructions, "setne", lhs_loc, rhs_loc), - BinOp::Le 
=> gen_op_cmp(instructions, "setle", lhs_loc, rhs_loc), - BinOp::Lt => gen_op_cmp(instructions, "setl", lhs_loc, rhs_loc), - BinOp::Ge => gen_op_cmp(instructions, "setge", lhs_loc, rhs_loc), - BinOp::Gt => gen_op_cmp(instructions, "setg", lhs_loc, rhs_loc), - BinOp::Assign(_) => - panic!("Assignments should not be parsed as regular binop exprs"), + BinOp::ShiftLeft => gen_op_shift(instructions, "shlq", lhs_loc, rhs_loc), + BinOp::And => gen_op_single(instructions, "andq", lhs_loc, rhs_loc), + BinOp::Or => gen_op_single(instructions, "orq", lhs_loc, rhs_loc), + BinOp::Xor => gen_op_single(instructions, "xorq", lhs_loc, rhs_loc), + BinOp::Eq => gen_op_cmp(instructions, "sete", lhs_loc, rhs_loc), + BinOp::Ne => gen_op_cmp(instructions, "setne", lhs_loc, rhs_loc), + BinOp::Le => gen_op_cmp(instructions, "setle", lhs_loc, rhs_loc), + BinOp::Lt => gen_op_cmp(instructions, "setl", lhs_loc, rhs_loc), + BinOp::Ge => gen_op_cmp(instructions, "setge", lhs_loc, rhs_loc), + BinOp::Gt => gen_op_cmp(instructions, "setg", lhs_loc, rhs_loc), + BinOp::Assign(_) => panic!("Assignments should not be parsed as regular binop exprs"), } } /// Returns the registers that the given op MIGHT use -fn registers_for_op( - op: &BinOp -) -> RegSet { +fn registers_for_op(op: &BinOp) -> RegSet { let fpu_regset = RegSet::of(Reg::Rax).with(Reg::Rdx); let shift_regset = RegSet::of(Reg::Rcx); match op { BinOp::ShiftRight => shift_regset, - BinOp::ShiftLeft => shift_regset, - BinOp::Mod => fpu_regset, - BinOp::Div => fpu_regset, - BinOp::Mul => fpu_regset, - _ => RegSet::empty(), + BinOp::ShiftLeft => shift_regset, + BinOp::Mod => fpu_regset, + BinOp::Div => fpu_regset, + BinOp::Mul => fpu_regset, + _ => RegSet::empty(), } } @@ -371,15 +361,17 @@ fn get_safe_registers(used_registers: RegSet) -> RegSet { /// #Arguments /// * `unsafe_registers` - Registers which should be avoided fn gen_pre_op( - c: &mut FunContext, instructions: &mut Vec, - lhs: &Expr, rhs: &Expr, unsafe_registers: RegSet, + c: &mut 
FunContext, + instructions: &mut Vec, + lhs: &Expr, + rhs: &Expr, + unsafe_registers: RegSet, ) -> Result<(Loc, Loc, RegSet), CompErr> { // Generate instructions for RHS first so we know which registers are safe - let mut rhs_ins = vec!(); + let mut rhs_ins = vec![]; let (rhs_loc, mut used_registers) = gen_expr(c, &mut rhs_ins, rhs)?; - let safe_registers = get_safe_registers(used_registers.clone()) - .subtract(&unsafe_registers); + let safe_registers = get_safe_registers(used_registers.clone()).subtract(&unsafe_registers); let (lhs_loc, lhs_registers) = gen_expr(c, instructions, lhs)?; used_registers = used_registers.union(lhs_registers); @@ -399,13 +391,10 @@ fn gen_pre_op( instructions.push(format!("movq {},%{}", lhs_loc, dest_reg)); instructions.append(&mut rhs_ins); Loc::Register(dest_reg) - }, + } // Nowhere is safe! Store LHS on the stack None => { - let lhs_in_reg = match lhs_loc { - Loc::Register(_) => true, - _ => false, - }; + let lhs_in_reg = matches!(lhs_loc, Loc::Register(_)); if lhs_in_reg { instructions.push(format!("pushq {}", lhs_loc)); @@ -417,7 +406,7 @@ fn gen_pre_op( // we already know everything is used! 
let new_lhs_loc = match rhs_loc { Loc::Register(Reg::Rax) => Loc::Register(Reg::Rcx), - _ => Loc::Register(Reg::Rax), + _ => Loc::Register(Reg::Rax), }; if lhs_in_reg { @@ -426,29 +415,36 @@ fn gen_pre_op( instructions.push(format!("movq {},{}", lhs_loc, new_lhs_loc)); } new_lhs_loc - }, + } }; Ok((new_lhs_loc, rhs_loc, used_registers)) } fn gen_prep_unary_op_incdec( - c: &mut FunContext, instructions: &mut Vec, expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + expr: &Expr, ) -> Result<(Loc, RegSet), CompErr> { let (expr_loc, used_registers) = gen_expr(c, instructions, expr)?; match expr_loc { - Loc::Register(_) | Loc::Immediate(_) => - return CompErr::err(&expr.pos(), format!( - "`++` or `--` must operate on a memory location")), - _ => {}, + Loc::Register(_) | Loc::Immediate(_) => { + return CompErr::err( + &expr.pos(), + "`++` or `--` must operate on a memory location".to_string(), + ) + } + _ => {} }; Ok((expr_loc, used_registers)) } fn gen_unary_op_pre_incdec( - c: &mut FunContext, instructions: &mut Vec, - op_name: &str, expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + op_name: &str, + expr: &Expr, ) -> Result<(Loc, RegSet), CompErr> { let (expr_loc, used_registers) = gen_prep_unary_op_incdec(c, instructions, expr)?; instructions.push(format!("{} {}", op_name, expr_loc)); @@ -456,8 +452,10 @@ fn gen_unary_op_pre_incdec( } fn gen_unary_op_post_incdec( - c: &mut FunContext, instructions: &mut Vec, - op_name: &str, expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + op_name: &str, + expr: &Expr, ) -> Result<(Loc, RegSet), CompErr> { let (expr_loc, used_registers) = gen_prep_unary_op_incdec(c, instructions, expr)?; @@ -471,8 +469,10 @@ fn gen_unary_op_post_incdec( } fn gen_unary_op_non_assign( - c: &mut FunContext, instructions: &mut Vec, - asm_op: &str, expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + asm_op: &str, + expr: &Expr, ) -> Result<(Loc, RegSet), CompErr> { let (expr_loc, mut used_registers) = gen_expr(c, 
instructions, expr)?; let dest_reg = match expr_loc { @@ -480,62 +480,65 @@ fn gen_unary_op_non_assign( _ => { let dest_reg = match used_registers.first() { Some(reg) => reg, - None => { + None => { used_registers = used_registers.with(Reg::Rax); Reg::Rax - }, + } }; instructions.push(format!("movq {},%{}", expr_loc, dest_reg)); dest_reg - }, + } }; instructions.push(format!("{} %{}", asm_op, dest_reg)); Ok((Loc::Register(dest_reg), used_registers)) } fn gen_unary_op( - c: &mut FunContext, instructions: &mut Vec, - op: &UnaryOp, expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + op: &UnaryOp, + expr: &Expr, ) -> Result<(Loc, RegSet), CompErr> { match op { - UnaryOp::PreIncrement => gen_unary_op_pre_incdec(c, instructions, "incq", expr), - UnaryOp::PreDecrement => gen_unary_op_pre_incdec(c, instructions, "decq", expr), + UnaryOp::PreIncrement => gen_unary_op_pre_incdec(c, instructions, "incq", expr), + UnaryOp::PreDecrement => gen_unary_op_pre_incdec(c, instructions, "decq", expr), UnaryOp::PostIncrement => gen_unary_op_post_incdec(c, instructions, "incq", expr), UnaryOp::PostDecrement => gen_unary_op_post_incdec(c, instructions, "decq", expr), - UnaryOp::BitNot => gen_unary_op_non_assign(c, instructions, "notq", expr), - UnaryOp::Negate => gen_unary_op_non_assign(c, instructions, "negq", expr), + UnaryOp::BitNot => gen_unary_op_non_assign(c, instructions, "notq", expr), + UnaryOp::Negate => gen_unary_op_non_assign(c, instructions, "negq", expr), } } fn gen_bin_op( - c: &mut FunContext, instructions: &mut Vec, - op: &BinOp, lhs: &Expr, rhs: &Expr + c: &mut FunContext, + instructions: &mut Vec, + op: &BinOp, + lhs: &Expr, + rhs: &Expr, ) -> Result<(Loc, RegSet), CompErr> { - let (lhs_loc, rhs_loc, used_registers) = gen_pre_op( - c, instructions, lhs, rhs, - registers_for_op(op))?; + let (lhs_loc, rhs_loc, used_registers) = + gen_pre_op(c, instructions, lhs, rhs, registers_for_op(op))?; // Run the command! 
- let (op_loc, op_registers) = - gen_op_command(instructions, op, lhs_loc, rhs_loc); + let (op_loc, op_registers) = gen_op_command(instructions, op, lhs_loc, rhs_loc); Ok((op_loc, used_registers.union(op_registers))) } fn gen_syscall( - c: &mut FunContext, instructions: &mut Vec, - pos: &Pos, params: &Vec + c: &mut FunContext, + instructions: &mut Vec, + pos: &Pos, + params: &Vec, ) -> Result<(Loc, RegSet), CompErr> { - if params.len() == 0 || params.len() > 7 { - return CompErr::err(pos, format!( - "syscall() must take between 1-7 arguments")); + if params.is_empty() || params.len() > 7 { + return CompErr::err(pos, "syscall() must take between 1-7 arguments".to_string()); } let mut used_registers = RegSet::of(Reg::Rax); for param in params.iter().rev() { - let (param_loc, param_used_reg) = - gen_expr(c, instructions, &param)?; + let (param_loc, param_used_reg) = gen_expr(c, instructions, param)?; // TODO: Optimize better, quit monkeying around // No point pushing memory or immediate locations to the stack! @@ -549,15 +552,16 @@ fn gen_syscall( instructions.push(format!("popq %{}", reg)); } - instructions.push(format!("syscall")); + instructions.push("syscall".to_string()); Ok((Loc::Register(Reg::Rax), used_registers)) } // Returns the first 6 param locations fn gen_call_params( - c: &mut FunContext, instructions: &mut Vec, - params: &Vec + c: &mut FunContext, + instructions: &mut Vec, + params: &Vec, ) -> Result, CompErr> { // Evaluate backwards until the 7th var. 
// Since the 7th+ params have to be on the stack anyways @@ -567,10 +571,9 @@ fn gen_call_params( instructions.push(format!("pushq {}", param_loc)); } - let mut param_locs = vec!(); + let mut param_locs = vec![]; - for i in 0..std::cmp::min(6, params.len()) { - let param = &params[i]; + for param in params.iter().take(std::cmp::min(6, params.len())) { let (param_loc, _) = gen_expr(c, instructions, param)?; if param_loc.is_reg() { @@ -584,70 +587,80 @@ fn gen_call_params( /// Substitutes the given IDs in the AST to their respective values. /// Used for macro expansion -fn substitute_id( - body: &Expr, - substitutions: &HashMap, -) -> Result { +fn substitute_id(body: &Expr, substitutions: &HashMap) -> Result { Ok(match body { - expr @ Expr::Str(_, _) => expr.clone(), - expr @ Expr::Int(_, _) => expr.clone(), + expr @ Expr::Str(_, _) => expr.clone(), + expr @ Expr::Int(_, _) => expr.clone(), Expr::Id(pos, name) => match substitutions.get(name) { Some(value) => value.clone(), - None => Expr::Id(pos.clone(), name.clone()), + None => Expr::Id(pos.clone(), name.clone()), }, Expr::Reference(pos, name) => match substitutions.get(name) { - Some(_) => return CompErr::err(pos, format!( - "Cannot reference a macro arg")), - None => Expr::Reference(pos.clone(), name.clone()), + Some(_) => return CompErr::err(pos, "Cannot reference a macro arg".to_string()), + None => Expr::Reference(pos.clone(), name.clone()), }, Expr::Call(pos, callee, params) => { let sub_callee = substitute_id(callee, substitutions)?; - let mut sub_params = vec!(); + let mut sub_params = vec![]; for param in params { sub_params.push(substitute_id(param, substitutions)?); } Expr::Call(pos.clone(), Box::new(sub_callee), sub_params) - }, + } Expr::Assignment(pos, lhs, rhs) => { let sub_rhs = substitute_id(rhs, substitutions)?; Expr::Assignment(pos.clone(), lhs.clone(), Box::new(sub_rhs)) - }, + } Expr::DerefAssignment(pos, lhs, rhs) => { let sub_lhs = substitute_id(lhs, substitutions)?; let sub_rhs = 
substitute_id(rhs, substitutions)?; Expr::DerefAssignment(pos.clone(), Box::new(sub_lhs), Box::new(sub_rhs)) - }, + } Expr::UnaryOperator(pos, op, expr) => { let sub_expr = substitute_id(expr, substitutions)?; Expr::UnaryOperator(pos.clone(), op.clone(), Box::new(sub_expr)) - }, + } Expr::BinOperator(pos, op, lhs, rhs) => { let sub_lhs = substitute_id(lhs, substitutions)?; let sub_rhs = substitute_id(rhs, substitutions)?; - Expr::BinOperator(pos.clone(), op.clone(), - Box::new(sub_lhs), Box::new(sub_rhs)) - }, + Expr::BinOperator( + pos.clone(), + op.clone(), + Box::new(sub_lhs), + Box::new(sub_rhs), + ) + } Expr::Cond(pos, cond, truthy, falsey) => { - let sub_cond = substitute_id(cond, substitutions)?; + let sub_cond = substitute_id(cond, substitutions)?; let sub_truthy = substitute_id(truthy, substitutions)?; let sub_falsey = substitute_id(falsey, substitutions)?; - Expr::Cond(pos.clone(), Box::new(sub_cond), - Box::new(sub_truthy), Box::new(sub_falsey)) - }, + Expr::Cond( + pos.clone(), + Box::new(sub_cond), + Box::new(sub_truthy), + Box::new(sub_falsey), + ) + } Expr::Dereference(pos, expr) => { let sub_expr = substitute_id(expr, substitutions)?; Expr::Dereference(pos.clone(), Box::new(sub_expr)) - }, + } }) } fn gen_call_macro( - c: &mut FunContext, instructions: &mut Vec, - pos: &Pos, body: Expr, args: Vec, params: &Vec + c: &mut FunContext, + instructions: &mut Vec, + pos: &Pos, + body: Expr, + args: Vec, + params: &Vec, ) -> Result<(Loc, RegSet), CompErr> { if args.len() != params.len() { - return CompErr::err(pos, format!( - "This macro must accept {} arguments", args.len())); + return CompErr::err( + pos, + format!("This macro must accept {} arguments", args.len()), + ); } let mut substitutions = HashMap::new(); @@ -659,39 +672,43 @@ fn gen_call_macro( } fn gen_call( - c: &mut FunContext, instructions: &mut Vec, - pos: &Pos, callee_expr: &Expr, params: &Vec + c: &mut FunContext, + instructions: &mut Vec, + pos: &Pos, + callee_expr: &Expr, + params: &Vec, 
) -> Result<(Loc, RegSet), CompErr> { match callee_expr { Expr::Id(_, name) if name == "syscall" => { return gen_syscall(c, instructions, pos, params); - }, + } _ => {} } let param_locs = gen_call_params(c, instructions, params)?; let callee = match callee_expr { - Expr::Id(_, name) => match c.find_in_scope(&name) { + Expr::Id(_, name) => match c.find_in_scope(name) { Some(ScopeEntry::Fun(arg_num)) => { if params.len() > *arg_num { - return CompErr::err(pos, format!( - "{} accepts at most {} arguments", name, arg_num)); + return CompErr::err( + pos, + format!("{} accepts at most {} arguments", name, arg_num), + ); } name - }, + } Some(ScopeEntry::Var(loc)) => { instructions.push(format!("movq {},%rax", loc)); "*%rax" - }, + } Some(ScopeEntry::Define(args, body)) => { let ac = args.clone(); let bc = body.clone(); let pc = pos.clone(); return gen_call_macro(c, instructions, &pc, bc, ac, params); - }, - None => return CompErr::err(pos, format!( - "{} not in scope", name)), + } + None => return CompErr::err(pos, format!("{} not in scope", name)), }, callee_expr => { let (callee_loc, _) = gen_expr(c, instructions, callee_expr)?; @@ -699,7 +716,7 @@ fn gen_call( instructions.push(format!("movq {},%rax", callee_loc)); } "*%rax" - }, + } }; for i in (0..std::cmp::min(6, params.len())).rev() { @@ -725,33 +742,36 @@ fn gen_call( } fn gen_reference( - c: &mut FunContext, instructions: &mut Vec, - pos: &Pos, name: &String + c: &mut FunContext, + instructions: &mut Vec, + pos: &Pos, + name: &String, ) -> Result<(Loc, RegSet), CompErr> { match c.find_in_scope(name) { Some(ScopeEntry::Var(Loc::Stack(offset))) => { let dest_reg = Reg::Rax; - instructions.push(format!( - "leaq {}(%rbp),%{}", 8 * offset, dest_reg)); + instructions.push(format!("leaq {}(%rbp),%{}", 8 * offset, dest_reg)); Ok((Loc::Register(dest_reg), RegSet::of(dest_reg))) - }, - Some(ScopeEntry::Var(other)) => CompErr::err(pos, format!( - "Variable cannot be at {:?}!", other)), + } + Some(ScopeEntry::Var(other)) => 
{ + CompErr::err(pos, format!("Variable cannot be at {:?}!", other)) + } Some(ScopeEntry::Fun(_)) => { let dest_reg = Reg::Rax; instructions.push(format!("movq ${},%rax", name)); Ok((Loc::Register(dest_reg), RegSet::of(dest_reg))) - }, - Some(ScopeEntry::Define(_, _)) => CompErr::err(pos, format!( - "#define value cannot be referenced")), - None => CompErr::err(pos, format!( - "{} not in scope", name)), + } + Some(ScopeEntry::Define(_, _)) => { + CompErr::err(pos, "#define value cannot be referenced".to_string()) + } + None => CompErr::err(pos, format!("{} not in scope", name)), } } fn gen_dereference( - c: &mut FunContext, instructions: &mut Vec, - expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + expr: &Expr, ) -> Result<(Loc, RegSet), CompErr> { let (target_loc, mut used_registers) = gen_expr(c, instructions, expr)?; @@ -764,11 +784,11 @@ fn gen_dereference( None => { used_registers = used_registers.with(Reg::Rax); Reg::Rax - }, + } }; instructions.push(format!("movq {},%{}", target_loc, new_reg)); new_reg - }, + } }; instructions.push(format!("movq (%{}),%{}", dest_reg, dest_reg)); @@ -777,8 +797,9 @@ fn gen_dereference( // Generates the RHS instructions for an assignment fn gen_expr_ass_rhs( - c: &mut FunContext, instructions: &mut Vec, - rhs: &Expr + c: &mut FunContext, + instructions: &mut Vec, + rhs: &Expr, ) -> Result<(Reg, RegSet), CompErr> { let (rhs_loc, mut used_registers) = gen_expr(c, instructions, rhs)?; @@ -792,18 +813,21 @@ fn gen_expr_ass_rhs( None => { used_registers = used_registers.with(Reg::Rax); Reg::Rax - }, + } }; instructions.push(format!("movq {},%{}", rhs_loc, rhs_reg)); Ok((rhs_reg, used_registers)) - }, + } } } fn gen_expr_ass( - c: &mut FunContext, instructions: &mut Vec, - pos: &Pos, lhs_name: &String, rhs: &Expr + c: &mut FunContext, + instructions: &mut Vec, + pos: &Pos, + lhs_name: &String, + rhs: &Expr, ) -> Result<(Loc, RegSet), CompErr> { let (rhs_reg, used_registers) = gen_expr_ass_rhs(c, instructions, rhs)?; @@ 
-811,44 +835,42 @@ fn gen_expr_ass( Some(ScopeEntry::Var(lhs_loc)) => { instructions.push(format!("movq %{},{}", rhs_reg, lhs_loc)); Ok((lhs_loc.clone(), used_registers)) - }, - Some(ScopeEntry::Fun(_)) => - CompErr::err(pos, format!("Cannot reassign a function")), - Some(ScopeEntry::Define(_, _)) => - CompErr::err(pos, format!("Cannot reassign a #define value")), - None => - CompErr::err(pos, format!("Variable {} not in scope", lhs_name)), + } + Some(ScopeEntry::Fun(_)) => CompErr::err(pos, "Cannot reassign a function".to_string()), + Some(ScopeEntry::Define(_, _)) => { + CompErr::err(pos, "Cannot reassign a #define value".to_string()) + } + None => CompErr::err(pos, format!("Variable {} not in scope", lhs_name)), } } // FIXME: This register assignment technique is super similar in other places // Abstract away! fn gen_expr_deref_ass( - c: &mut FunContext, instructions: &mut Vec, - lhs: &Expr, rhs: &Expr + c: &mut FunContext, + instructions: &mut Vec, + lhs: &Expr, + rhs: &Expr, ) -> Result<(Loc, RegSet), CompErr> { let (lhs_loc, mut used_registers) = gen_expr(c, instructions, lhs)?; - let mut rhs_inst = vec!(); + let mut rhs_inst = vec![]; let (rhs_reg, rhs_used) = gen_expr_ass_rhs(c, &mut rhs_inst, rhs)?; let safe_registers = get_safe_registers(rhs_used.clone()); let lhs_dest_reg = match safe_registers.first() { Some(safe_reg) => match lhs_loc { - Loc::Register(lhs_reg) if safe_registers.contains(lhs_reg) => - Some(lhs_reg), + Loc::Register(lhs_reg) if safe_registers.contains(lhs_reg) => Some(lhs_reg), lhs_loc => { - instructions.push(format!( - "movq {},%{}", lhs_loc, safe_reg - )); + instructions.push(format!("movq {},%{}", lhs_loc, safe_reg)); used_registers = used_registers.with(safe_reg); Some(safe_reg) - }, + } }, None => { // No safe registers! Push to stack! 
instructions.push(format!("pushq {}", lhs_loc)); None - }, + } }; instructions.append(&mut rhs_inst); @@ -861,11 +883,11 @@ fn gen_expr_deref_ass( None => { let dest_reg = match rhs_reg { Reg::Rax => Reg::Rcx, - _ => Reg::Rax, + _ => Reg::Rax, }; instructions.push(format!("popq %{}", dest_reg)); dest_reg - }, + } }; // At this point @@ -880,9 +902,7 @@ fn gen_expr_deref_ass( /// # Arguments /// * `dest_reg` - For values > 2^32, this is the reg that will be used. -fn gen_int( - instructions: &mut Vec, signed: i64, dest_reg: Reg -) -> (Loc, RegSet) { +fn gen_int(instructions: &mut Vec, signed: i64, dest_reg: Reg) -> (Loc, RegSet) { if signed < i32::MAX as i64 && signed >= i32::MIN as i64 { (Loc::Immediate(signed), RegSet::empty()) } else { @@ -900,7 +920,8 @@ fn gen_int( } fn gen_cond_expr( - c: &mut FunContext, instructions: &mut Vec, + c: &mut FunContext, + instructions: &mut Vec, cond_expr: &Expr, true_expr: &Expr, false_expr: &Expr, @@ -936,54 +957,55 @@ fn gen_cond_expr( * @return (instructions, location, used_registers) */ fn gen_expr( - c: &mut FunContext, instructions: &mut Vec, - expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + expr: &Expr, ) -> Result<(Loc, RegSet), CompErr> { match expr { Expr::Int(_, value) => Ok(gen_int(instructions, *value, Reg::Rax)), - Expr::Id(pos, name) => { - match c.find_in_scope(name) { - Some(ScopeEntry::Var(loc)) => - Ok((loc.clone(), RegSet::empty())), - Some(ScopeEntry::Fun(_)) => - CompErr::err(pos, format!( - "{} is a function, and can only be called or referenced", - name)), - Some(ScopeEntry::Define(args, body)) => { - if args.is_empty() { - let b = body.clone(); - gen_expr(c, instructions, &b) - } else { - CompErr::err(pos, format!( - "This macro must take {} args", args.len())) - } - }, - None => CompErr::err(pos, format!( - "Variable {} not in scope", name)), + Expr::Id(pos, name) => match c.find_in_scope(name) { + Some(ScopeEntry::Var(loc)) => Ok((loc.clone(), RegSet::empty())), + Some(ScopeEntry::Fun(_)) 
=> CompErr::err( + pos, + format!( + "{} is a function, and can only be called or referenced", + name + ), + ), + Some(ScopeEntry::Define(args, body)) => { + if args.is_empty() { + let b = body.clone(); + gen_expr(c, instructions, &b) + } else { + CompErr::err(pos, format!("This macro must take {} args", args.len())) + } } + None => CompErr::err(pos, format!("Variable {} not in scope", name)), }, Expr::Str(_, (file_id, string_index)) => { let label = label_for_string_id(*file_id, *string_index); instructions.push(format!("leaq {}(%rip),%rax", label)); Ok((Loc::Register(Reg::Rax), RegSet::of(Reg::Rax))) - }, - Expr::Assignment(pos, lhs, rhs) => gen_expr_ass(c, instructions, pos, lhs, rhs), + } + Expr::Assignment(pos, lhs, rhs) => gen_expr_ass(c, instructions, pos, lhs, rhs), Expr::DerefAssignment(_, lhs, rhs) => gen_expr_deref_ass(c, instructions, lhs, rhs), - Expr::UnaryOperator(_, op, expr) => gen_unary_op(c, instructions, op, expr), + Expr::UnaryOperator(_, op, expr) => gen_unary_op(c, instructions, op, expr), Expr::BinOperator(_, op, lhs, rhs) => gen_bin_op(c, instructions, op, lhs, rhs), - Expr::Call(pos, callee, params) => gen_call(c, instructions, pos, callee, params), - Expr::Reference(pos, name) => gen_reference(c, instructions, pos, name), - Expr::Dereference(_, expr) => gen_dereference(c, instructions, expr), - Expr::Cond(_, cond, true_expr, false_expr) => - gen_cond_expr(c, instructions, cond, true_expr, false_expr), + Expr::Call(pos, callee, params) => gen_call(c, instructions, pos, callee, params), + Expr::Reference(pos, name) => gen_reference(c, instructions, pos, name), + Expr::Dereference(_, expr) => gen_dereference(c, instructions, expr), + Expr::Cond(_, cond, true_expr, false_expr) => { + gen_cond_expr(c, instructions, cond, true_expr, false_expr) + } } } fn gen_return_expr( - c: &mut FunContext, instructions: &mut Vec, - expr: &Expr + c: &mut FunContext, + instructions: &mut Vec, + expr: &Expr, ) -> Result<(), CompErr> { - let (loc, _) = 
gen_expr(c, instructions, &expr)?; + let (loc, _) = gen_expr(c, instructions, expr)?; // If the location is already rax, we don't need to move! if loc != Loc::Register(Reg::Rax) { @@ -1002,37 +1024,44 @@ fn gen_return(instructions: &mut Vec) { } fn gen_cond_cmp( - c: &mut FunContext, instructions: &mut Vec, + c: &mut FunContext, + instructions: &mut Vec, jump_command: &str, lhs: &Expr, rhs: &Expr, - end_label: &String + end_label: &String, ) -> Result<(), CompErr> { - let (lhs_loc, rhs_loc, _) = gen_pre_op( - c, instructions, lhs, rhs, RegSet::empty())?; + let (lhs_loc, rhs_loc, _) = gen_pre_op(c, instructions, lhs, rhs, RegSet::empty())?; instructions.push(format!("cmpq {},{}", rhs_loc, lhs_loc)); instructions.push(format!("{} {}", jump_command, end_label)); Ok(()) } fn gen_cond( - c: &mut FunContext, instructions: &mut Vec, + c: &mut FunContext, + instructions: &mut Vec, cond: &Expr, - end_label: &String + end_label: &String, ) -> Result<(), CompErr> { match cond { - Expr::BinOperator(_, BinOp::Eq, lhs, rhs) => - gen_cond_cmp(c, instructions, "jne", lhs, rhs, end_label), - Expr::BinOperator(_, BinOp::Ne, lhs, rhs) => - gen_cond_cmp(c, instructions, "je", lhs, rhs, end_label), - Expr::BinOperator(_, BinOp::Gt, lhs, rhs) => - gen_cond_cmp(c, instructions, "jle", lhs, rhs, end_label), - Expr::BinOperator(_, BinOp::Ge, lhs, rhs) => - gen_cond_cmp(c, instructions, "jl", lhs, rhs, end_label), - Expr::BinOperator(_, BinOp::Lt, lhs, rhs) => - gen_cond_cmp(c, instructions, "jge", lhs, rhs, end_label), - Expr::BinOperator(_, BinOp::Le, lhs, rhs) => - gen_cond_cmp(c, instructions, "jg", lhs, rhs, end_label), + Expr::BinOperator(_, BinOp::Eq, lhs, rhs) => { + gen_cond_cmp(c, instructions, "jne", lhs, rhs, end_label) + } + Expr::BinOperator(_, BinOp::Ne, lhs, rhs) => { + gen_cond_cmp(c, instructions, "je", lhs, rhs, end_label) + } + Expr::BinOperator(_, BinOp::Gt, lhs, rhs) => { + gen_cond_cmp(c, instructions, "jle", lhs, rhs, end_label) + } + Expr::BinOperator(_, BinOp::Ge, 
lhs, rhs) => { + gen_cond_cmp(c, instructions, "jl", lhs, rhs, end_label) + } + Expr::BinOperator(_, BinOp::Lt, lhs, rhs) => { + gen_cond_cmp(c, instructions, "jge", lhs, rhs, end_label) + } + Expr::BinOperator(_, BinOp::Le, lhs, rhs) => { + gen_cond_cmp(c, instructions, "jg", lhs, rhs, end_label) + } Expr::Int(_, value) => { if *value == 0 { instructions.push(format!("jmp {}", end_label)); @@ -1041,21 +1070,22 @@ fn gen_cond( // For non-zero ints, no comparison needs to be made! Ok(()) } - }, + } _ => { // Fallback to evaluating the entire conditional expression let (cond_loc, _) = gen_expr(c, instructions, cond)?; instructions.push(format!("cmpq $0,{}", cond_loc)); instructions.push(format!("jz {}", end_label)); Ok(()) - }, + } } } fn gen_if( - c: &mut FunContext, instructions: &mut Vec, + c: &mut FunContext, + instructions: &mut Vec, cond: &Expr, - if_body: &Statement + if_body: &Statement, ) -> Result<(), CompErr> { let if_end_label = c.new_label("IF_END"); gen_cond(c, instructions, cond, &if_end_label)?; @@ -1065,9 +1095,10 @@ fn gen_if( } fn gen_while( - c: &mut FunContext, instructions: &mut Vec, + c: &mut FunContext, + instructions: &mut Vec, cond: &Expr, - body: &Statement + body: &Statement, ) -> Result<(), CompErr> { let while_begin_label = c.new_label("WHILE_BEGIN"); instructions.push(format!("{}:", while_begin_label)); @@ -1085,7 +1116,8 @@ fn gen_while( } fn gen_if_else( - c: &mut FunContext, instructions: &mut Vec, + c: &mut FunContext, + instructions: &mut Vec, cond: &Expr, if_body: &Statement, else_body: &Statement, @@ -1103,8 +1135,10 @@ fn gen_if_else( } fn gen_switch( - c: &mut FunContext, instructions: &mut Vec, - cond: &Expr, body: &Vec + c: &mut FunContext, + instructions: &mut Vec, + cond: &Expr, + body: &Vec, ) -> Result<(), CompErr> { let (expr_loc, _) = gen_expr(c, instructions, cond)?; // cmp requires the dest to be in a register @@ -1113,18 +1147,18 @@ fn gen_switch( other => { instructions.push(format!("movq {},%rax", other)); 
Loc::Register(Reg::Rax) - }, + } }; // The register to store case values let case_reg = match cond_loc { Loc::Register(Reg::Rax) => Reg::Rcx, - _ => Reg::Rax, + _ => Reg::Rax, }; let mut used_case_values = HashSet::new(); let mut default_label: Option = None; - let mut body_inst = vec!(); + let mut body_inst = vec![]; let switch_end_label = c.new_label("SW_END"); @@ -1132,20 +1166,22 @@ fn gen_switch( for inner in body { match inner { SwInner::Default(pos) => { - if !default_label.is_none() { + if default_label.is_some() { return CompErr::err( pos, - format!("`default` label is already defined in switch")); + "`default` label is already defined in switch".to_string(), + ); } let label_name = c.new_label("SW_DEFAULT"); body_inst.push(format!("{}:", label_name)); default_label = Some(label_name); - }, + } SwInner::Case(pos, value) => { if used_case_values.contains(value) { return CompErr::err( pos, - format!("case {} is already defined in switch", value)); + format!("case {} is already defined in switch", value), + ); } used_case_values.insert(value); @@ -1154,24 +1190,29 @@ fn gen_switch( body_inst.push(format!("{}:", label_name)); instructions.push(format!("cmpq {},{}", case_loc, cond_loc)); instructions.push(format!("je {}", label_name)); - }, + } SwInner::Statement(body) => gen_statement(c, &mut body_inst, body)?, } } c.break_dest_stack.pop(); + + let inst = match default_label { + Some(ref label_name) => format!("jmp {}", label_name), + None => format!("jmp {}", switch_end_label), + }; + // Default jump point - instructions.push(format!("jmp {}", match default_label { - Some(label_name) => label_name, - None => switch_end_label.clone(), - })); + instructions.push(inst); instructions.append(&mut body_inst); instructions.push(format!("{}:", switch_end_label)); Ok(()) } fn gen_auto( - c: &mut FunContext, instructions: &mut Vec, - pos: &Pos, vars: &Vec + c: &mut FunContext, + instructions: &mut Vec, + pos: &Pos, + vars: &Vec, ) -> Result<(), CompErr> { for var in 
vars { // Guaranteed to exist because of the prepass @@ -1187,96 +1228,86 @@ fn gen_auto( }; // The first value in the stack for a vector is a data pointer - instructions.push(format!( - "leaq {}(%rbp),%rax", (offset - size) * 8)); + instructions.push(format!("leaq {}(%rbp),%rax", (offset - size) * 8)); instructions.push(format!("movq %rax,{}", dest_loc)); - for i in 0..initial.len() { - let value = initial[i]; + for (i, value) in initial.iter().enumerate() { let val_dest_loc = Loc::Stack(offset - size + i as i64); - let (val_loc, _) = gen_int(instructions, value, Reg::Rax); - instructions.push(format!( - "movq {},{}", val_loc, val_dest_loc)); + let (val_loc, _) = gen_int(instructions, *value, Reg::Rax); + instructions.push(format!("movq {},{}", val_loc, val_dest_loc)); } - }, + } Var::Single(_, Some(value)) => { let (val_loc, _) = gen_int(instructions, *value, Reg::Rax); instructions.push(format!("movq {},{}", val_loc, dest_loc)); - }, - Var::Single(_, None) => {}, + } + Var::Single(_, None) => {} } } Ok(()) } -fn gen_extern( - c: &mut FunContext, - pos: &Pos, vars: &Vec -) -> Result<(), CompErr> { +fn gen_extern(c: &mut FunContext, pos: &Pos, vars: &Vec) -> Result<(), CompErr> { for name in vars { - if !c.find_in_scope(name).is_none() { - return CompErr::err(pos, format!( - "{} is already is scope", name)); + if c.find_in_scope(name).is_some() { + return CompErr::err(pos, format!("{} is already is scope", name)); } match c.global_scope.get(name) { Some(ScopeEntry::Var(Loc::Data(_))) => { let entry = ScopeEntry::Var(Loc::Data(name.clone())); c.add_to_scope(pos, name.clone(), entry)?; - }, - Some(ScopeEntry::Fun(_)) => return CompErr::err(pos, format!( - "{} is a function, not a global var", name)), - _ => return CompErr::err(pos, format!( - "Could not find definition for {}", name)), + } + Some(ScopeEntry::Fun(_)) => { + return CompErr::err(pos, format!("{} is a function, not a global var", name)) + } + _ => return CompErr::err(pos, format!("Could not find 
definition for {}", name)), } } Ok(()) } fn gen_statement( - c: &mut FunContext, instructions: &mut Vec, - body: &Statement + c: &mut FunContext, + instructions: &mut Vec, + body: &Statement, ) -> Result<(), CompErr> { match body { Statement::Null => Ok(()), - Statement::Break(pos) => { - match c.break_dest_stack.last() { - Some(label) => { - instructions.push(format!("# break")); - instructions.push(format!("jmp {}", label)); - Ok(()) - }, - None => CompErr::err(pos, format!( - "Cannot break from this location")), + Statement::Break(pos) => match c.break_dest_stack.last() { + Some(label) => { + instructions.push("# break".to_string()); + instructions.push(format!("jmp {}", label)); + Ok(()) } + None => CompErr::err(pos, "Cannot break from this location".to_string()), }, - Statement::Goto(pos, name) => { - match c.labels.get(name) { - Some(label) => { - instructions.push(format!("# goto {}", label)); - instructions.push(format!("jmp {}", label)); - Ok(()) - }, - None => - CompErr::err(pos, format!( - "Label '{}' not defined in this function", name)), + Statement::Goto(pos, name) => match c.labels.get(name) { + Some(label) => { + instructions.push(format!("# goto {}", label)); + instructions.push(format!("jmp {}", label)); + Ok(()) } + None => CompErr::err( + pos, + format!("Label '{}' not defined in this function", name), + ), }, // We preprocess the labels, so we know it must exist Statement::Label(_, name) => { instructions.push(format!("{}:", c.labels.get(name).unwrap())); Ok(()) - }, + } Statement::Return => { - instructions.push(format!("# return")); + instructions.push("# return".to_string()); gen_return(instructions); Ok(()) - }, + } Statement::ReturnExpr(expr) => { - instructions.push(format!("# return")); + instructions.push("# return".to_string()); gen_return_expr(c, instructions, expr) - }, + } Statement::Block(statements) => { c.new_scope(); for statement in statements { @@ -1284,58 +1315,60 @@ fn gen_statement( } c.drop_scope(); Ok(()) - }, - 
Statement::Auto(pos, vars) => { - instructions.push(format!("# auto")); + } + Statement::Auto(pos, vars) => { + instructions.push("# auto".to_string()); gen_auto(c, instructions, pos, vars) - }, + } Statement::Extern(pos, vars) => { - instructions.push(format!("# extrn")); + instructions.push("# extrn".to_string()); gen_extern(c, pos, vars) - }, + } Statement::Expr(expr) => { - instructions.push(format!("# Expression statement")); + instructions.push("# Expression statement".to_string()); gen_expr(c, instructions, expr)?; Ok(()) - }, + } Statement::If(cond, if_body, None) => { - instructions.push(format!("# if")); + instructions.push("# if".to_string()); gen_if(c, instructions, cond, if_body) - }, + } Statement::If(cond, if_body, Some(else_body)) => { - instructions.push(format!("# if")); + instructions.push("# if".to_string()); gen_if_else(c, instructions, cond, if_body, else_body) - }, + } Statement::While(cond, body) => { - instructions.push(format!("# while")); + instructions.push("# while".to_string()); gen_while(c, instructions, cond, body) - }, + } Statement::Switch(cond, body) => { - instructions.push(format!("# switch")); + instructions.push("# switch".to_string()); gen_switch(c, instructions, cond, body) - }, + } } } -fn gen_fun( - c: &mut FunContext, function: &RSFunction -) -> Result, CompErr> { +fn gen_fun(c: &mut FunContext, function: &RSFunction) -> Result, CompErr> { let pos = &function.pos; let args = &function.args; let body = &function.body; c.new_scope(); - let mut instructions = vec!(); + let mut instructions = vec![]; // Save base pointer, since it's callee-saved - instructions.push(format!("pushq %rbp")); - instructions.push(format!("movq %rsp,%rbp")); + instructions.push("pushq %rbp".to_string()); + instructions.push("movq %rsp,%rbp".to_string()); // Prepare initial stack memory - alloc_args(c, &mut instructions, &pos, &args)?; - prepass_gen(c, &mut instructions, - &body, 1 + std::cmp::min(6, args.len() as i64))?; + alloc_args(c, &mut 
instructions, pos, args)?; + prepass_gen( + c, + &mut instructions, + body, + 1 + std::cmp::min(6, args.len() as i64), + )?; - gen_statement(c, &mut instructions, &body)?; + gen_statement(c, &mut instructions, body)?; let trailing_ret = match instructions.last() { Some(instruction) => instruction == "ret", @@ -1364,20 +1397,17 @@ fn root_prepass<'a>( let name = var.name(); if scope.contains_key(name) { - return CompErr::err( - &variable.pos, format!("{} already in root scope", name)); + return CompErr::err(&variable.pos, format!("{} already in root scope", name)); } - root_vars.push(&var); + root_vars.push(var); - scope.insert(name.clone(), - ScopeEntry::Var(Loc::Data(name.clone()))); + scope.insert(name.clone(), ScopeEntry::Var(Loc::Data(name.clone()))); } for function in functions { let name = &function.name; if scope.contains_key(name) { - return CompErr::err(&function.pos, format!( - "{} already in root scope", name)); + return CompErr::err(&function.pos, format!("{} already in root scope", name)); } scope.insert(name.clone(), ScopeEntry::Fun(function.args.len())); } @@ -1385,8 +1415,7 @@ fn root_prepass<'a>( for define in defines { let name = &define.name; if scope.contains_key(name) { - return CompErr::err(&define.pos, format!( - "{} already in root scope", name)); + return CompErr::err(&define.pos, format!("{} already in root scope", name)); } scope.insert(name.clone(), ScopeEntry::Define(define.args, define.body)); @@ -1395,9 +1424,7 @@ fn root_prepass<'a>( Ok((scope, root_vars)) } -fn generate_data_segment( - root_vars: &Vec<&Var>, w: &mut dyn Write -) -> Result<(), std::io::Error> { +fn generate_data_segment(root_vars: &Vec<&Var>, w: &mut dyn Write) -> Result<(), std::io::Error> { writeln!(w, ".data")?; for var in root_vars { write!(w, "{}:\n ", var.name())?; @@ -1405,10 +1432,10 @@ fn generate_data_segment( match var { Var::Single(_, None) => { writeln!(w, ".skip 8")?; - }, + } Var::Single(_, Some(value)) => { writeln!(w, ".quad {}", value)?; - }, + } 
Var::Vec(_, size, initial) => { if initial.is_empty() { // +1 for the vec pointer @@ -1417,8 +1444,7 @@ fn generate_data_segment( // One extra at the begining for vec pointer write!(w, ".quad 0")?; - for i in 0..initial.len() { - let value = initial[i]; + for value in initial.iter() { write!(w, ",{}", value)?; } @@ -1426,25 +1452,20 @@ fn generate_data_segment( for _ in initial.len()..*size as usize { write!(w, ",0")?; } - write!(w, "\n")?; + writeln!(w)?; } - }, + } }; } Ok(()) } -fn generate_start( - root_vars: &Vec<&Var>, w: &mut dyn Write -) -> Result<(), std::io::Error> { - writeln!(w, "{}\n{}\n{}", - ".text", - ".global _start", - "_start:")?; +fn generate_start(root_vars: &Vec<&Var>, w: &mut dyn Write) -> Result<(), std::io::Error> { + writeln!(w, ".text\n.global _start\n_start:")?; for var in root_vars { match var { - Var::Single(_, _) => {}, + Var::Single(_, _) => {} Var::Vec(name, _, _) => { // Initialize vec pointers // For consistency with stack vectors, data vectors are pointers @@ -1455,7 +1476,7 @@ fn generate_start( } } - writeln!(w, " movq (%rsp),%rdi")?; // Pass argc as first `main` arg + writeln!(w, " movq (%rsp),%rdi")?; // Pass argc as first `main` arg writeln!(w, " leaq 8(%rsp),%rsi")?; // Pass argv as second `main` arg writeln!(w, " call main")?; writeln!(w, " movq %rax,%rdi")?; @@ -1466,7 +1487,8 @@ fn generate_start( } fn generate_strings( - strings: &Vec<(usize, Vec>)>, w: &mut dyn Write + strings: &Vec<(usize, Vec>)>, + w: &mut dyn Write, ) -> Result<(), std::io::Error> { writeln!(w, ".text")?; // Prevent constant strings from being modified @@ -1477,10 +1499,10 @@ fn generate_strings( // TODO: Print nicely instead of packing into quads let string_quads = pack_chars(string_chars); write!(w, "{}:\n .quad {}", label, string_quads[0])?; - for i in 1..string_quads.len() { - write!(w, ",{}", string_quads[i])?; + for quad in string_quads.iter().skip(1) { + write!(w, ",{}", quad)?; } - writeln!(w, "")?; + writeln!(w)?; } } Ok(()) @@ -1491,26 
+1513,25 @@ fn gen( functions: Vec, variables: Vec, defines: Vec, - writer: &mut dyn Write + writer: &mut dyn Write, ) -> Result<(), CompErr> { let mut w = BufWriter::new(writer); - let (global_scope, root_vars) = root_prepass( - &functions, - &variables, - defines - )?; + let (global_scope, root_vars) = root_prepass(&functions, &variables, defines)?; CompErr::from_io_res(generate_data_segment(&root_vars, &mut w))?; CompErr::from_io_res(generate_strings(&strings, &mut w))?; CompErr::from_io_res(generate_start(&root_vars, &mut w))?; - let pool = Arc::new((Mutex::new(CodeGenPool { - running_fibers: 0, - functions: functions, - results: vec!(), - errors: vec!(), - }), Condvar::new())); + let pool = Arc::new(( + Mutex::new(CodeGenPool { + running_fibers: 0, + functions, + results: vec![], + errors: vec![], + }), + Condvar::new(), + )); let thread_count = logical_cpu_count(); let arc_global_scope = Arc::new(global_scope); @@ -1525,15 +1546,10 @@ fn gen( })) } - loop { - match pop_pool_result(&pool) { - Some((func_name, instructions)) => { - CompErr::from_io_res(writeln!(w, "{}:", func_name))?; - for instruction in instructions { - CompErr::from_io_res(writeln!(w, " {}", instruction))?; - } - }, - None => break, + while let Some((func_name, instructions)) = pop_pool_result(&pool) { + CompErr::from_io_res(writeln!(w, "{}:", func_name))?; + for instruction in instructions { + CompErr::from_io_res(writeln!(w, " {}", instruction))?; } } @@ -1548,7 +1564,8 @@ fn gen( } // Might be redundant. TODO: Double check!!! 
- for (func_name, instructions) in guard.results.iter() { + let guard_iter = guard.results.iter(); + for (func_name, instructions) in guard_iter { CompErr::from_io_res(writeln!(w, "{}:", func_name))?; for instruction in instructions { CompErr::from_io_res(writeln!(w, " {}", instruction))?; @@ -1557,9 +1574,7 @@ fn gen( Ok(()) } -fn pop_pool_result( - pool: &Arc<(Mutex, Condvar)> -) -> Option<(String, Vec)> { +fn pop_pool_result(pool: &Arc<(Mutex, Condvar)>) -> Option<(String, Vec)> { let (mutex, cvar) = pool.as_ref(); let mut guard = mutex.lock().unwrap(); @@ -1569,9 +1584,7 @@ fn pop_pool_result( guard.results.pop() } -fn unpool_function( - pool: &Arc<(Mutex, Condvar)> -) -> Option<(usize, RSFunction)> { +fn unpool_function(pool: &Arc<(Mutex, Condvar)>) -> Option<(usize, RSFunction)> { let mut guard = pool.0.lock().unwrap(); let func_id = guard.functions.len(); @@ -1579,18 +1592,15 @@ fn unpool_function( return None; } - match guard.functions.pop() { - Some(fun) => { - guard.running_fibers += 1; - Some((func_id, fun)) - }, - None => None, - } + guard.functions.pop().map(|fun| { + guard.running_fibers += 1; + (func_id, fun) + }) } fn codegen_fiber( global_scope: Arc>, - pool: Arc<(Mutex, Condvar)> + pool: Arc<(Mutex, Condvar)>, ) { loop { match unpool_function(&pool) { @@ -1598,12 +1608,12 @@ fn codegen_fiber( let mut c = FunContext { global_scope: &global_scope, fun_scope: HashMap::new(), - block_vars: vec!(), + block_vars: vec![], local_var_locs: HashMap::new(), labels: HashMap::new(), - func_id: func_id, + func_id, label_counter: 0, - break_dest_stack: vec!(), + break_dest_stack: vec![], }; match gen_fun(&mut c, &fun) { @@ -1613,20 +1623,20 @@ fn codegen_fiber( guard.results.push((fun.name, instructions)); guard.running_fibers -= 1; cvar.notify_all(); - }, + } Err(err) => { let (mutex, cvar) = pool.as_ref(); let mut guard = mutex.lock().unwrap(); guard.errors.push(err); guard.running_fibers -= 1; cvar.notify_all(); - }, + } } - }, + } None => { 
pool.1.notify_all(); return; - }, + } } } } @@ -1637,12 +1647,12 @@ pub fn generate(parse_result: ParseResult, writer: &mut dyn Write) { parse_result.functions, parse_result.variables, parse_result.defines, - writer + writer, ) { - Ok(_) => {}, + Ok(_) => {} Err(err) => { print_comp_error(&parse_result.file_paths, &err); std::process::exit(1); - }, + } } } diff --git a/src/main.rs b/src/main.rs index b725a36..0c70e1b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,15 +5,15 @@ mod parser; mod tokenizer; mod util; +use std::collections::hash_map::DefaultHasher; +use std::env; use std::fs; +use std::hash::{Hash, Hasher}; use std::io; -use std::env; use std::process::{Command, Stdio}; -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; -use parser::*; use codegen::generate; +use parser::*; struct Opts { asm: bool, @@ -41,7 +41,7 @@ fn main() { } else { match opts.output { Some(output) => output, - None => "a.out".to_string(), + None => "a.out".to_string(), } }; @@ -57,7 +57,7 @@ fn main() { std::process::exit(match prog_status.code() { Some(code) => code, - _ => 1, + _ => 1, }); } } @@ -77,22 +77,21 @@ fn compile(input_paths: &Vec, output_path: &String) { .spawn() .expect("Failed running the GNU Assembler"); - let parse_result = parse_or_die(&input_paths); + let parse_result = parse_or_die(input_paths); // Stream the assembly code straight into GNU assembler generate(parse_result, &mut as_process.stdin.as_ref().unwrap()); match as_process.wait() { - Ok(status) => if !status.success() { - let code = match status.code() { - Some(code) => code, - None => 1, - }; - std::process::exit(code); - }, + Ok(status) => { + if !status.success() { + let code = status.code().unwrap_or(1); + std::process::exit(code); + } + } Err(message) => { println!("Failed running GNU Assembler: {}", message); std::process::exit(1); - }, + } } let ld_status = Command::new("ld") @@ -103,10 +102,7 @@ fn compile(input_paths: &Vec, output_path: &String) { .expect("Failed 
running GNU Linker"); if !ld_status.success() { - let code = match ld_status.code() { - Some(code) => code, - None => 1, - }; + let code = ld_status.code().unwrap_or(1); std::process::exit(code); } else { fs::remove_file(tmp_obj_path).unwrap(); @@ -121,8 +117,8 @@ fn parse_opts() -> Opts { asm: false, run: false, output: None, - inputs: vec!(), - args: vec!(), + inputs: vec![], + args: vec![], }; let mut i = 1; @@ -140,13 +136,13 @@ fn parse_opts() -> Opts { "-h" | "--help" => { print_usage(&name); std::process::exit(0); - }, + } "-s" => { opts.asm = true; - }, + } "-r" => { opts.run = true; - }, + } "-o" => { if i + 1 >= args.len() { print_usage(&name); @@ -154,11 +150,9 @@ fn parse_opts() -> Opts { } i += 1; opts.output = Some(args[i].clone()); - }, + } "--" => pass_through = true, - input => { - opts.inputs.push(input.to_string()) - }, + input => opts.inputs.push(input.to_string()), } i += 1; @@ -184,7 +178,7 @@ fn parse_or_die(inputs: &Vec) -> ParseResult { let parse_result = parse_files(inputs); for err in &parse_result.errors { - print_comp_error(&parse_result.file_paths, &err); + print_comp_error(&parse_result.file_paths, err); } if !parse_result.errors.is_empty() { std::process::exit(1); diff --git a/src/memory.rs b/src/memory.rs index fda79c7..454dde9 100644 --- a/src/memory.rs +++ b/src/memory.rs @@ -1,7 +1,7 @@ use std::fmt; #[allow(dead_code)] -#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, Eq, PartialEq)] pub enum Reg { Rax = 0b0000000000000001, Rbx = 0b0000000000000010, @@ -11,8 +11,8 @@ pub enum Reg { Rsi = 0b0000000000100000, Rbp = 0b0000000001000000, Rsp = 0b0000000010000000, - R8 = 0b0000000100000000, - R9 = 0b0000001000000000, + R8 = 0b0000000100000000, + R9 = 0b0000001000000000, R10 = 0b0000010000000000, R11 = 0b0000100000000000, R12 = 0b0001000000000000, @@ -32,8 +32,8 @@ impl Reg { Reg::Rsi => "sil", Reg::Rbp => "bpl", Reg::Rsp => "spl", - Reg::R8 => "r8b", - Reg::R9 => "r9b", + Reg::R8 => "r8b", + Reg::R9 => "r9b", Reg::R10 => 
"r10b", Reg::R11 => "r11b", Reg::R12 => "r12b", @@ -100,7 +100,7 @@ impl Reg { // Efficiently represents a set of registers #[derive(Clone)] pub struct RegSet { - bitmask: u16 + bitmask: u16, } impl RegSet { @@ -109,19 +109,27 @@ impl RegSet { } pub fn of(reg: Reg) -> RegSet { - RegSet { bitmask: reg as u16 } + RegSet { + bitmask: reg as u16, + } } pub fn usable_caller_save() -> RegSet { let registers = [ - Reg::Rax, Reg::Rcx, Reg::Rdx, Reg::Rdi, - Reg::Rsi, Reg::R8, Reg::R9, Reg::R10, + Reg::Rax, + Reg::Rcx, + Reg::Rdx, + Reg::Rdi, + Reg::Rsi, + Reg::R8, + Reg::R9, + Reg::R10, Reg::R11, ]; let mut mask = 0; for reg in registers { - mask = mask | (reg as u16); + mask |= reg as u16; } RegSet { bitmask: mask } @@ -133,17 +141,21 @@ impl RegSet { pub fn union(&self, other: RegSet) -> RegSet { RegSet { - bitmask: self.bitmask | other.bitmask + bitmask: self.bitmask | other.bitmask, } } pub fn subtract(&self, other: &RegSet) -> RegSet { - RegSet { bitmask: self.bitmask & !other.bitmask } + RegSet { + bitmask: self.bitmask & !other.bitmask, + } } pub fn with(&self, reg: Reg) -> RegSet { let reg_mask = reg as u16; - RegSet { bitmask: self.bitmask | reg_mask } + RegSet { + bitmask: self.bitmask | reg_mask, + } } pub fn first(&self) -> Option { @@ -157,7 +169,7 @@ impl RegSet { } // Where values are located -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Eq)] pub enum Loc { // Stack position relative to %rbp // +1 means the return address, +2 means 7th arg, +3 means 8th, ... 
@@ -173,19 +185,12 @@ pub enum Loc { impl Loc { pub fn is_reg(&self) -> bool { - match self { - Loc::Register(_) => true, - _ => false, - } + matches!(self, Loc::Register(_)) } #[allow(dead_code)] pub fn is_mem(&self) -> bool { - match self { - Loc::Stack(_) => true, - Loc::Data(_) => true, - _ => false, - } + matches!(self, Loc::Stack(_) | Loc::Data(_)) } } @@ -200,8 +205,8 @@ impl fmt::Display for Reg { Reg::Rsi => write!(f, "rsi"), Reg::Rbp => write!(f, "rbp"), Reg::Rsp => write!(f, "rsp"), - Reg::R8 => write!(f, "r8"), - Reg::R9 => write!(f, "r9"), + Reg::R8 => write!(f, "r8"), + Reg::R9 => write!(f, "r9"), Reg::R10 => write!(f, "r10"), Reg::R11 => write!(f, "r11"), Reg::R12 => write!(f, "r12"), @@ -221,10 +226,10 @@ impl fmt::Debug for Reg { impl fmt::Display for Loc { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Loc::Stack(offset) => write!(f, "{}(%rbp)", 8 * offset), - Loc::Register(reg) => write!(f, "%{}", reg), + Loc::Stack(offset) => write!(f, "{}(%rbp)", 8 * offset), + Loc::Register(reg) => write!(f, "%{}", reg), Loc::Immediate(value) => write!(f, "${}", value), - Loc::Data(name) => write!(f, "{}(%rip)", name), + Loc::Data(name) => write!(f, "{}(%rip)", name), } } } diff --git a/src/parser.rs b/src/parser.rs index 9cc135c..242d574 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ -use std::sync::Condvar; -use crate::util::logical_cpu_count; use crate::ast::*; use crate::tokenizer::*; +use crate::util::logical_cpu_count; +use std::sync::Condvar; use std::collections::HashSet; use std::ops::DerefMut; @@ -24,12 +24,12 @@ pub struct ParseResult { impl ParseResult { fn new() -> ParseResult { ParseResult { - file_paths: vec!(), - strings: vec!(), - functions: vec!(), - variables: vec!(), - defines: vec!(), - errors: vec!(), + file_paths: vec![], + strings: vec![], + functions: vec![], + variables: vec![], + defines: vec![], + errors: vec![], } } } @@ -48,7 +48,7 @@ impl ParseState { fn new() -> ParseState { ParseState { 
result: ParseResult::new(), - parse_stack: vec!(), + parse_stack: vec![], parsed_set: HashSet::new(), running_parsers: 0, file_id: 0, @@ -91,23 +91,18 @@ impl ParseContext<'_> { } } -fn parse_root_var( - c: &mut ParseContext, name: String -) -> Result { +fn parse_root_var(c: &mut ParseContext, name: String) -> Result { let pos = c.pos(); let var = parse_var_entry(c, name)?; parse_tok(c, Token::Semicolon)?; - Ok(RSVariable{ - pos: pos, - var: var, - }) + Ok(RSVariable { pos, var }) } // Packs the given chars into 64 bit wide chars. // Return value will always be null terminated. // Expects all chars to be valid! pub fn pack_chars(chars: &Vec) -> Vec { - let mut values = vec!(); + let mut values = vec![]; let mut i = 0; while i < chars.len() { @@ -128,20 +123,20 @@ pub fn pack_chars(chars: &Vec) -> Vec { } fn parse_vec_values(c: &mut ParseContext) -> Result, CompErr> { - let mut values = vec!(); + let mut values = vec![]; match pop_tok(c)? { (_, Token::Int(value)) => { values.push(value); - }, + } (_, Token::Char(chars)) => { values.push(pack_chars(&chars)[0]); - }, + } (_, Token::Str(chars)) => return Ok(pack_chars(&chars)), other => { push_tok(c, other); return Ok(values); - }, + } }; // At this point, we're starting at comma loop { @@ -150,88 +145,74 @@ fn parse_vec_values(c: &mut ParseContext) -> Result, CompErr> { match pop_tok(c)? { (_, Token::Int(value)) => { values.push(value); - }, + } (_, Token::Char(chars)) => { values.push(pack_chars(&chars)[0]); - }, + } other => { // Unfortunately, B has ambiguous grammar... // So we're forced to push 2 tokens :( push_tok(c, (comma_pos, Token::Comma)); push_tok(c, other); break; - }, + } } - }, + } other => { push_tok(c, other); break; - }, + } } } - return Ok(values); + Ok(values) } fn parse_var_entry(c: &mut ParseContext, name: String) -> Result { match pop_tok(c)? { - (_, Token::LBracket) => { - let given_vec_size = match pop_tok(c)? 
{ - (_, Token::Int(max_index)) if max_index >= 0 => { - parse_tok(c, Token::RBracket)?; - max_index + 1 - }, - (_, Token::RBracket) => 0, - (pos, other) => { - return CompErr::err(&pos, format!( - "Expected positive int. Found {:?}", other)); - }, - }; - let vec_values = parse_vec_values(c)?; - // According to the B spec, we choose the max of these two values for - // the catual vector size - let vec_size = std::cmp::max(given_vec_size, vec_values.len() as i64); - Ok(Var::Vec(name, vec_size, vec_values)) - }, - (_, Token::Int(value)) => { - Ok(Var::Single(name, Some(value))) - }, - (_, Token::Char(chars)) => { - Ok(Var::Single(name, Some(pack_chars(&chars)[0]))) - }, + (_, Token::LBracket) => { + let given_vec_size = match pop_tok(c)? { + (_, Token::Int(max_index)) if max_index >= 0 => { + parse_tok(c, Token::RBracket)?; + max_index + 1 + } + (_, Token::RBracket) => 0, + (pos, other) => { + return CompErr::err(&pos, format!("Expected positive int. Found {:?}", other)); + } + }; + let vec_values = parse_vec_values(c)?; + // According to the B spec, we choose the max of these two values for + // the catual vector size + let vec_size = std::cmp::max(given_vec_size, vec_values.len() as i64); + Ok(Var::Vec(name, vec_size, vec_values)) + } + (_, Token::Int(value)) => Ok(Var::Single(name, Some(value))), + (_, Token::Char(chars)) => Ok(Var::Single(name, Some(pack_chars(&chars)[0]))), other => { push_tok(c, other); Ok(Var::Single(name, None)) - }, + } } } // Expects the #import token to have been parsed -fn parse_import( - c: &mut ParseContext -) -> Result { +fn parse_import(c: &mut ParseContext) -> Result { match pop_tok(c)? { (pos, Token::Str(chars)) => { let path: String = chars.into_iter().collect(); parse_tok(c, Token::Semicolon)?; - Ok(RSImport { - pos: pos, - path: path, - }) - }, - (pos, tok) => CompErr::err(&pos, format!( - "String expected. {:?} given", tok)), + Ok(RSImport { pos, path }) + } + (pos, tok) => CompErr::err(&pos, format!("String expected. 
{:?} given", tok)), } } // Expects the #define token to have been parsed -fn parse_define( - c: &mut ParseContext -) -> Result { +fn parse_define(c: &mut ParseContext) -> Result { let pos = c.pos(); let name = match pop_tok(c)? { (_, Token::Id(id)) => id, - (pos, tok) => return CompErr::err(&pos, format!( - "ID expected. {:?} given", tok)), + (pos, tok) => return CompErr::err(&pos, format!("ID expected. {:?} given", tok)), }; // Handles cases with ambiguous parens @@ -252,7 +233,7 @@ fn parse_define( let args = if should_parse_args { parse_args(c)? } else { - vec!() + vec![] }; let body = parse_expr(c)?; parse_tok(c, Token::Semicolon)?; @@ -265,9 +246,7 @@ fn parse_define( }) } -fn parse_args( - c: &mut ParseContext -) -> Result, CompErr> { +fn parse_args(c: &mut ParseContext) -> Result, CompErr> { parse_tok(c, Token::LParen)?; let mut args = Vec::::new(); @@ -282,21 +261,18 @@ fn parse_args( Token::RParen => break, Token::Id(id) => { if !should_parse_param { - return CompErr::err( - &pos, "Comma expected, id found".to_string()); + return CompErr::err(&pos, "Comma expected, id found".to_string()); } args.push(id); should_parse_param = false; - }, + } Token::Comma => { if should_parse_param { - return CompErr::err( - &pos, "id expected, comma found".to_string()); + return CompErr::err(&pos, "id expected, comma found".to_string()); } should_parse_param = true; - }, - other => return CompErr::err( - &pos, format!("Unexpected token: {:?}", other)), + } + other => return CompErr::err(&pos, format!("Unexpected token: {:?}", other)), } } @@ -304,9 +280,7 @@ fn parse_args( } // Parses everything after the name of a function -fn parse_fun( - c: &mut ParseContext, pos: Pos, name: String -) -> Result { +fn parse_fun(c: &mut ParseContext, pos: Pos, name: String) -> Result { Ok(RSFunction { pos, name, @@ -319,29 +293,27 @@ fn parse_statement(c: &mut ParseContext) -> Result { let (pos, tok) = pop_tok(c)?; match tok { - Token::Return => parse_statement_return(c), - Token::Break 
=> parse_statement_break(c, pos), - Token::LBrace => parse_statement_block(c), - Token::Auto => parse_statement_auto(c, pos), - Token::Extern => parse_statement_extern(c, pos), - Token::If => parse_statement_if(c), - Token::While => parse_statement_while(c), - Token::Switch => parse_statement_switch(c), - Token::Semicolon => Ok(Statement::Null), + Token::Return => parse_statement_return(c), + Token::Break => parse_statement_break(c, pos), + Token::LBrace => parse_statement_block(c), + Token::Auto => parse_statement_auto(c, pos), + Token::Extern => parse_statement_extern(c, pos), + Token::If => parse_statement_if(c), + Token::While => parse_statement_while(c), + Token::Switch => parse_statement_switch(c), + Token::Semicolon => Ok(Statement::Null), Token::Label(name) => Ok(Statement::Label(pos, name)), - Token::Goto => parse_statement_goto(c), + Token::Goto => parse_statement_goto(c), tok => { push_tok(c, (pos, tok)); parse_statement_expr(c) - }, + } } } // TODO: This loop delim technique is used in multiple places. Abstract away! 
// Expects opening `extrn` to have been parsed -fn parse_statement_extern( - c: &mut ParseContext, pos: Pos -) -> Result { +fn parse_statement_extern(c: &mut ParseContext, pos: Pos) -> Result { let mut ids = Vec::::new(); let mut should_parse_param = true; @@ -352,31 +324,26 @@ fn parse_statement_extern( Token::Semicolon => break, Token::Id(id) => { if !should_parse_param { - return CompErr::err( - &pos, "Comma expected, id found".to_string()); + return CompErr::err(&pos, "Comma expected, id found".to_string()); } ids.push(id); should_parse_param = false; - }, + } Token::Comma => { if should_parse_param { - return CompErr::err( - &pos, "id expected, comma found".to_string()); + return CompErr::err(&pos, "id expected, comma found".to_string()); } should_parse_param = true; - }, - other => return CompErr::err( - &pos, format!("Unexpected token: {:?}", other)), + } + other => return CompErr::err(&pos, format!("Unexpected token: {:?}", other)), } } Ok(Statement::Extern(pos, ids)) } // Expects opening `auto` to have been parsed -fn parse_statement_auto( - c: &mut ParseContext, pos: Pos -) -> Result { +fn parse_statement_auto(c: &mut ParseContext, pos: Pos) -> Result { let mut vars = Vec::::new(); let mut should_parse_param = true; @@ -386,22 +353,19 @@ fn parse_statement_auto( Token::Semicolon => break, Token::Id(id) => { if !should_parse_param { - return CompErr::err( - &pos, "Comma expected, id found".to_string()); + return CompErr::err(&pos, "Comma expected, id found".to_string()); } vars.push(parse_var_entry(c, id)?); should_parse_param = false; - }, + } Token::Comma => { if should_parse_param { - return CompErr::err( - &pos, "id expected, comma found".to_string()); + return CompErr::err(&pos, "id expected, comma found".to_string()); } should_parse_param = true; - }, - other => return CompErr::err( - &pos, format!("Unexpected token: {:?}", other)), + } + other => return CompErr::err(&pos, format!("Unexpected token: {:?}", other)), } } Ok(Statement::Auto(pos, vars)) 
@@ -420,14 +384,10 @@ fn parse_statement_if(c: &mut ParseContext) -> Result { other => { push_tok(c, other); None - }, + } }; - Ok(Statement::If( - cond_expr, - Box::new(if_body), - else_body - )) + Ok(Statement::If(cond_expr, Box::new(if_body), else_body)) } // Expect "goto" to have been parsed already @@ -436,7 +396,7 @@ fn parse_statement_goto(c: &mut ParseContext) -> Result { (pos, Token::Id(name)) => { parse_tok(c, Token::Semicolon)?; Ok(Statement::Goto(pos, name)) - }, + } (pos, _) => CompErr::err(&pos, "Expected ID".to_string()), } } @@ -449,28 +409,32 @@ fn parse_statement_switch(c: &mut ParseContext) -> Result { // As per the B spec, switch bodies must always be compound statements parse_tok(c, Token::LBrace)?; - let mut inner_statements = vec!(); + let mut inner_statements = vec![]; loop { let inner = match pop_tok(c)? { (pos, Token::Default) => { parse_tok(c, Token::Colon)?; SwInner::Default(pos) - }, + } (pos, Token::Case) => { let value = match pop_tok(c)? { - (_, Token::Int(value)) => value, + (_, Token::Int(value)) => value, (_, Token::Char(chars)) => pack_chars(&chars)[0], - (pos, tok) => return CompErr::err( - &pos, format!("Int or char expected, {:?} given.", tok)), + (pos, tok) => { + return CompErr::err( + &pos, + format!("Int or char expected, {:?} given.", tok), + ) + } }; parse_tok(c, Token::Colon)?; SwInner::Case(pos, value) - }, + } (_, Token::RBrace) => break, other => { push_tok(c, other); SwInner::Statement(parse_statement(c)?) 
- }, + } }; inner_statements.push(inner); } @@ -484,10 +448,7 @@ fn parse_statement_while(c: &mut ParseContext) -> Result { parse_tok(c, Token::RParen)?; let body = parse_statement(c)?; - Ok(Statement::While( - cond_expr, - Box::new(body) - )) + Ok(Statement::While(cond_expr, Box::new(body))) } // Expects opening `{` to have been parsed @@ -500,7 +461,7 @@ fn parse_statement_block(c: &mut ParseContext) -> Result { other => { push_tok(c, other); statements.push(parse_statement(c)?); - }, + } } } @@ -508,9 +469,7 @@ fn parse_statement_block(c: &mut ParseContext) -> Result { } // Expects the `break` keyword to have been parsed already -fn parse_statement_break( - c: &mut ParseContext, pos: Pos -) -> Result { +fn parse_statement_break(c: &mut ParseContext, pos: Pos) -> Result { parse_tok(c, Token::Semicolon)?; Ok(Statement::Break(pos)) } @@ -518,10 +477,9 @@ fn parse_statement_break( // Expects the `return` keyword to have been parsed already fn parse_statement_return(c: &mut ParseContext) -> Result { match pop_tok(c)? 
{ - (_, Token::LParen) => {}, + (_, Token::LParen) => {} (_, Token::Semicolon) => return Ok(Statement::Return), - (pos, _) => return CompErr::err(&pos, format!( - "Expected ( or ; after return statment")), + (pos, _) => return CompErr::err(&pos, "Expected ( or ; after return statment".to_owned()), } let expr = parse_expr(c)?; @@ -544,15 +502,15 @@ fn parse_expr(c: &mut ParseContext) -> Result { const COND_EXPR_PRECEDENCE: u8 = 2; fn get_lr_op_precedence(op: &BinOp) -> u8 { match op { - BinOp::Div | BinOp::Mod | BinOp::Mul => 10, - BinOp::Add | BinOp::Sub => 9, - BinOp::ShiftLeft | BinOp::ShiftRight => 8, - BinOp::Gt | BinOp::Lt | BinOp::Ge | BinOp::Le => 7, - BinOp::Eq | BinOp::Ne => 6, - BinOp::And => 5, - BinOp::Xor => 4, - BinOp::Or => 3, - BinOp::Assign(_) => 1, + BinOp::Div | BinOp::Mod | BinOp::Mul => 10, + BinOp::Add | BinOp::Sub => 9, + BinOp::ShiftLeft | BinOp::ShiftRight => 8, + BinOp::Gt | BinOp::Lt | BinOp::Ge | BinOp::Le => 7, + BinOp::Eq | BinOp::Ne => 6, + BinOp::And => 5, + BinOp::Xor => 4, + BinOp::Or => 3, + BinOp::Assign(_) => 1, } } @@ -560,9 +518,7 @@ fn get_lr_op_precedence(op: &BinOp) -> u8 { * Tries parsing operators until the precedence value doesn't meet requirement. * In other words, it recurses, but doesn't consume lower priority ops. 
*/ -fn parse_expr_prec( - c: &mut ParseContext, precedence: u8 -) -> Result { +fn parse_expr_prec(c: &mut ParseContext, precedence: u8) -> Result { let unchained = parse_expr_unchained(c)?; let mut expr = parse_postfix(c, unchained)?; loop { @@ -576,7 +532,7 @@ fn parse_expr_prec( push_tok(c, (pos, tok)); return Ok(expr); } - }, + } (pos, Token::Question, _) if COND_EXPR_PRECEDENCE >= precedence => { let true_expr = parse_expr_prec(c, COND_EXPR_PRECEDENCE)?; parse_tok(c, Token::Colon)?; @@ -586,19 +542,21 @@ fn parse_expr_prec( pos, Box::new(expr), Box::new(true_expr), - Box::new(false_expr) + Box::new(false_expr), ) - }, + } (pos, tok, _) => { push_tok(c, (pos, tok)); - return Ok(expr) - }, + return Ok(expr); + } } } } fn join_assignment( - post_op: Option>, lhs_expr: Expr, rhs_expr: Expr + post_op: Option>, + lhs_expr: Expr, + rhs_expr: Expr, ) -> Result { match lhs_expr { Expr::Id(pos, id) => { @@ -607,41 +565,32 @@ fn join_assignment( pos.clone(), *post_op, Box::new(Expr::Id(pos.clone(), id.to_string())), - Box::new(rhs_expr) + Box::new(rhs_expr), ), - None => rhs_expr + None => rhs_expr, }; - Ok(Expr::Assignment( - pos, - id, - Box::new(rhs) - )) - }, + Ok(Expr::Assignment(pos, id, Box::new(rhs))) + } Expr::Dereference(pos, lhs) => { let rhs = match post_op { Some(post_op) => Expr::BinOperator( pos.clone(), *post_op, Box::new(Expr::Dereference(pos.clone(), lhs.clone())), - Box::new(rhs_expr) + Box::new(rhs_expr), ), - None => rhs_expr + None => rhs_expr, }; - Ok(Expr::DerefAssignment( - pos, - lhs, - Box::new(rhs) - )) - }, + Ok(Expr::DerefAssignment(pos, lhs, Box::new(rhs))) + } _ => CompErr::err( &lhs_expr.pos(), - "lhs of assignment must be ID or deref".to_string()), + "lhs of assignment must be ID or deref".to_string(), + ), } } -fn join_exprs( - op: BinOp, lhs: Expr, rhs: Expr -) -> Result { +fn join_exprs(op: BinOp, lhs: Expr, rhs: Expr) -> Result { if let BinOp::Assign(post_op) = op { join_assignment(post_op, lhs, rhs) } else { @@ -649,60 +598,56 @@ fn 
join_exprs( lhs.pos(), op, Box::new(lhs), - Box::new(rhs) + Box::new(rhs), )) } } // Always returns the next token // Optionally returns a binop if appropriate -fn parse_op( - c: &mut ParseContext -) -> Result<(Pos, Token, Option), CompErr> { +fn parse_op(c: &mut ParseContext) -> Result<(Pos, Token, Option), CompErr> { let (pos, tok) = pop_tok(c)?; let binop = match tok { - Token::EqEq => Some(BinOp::Eq), - Token::Eq => Some(BinOp::Assign(None)), + Token::EqEq => Some(BinOp::Eq), + Token::Eq => Some(BinOp::Assign(None)), Token::EqShiftRight => Some(BinOp::assign(BinOp::ShiftRight)), - Token::EqGe => Some(BinOp::assign(BinOp::Ge)), - Token::EqShiftLeft => Some(BinOp::assign(BinOp::ShiftLeft)), - Token::EqLe => Some(BinOp::assign(BinOp::Le)), - Token::EqNe => Some(BinOp::assign(BinOp::Ne)), - Token::EqEqEq => Some(BinOp::assign(BinOp::Eq)), - Token::EqPlus => Some(BinOp::assign(BinOp::Add)), - Token::EqMinus => Some(BinOp::assign(BinOp::Sub)), - Token::EqLt => Some(BinOp::assign(BinOp::Lt)), - Token::EqGt => Some(BinOp::assign(BinOp::Gt)), - Token::EqAmpersand => Some(BinOp::assign(BinOp::And)), - Token::EqPipe => Some(BinOp::assign(BinOp::Or)), - Token::EqCaret => Some(BinOp::assign(BinOp::Xor)), - Token::EqPercent => Some(BinOp::assign(BinOp::Mod)), - Token::EqSlash => Some(BinOp::assign(BinOp::Div)), - Token::EqAsterisk => Some(BinOp::assign(BinOp::Mul)), - Token::Plus => Some(BinOp::Add), - Token::Minus => Some(BinOp::Sub), - Token::Le => Some(BinOp::Le), - Token::Lt => Some(BinOp::Lt), - Token::Ge => Some(BinOp::Ge), - Token::Gt => Some(BinOp::Gt), - Token::Ne => Some(BinOp::Ne), - Token::ShiftLeft => Some(BinOp::ShiftLeft), - Token::ShiftRight => Some(BinOp::ShiftRight), - Token::Ampersand => Some(BinOp::And), - Token::Pipe => Some(BinOp::Or), - Token::Caret => Some(BinOp::Xor), - Token::Percent => Some(BinOp::Mod), - Token::Slash => Some(BinOp::Div), - Token::Asterisk => Some(BinOp::Mul), - _ => None, + Token::EqGe => Some(BinOp::assign(BinOp::Ge)), + 
Token::EqShiftLeft => Some(BinOp::assign(BinOp::ShiftLeft)), + Token::EqLe => Some(BinOp::assign(BinOp::Le)), + Token::EqNe => Some(BinOp::assign(BinOp::Ne)), + Token::EqEqEq => Some(BinOp::assign(BinOp::Eq)), + Token::EqPlus => Some(BinOp::assign(BinOp::Add)), + Token::EqMinus => Some(BinOp::assign(BinOp::Sub)), + Token::EqLt => Some(BinOp::assign(BinOp::Lt)), + Token::EqGt => Some(BinOp::assign(BinOp::Gt)), + Token::EqAmpersand => Some(BinOp::assign(BinOp::And)), + Token::EqPipe => Some(BinOp::assign(BinOp::Or)), + Token::EqCaret => Some(BinOp::assign(BinOp::Xor)), + Token::EqPercent => Some(BinOp::assign(BinOp::Mod)), + Token::EqSlash => Some(BinOp::assign(BinOp::Div)), + Token::EqAsterisk => Some(BinOp::assign(BinOp::Mul)), + Token::Plus => Some(BinOp::Add), + Token::Minus => Some(BinOp::Sub), + Token::Le => Some(BinOp::Le), + Token::Lt => Some(BinOp::Lt), + Token::Ge => Some(BinOp::Ge), + Token::Gt => Some(BinOp::Gt), + Token::Ne => Some(BinOp::Ne), + Token::ShiftLeft => Some(BinOp::ShiftLeft), + Token::ShiftRight => Some(BinOp::ShiftRight), + Token::Ampersand => Some(BinOp::And), + Token::Pipe => Some(BinOp::Or), + Token::Caret => Some(BinOp::Xor), + Token::Percent => Some(BinOp::Mod), + Token::Slash => Some(BinOp::Div), + Token::Asterisk => Some(BinOp::Mul), + _ => None, }; Ok((pos, tok, binop)) } -fn parse_expr_id_unchained( - c: &mut ParseContext, id: String -) -> Result { +fn parse_expr_id_unchained(c: &mut ParseContext, id: String) -> Result { let (pos, tok) = pop_tok(c)?; match tok { // Handle vector index sugar syntax @@ -710,7 +655,8 @@ fn parse_expr_id_unchained( let index_expr = parse_expr(c)?; parse_tok(c, Token::RBracket)?; - Ok(Expr::Dereference(pos.clone(), + Ok(Expr::Dereference( + pos.clone(), Box::new(Expr::BinOperator( pos.clone(), BinOp::Add, @@ -720,15 +666,15 @@ fn parse_expr_id_unchained( pos.clone(), BinOp::ShiftLeft, Box::new(index_expr), - Box::new(Expr::Int(pos, 3)) - )) - )) + Box::new(Expr::Int(pos, 3)), + )), + )), )) - }, + } tok 
=> { push_tok(c, (pos.clone(), tok)); Ok(Expr::Id(pos, id)) - }, + } } } @@ -742,29 +688,39 @@ fn parse_expr_unchained(c: &mut ParseContext) -> Result { Token::Str(value) => { c.strings.push(value); Ok(Expr::Str(pos, (c.file_id, c.strings.len() - 1))) - }, + } Token::Ampersand => match pop_tok(c)? { (pos, Token::Id(id)) => Ok(Expr::Reference(pos, id)), (pos, tok) => CompErr::err(&pos, format!("Expected id, found {:?}", tok)), }, - Token::Asterisk => Ok(Expr::Dereference( - pos, Box::new(parse_expr_unchained(c)?))), + Token::Asterisk => Ok(Expr::Dereference(pos, Box::new(parse_expr_unchained(c)?))), Token::PlusPlus => Ok(Expr::UnaryOperator( - pos, UnaryOp::PreIncrement, Box::new(parse_expr_unchained(c)?))), + pos, + UnaryOp::PreIncrement, + Box::new(parse_expr_unchained(c)?), + )), Token::MinusMinus => Ok(Expr::UnaryOperator( - pos, UnaryOp::PreDecrement, Box::new(parse_expr_unchained(c)?))), + pos, + UnaryOp::PreDecrement, + Box::new(parse_expr_unchained(c)?), + )), Token::Minus => Ok(Expr::UnaryOperator( - pos, UnaryOp::Negate, Box::new(parse_expr_unchained(c)?))), + pos, + UnaryOp::Negate, + Box::new(parse_expr_unchained(c)?), + )), Token::Tilde => Ok(Expr::UnaryOperator( - pos, UnaryOp::BitNot, Box::new(parse_expr_unchained(c)?))), + pos, + UnaryOp::BitNot, + Box::new(parse_expr_unchained(c)?), + )), // Allow parens for disambiguation Token::LParen => { let expr = parse_expr(c)?; parse_tok(c, Token::RParen)?; Ok(expr) - }, - other => CompErr::err(&pos, format!( - "Expected expression. {:?} found", other)) + } + other => CompErr::err(&pos, format!("Expected expression. 
{:?} found", other)), } } @@ -774,25 +730,29 @@ fn parse_postfix(c: &mut ParseContext, expr: Expr) -> Result { Token::MinusMinus => { let next = parse_postfix(c, expr)?; Ok(Expr::UnaryOperator( - pos, UnaryOp::PostDecrement, Box::new(next))) - }, + pos, + UnaryOp::PostDecrement, + Box::new(next), + )) + } Token::PlusPlus => { let next = parse_postfix(c, expr)?; Ok(Expr::UnaryOperator( - pos, UnaryOp::PostIncrement, Box::new(next))) - }, + pos, + UnaryOp::PostIncrement, + Box::new(next), + )) + } Token::LParen => parse_expr_call(c, expr), _ => { push_tok(c, (pos, tok)); Ok(expr) - }, + } } } // Assumes the rparen has already been parsed -fn parse_expr_call( - c: &mut ParseContext, callee: Expr -) -> Result { +fn parse_expr_call(c: &mut ParseContext, callee: Expr) -> Result { let mut params = Vec::::new(); // To alternate between comma & arg parsing let mut should_parse_param = true; @@ -805,20 +765,18 @@ fn parse_expr_call( Token::RParen => break, Token::Comma => { if should_parse_param { - return CompErr::err( - &pos, "Expr expected, comma found".to_string()); + return CompErr::err(&pos, "Expr expected, comma found".to_string()); } should_parse_param = true; - }, + } tok => { push_tok(c, (pos.clone(), tok)); if !should_parse_param { - return CompErr::err( - &pos, "Comma expected".to_string()); + return CompErr::err(&pos, "Comma expected".to_string()); } params.push(parse_expr(c)?); should_parse_param = false; - }, + } } } @@ -835,8 +793,8 @@ fn get_parse_position(content: &Vec, offset: usize) -> (String, usize, usi let mut col = 0; let mut current_row_offset = 0; - for i in 0..offset { - if content[i] as char == '\n' { + for (i, item) in content.iter().enumerate().take(offset) { + if *item == '\n' { row += 1; col = 0; current_row_offset = i + 1; @@ -850,14 +808,12 @@ fn get_parse_position(content: &Vec, offset: usize) -> (String, usize, usi row_end += 1; } - let line: &String = &content[current_row_offset..row_end] - .into_iter() - .collect(); + let line: &String = 
&content[current_row_offset..row_end].iter().collect(); (line.to_string(), row, col) } -pub fn print_comp_error(file_paths: &Vec<(String, PathBuf)>, err: &CompErr) { +pub fn print_comp_error(file_paths: &[(String, PathBuf)], err: &CompErr) { println!("Compile error: {}", err.message); match &err.pos { Some(pos) => { @@ -865,8 +821,7 @@ pub fn print_comp_error(file_paths: &Vec<(String, PathBuf)>, err: &CompErr) { let content = std::fs::read_to_string(&path_buf).unwrap(); println!("In file: {}", file_name); - let (line, row, col) = get_parse_position( - &content.chars().collect(), pos.offset); + let (line, row, col) = get_parse_position(&content.chars().collect(), pos.offset); let prefix = format!("{} |", row); println!("{}{}", prefix, line); @@ -876,20 +831,21 @@ pub fn print_comp_error(file_paths: &Vec<(String, PathBuf)>, err: &CompErr) { } println!("^") - }, - None => {}, + } + None => {} } } fn parse_content( - file_id: usize, content: String + file_id: usize, + content: String, ) -> Result<(RootStatements, Vec>), CompErr> { let mut c = ParseContext { content: content.as_bytes(), offset: 0, file_id, - strings: vec!(), - tok_stack: vec!(), + strings: vec![], + tok_stack: vec![], }; let mut root_statements = RootStatements::new(); @@ -901,43 +857,36 @@ fn parse_content( match tok { Token::LParen => { push_tok(&mut c, (pos.clone(), Token::LParen)); - root_statements.functions.push( - parse_fun(&mut c, pos, id)?); - }, + root_statements.functions.push(parse_fun(&mut c, pos, id)?); + } tok => { push_tok(&mut c, (pos, tok)); - root_statements.variables.push( - parse_root_var(&mut c, id)?); - }, + root_statements.variables.push(parse_root_var(&mut c, id)?); + } } - }, - (_, Token::Import) => - root_statements.imports.push(parse_import(&mut c)?), - (_, Token::Define) => - root_statements.defines.push(parse_define(&mut c)?), + } + (_, Token::Import) => root_statements.imports.push(parse_import(&mut c)?), + (_, Token::Define) => 
root_statements.defines.push(parse_define(&mut c)?), (_, Token::Eof) => break, - (pos, tok) => return CompErr::err(&pos, format!( - "Expected id. {:?} found", tok)), + (pos, tok) => return CompErr::err(&pos, format!("Expected id. {:?} found", tok)), } } Ok((root_statements, c.strings)) } -fn relative_to_canonical_path( - base: &PathBuf, imp: &PathBuf -) -> Result { +fn relative_to_canonical_path(base: &Path, imp: &PathBuf) -> Result { if imp.is_absolute() { Ok(imp.clone()) } else { match base.parent() { Some(parent) => match parent.join(imp).canonicalize() { Ok(other) => Ok(other), - Err(err) => Err(CompErr { + Err(err) => Err(CompErr { pos: None, message: format!("Error importing {:?}: {}", imp, err), }), }, - None => Ok(imp.clone()), + None => Ok(imp.clone()), } } } @@ -945,15 +894,15 @@ fn relative_to_canonical_path( pub fn parse_files(paths: &Vec) -> ParseResult { let mut parse_state = ParseState::new(); for path in paths { + #[allow(clippy::expect_fun_call)] parse_state.push_path_to_parse( - Path::new(path).canonicalize() - .expect(format!("Invalid path: {}", path).as_str())); + Path::new(path) + .canonicalize() + .expect(format!("Invalid path: {}", path).as_str()), + ); } - let parse_state_arc = Arc::new(( - Mutex::new(parse_state), - Condvar::new() - )); + let parse_state_arc = Arc::new((Mutex::new(parse_state), Condvar::new())); let thread_count = logical_cpu_count(); let mut handles = Vec::with_capacity(thread_count); @@ -974,26 +923,18 @@ pub fn parse_files(paths: &Vec) -> ParseResult { let mut final_result = ParseResult::new(); std::mem::swap( &mut mutex.lock().unwrap().deref_mut().result, - &mut final_result + &mut final_result, ); final_result } -fn parse_fiber( - parse_state: Arc<(Mutex, Condvar)> -) { - loop { - match unpool_file_path(&parse_state) { - Some((file_id, path_buf)) => - parse_file(file_id, path_buf, &parse_state), - None => break, - } +fn parse_fiber(parse_state: Arc<(Mutex, Condvar)>) { + while let Some((file_id, path_buf)) = 
unpool_file_path(&parse_state) { + parse_file(file_id, path_buf, &parse_state); } } -fn unpool_file_path( - parse_state: &Arc<(Mutex, Condvar)> -) -> Option<(usize, PathBuf)> { +fn unpool_file_path(parse_state: &Arc<(Mutex, Condvar)>) -> Option<(usize, PathBuf)> { let (mutex, cvar) = parse_state.as_ref(); let mut guard = mutex.lock().ok()?; @@ -1002,20 +943,16 @@ fn unpool_file_path( if guard.running_parsers == 0 && guard.parse_stack.is_empty() { return None; } - match guard.pop_path_to_parse() { - res @ Some(_) => return res, - None => {}, + let res = guard.pop_path_to_parse(); + if res.is_some() { + return res; } guard = cvar.wait(guard).ok()?; } } // Parse a file, add to the parse state, and notify the cvar -fn parse_file( - file_id: usize, - path: PathBuf, - parse_state: &Arc<(Mutex, Condvar)> -) { +fn parse_file(file_id: usize, path: PathBuf, parse_state: &Arc<(Mutex, Condvar)>) { let path_str = path.to_str().unwrap().to_string(); let content = std::fs::read_to_string(&path).unwrap(); @@ -1024,7 +961,7 @@ fn parse_file( let parsed_content = parse_content(file_id, content); let mut guard = match mutex.lock() { Ok(guard) => guard, - _ => return, // Poison pill! + _ => return, // Poison pill! 
}; match parsed_content { @@ -1043,11 +980,11 @@ fn parse_file( guard.result.defines.append(&mut statements.defines); guard.result.file_paths.push((path_str, path)); guard.result.strings.push((file_id, strings)); - }, + } Err(error) => { guard.result.errors.push(error); guard.result.file_paths.push((path_str, path)); - }, + } } guard.running_parsers -= 1; cvar.notify_all(); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ed96795..76aa2bc 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,12 +1,12 @@ +use crate::ast::{CompErr, Pos}; use crate::parser::ParseContext; -use crate::ast::{Pos, CompErr}; #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] use crate::util::lsb_number; #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] use std::arch::x86_64::*; -#[derive(Debug, PartialEq)] +#[derive(Debug, Eq, PartialEq)] pub enum Token { Id(String), Label(String), @@ -57,12 +57,24 @@ pub enum Token { Ge, Ne, // For the slew of different assignment operators - Eq , EqEq , EqEqEq , - EqPlus , EqMinus , EqLe , - EqLt , EqGe , EqGt , - EqNe , EqShiftLeft, EqShiftRight, - EqAmpersand, EqPipe , EqCaret , - EqPercent , EqSlash , EqAsterisk , + Eq, + EqEq, + EqEqEq, + EqPlus, + EqMinus, + EqLe, + EqLt, + EqGe, + EqGt, + EqNe, + EqShiftLeft, + EqShiftRight, + EqAmpersand, + EqPipe, + EqCaret, + EqPercent, + EqSlash, + EqAsterisk, } // Returns false if it failed to parse the given token @@ -71,8 +83,10 @@ pub fn parse_tok(c: &mut ParseContext, expected: Token) -> Result<(), CompErr> { if expected == recieved { Ok(()) } else { - CompErr::err(&pos, format!( - "Expected {:?}, but {:?} was found", expected, recieved)) + CompErr::err( + &pos, + format!("Expected {:?}, but {:?} was found", expected, recieved), + ) } } @@ -80,7 +94,7 @@ pub fn parse_tok(c: &mut ParseContext, expected: Token) -> Result<(), CompErr> { // Returns Token::Eof for Eof (considered a valid token) pub fn pop_tok(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { match 
c.tok_stack.pop() { - None => {}, + None => {} Some(next) => return Ok(next), }; @@ -94,15 +108,15 @@ pub fn pop_tok(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let ch = unsafe { *c.content.get_unchecked(c.offset) }; match ch as char { - '\'' => get_tok_char(c), - '\"' => get_tok_str(c), - '#' => get_tok_meta(c), + '\'' => get_tok_char(c), + '\"' => get_tok_str(c), + '#' => get_tok_meta(c), // Handle '=' differently because of the chaining rule - '=' => Ok(get_tok_equals(c)), + '=' => Ok(get_tok_equals(c)), '_' | 'a'..='z' | 'A'..='Z' => get_tok_word(c), - '1'..='9' => get_tok_int_decimal(c), - '0' => get_tok_int_octal(c), - _ => get_tok_symbol(c), + '1'..='9' => get_tok_int_decimal(c), + '0' => get_tok_int_octal(c), + _ => get_tok_symbol(c), } } @@ -138,18 +152,11 @@ fn get_tok_symbol(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let pos = c.pos(); c.offset += 1; match unsafe { *c.content.get_unchecked(c.offset - 1) } as char { - '+' => multi_tok!(c, pos, Token::Plus, - '+', Token::PlusPlus), - '-' => multi_tok!(c, pos, Token::Minus, - '-', Token::MinusMinus), - '>' => multi_tok!(c, pos, Token::Gt, - '>', Token::ShiftRight, - '=', Token::Ge), - '<' => multi_tok!(c, pos, Token::Lt, - '<', Token::ShiftLeft, - '=', Token::Le), - '!' => multi_tok!(c, pos, Token::Bang, - '=', Token::Ne), + '+' => multi_tok!(c, pos, Token::Plus, '+', Token::PlusPlus), + '-' => multi_tok!(c, pos, Token::Minus, '-', Token::MinusMinus), + '>' => multi_tok!(c, pos, Token::Gt, '>', Token::ShiftRight, '=', Token::Ge), + '<' => multi_tok!(c, pos, Token::Lt, '<', Token::ShiftLeft, '=', Token::Le), + '!' 
=> multi_tok!(c, pos, Token::Bang, '=', Token::Ne), '(' => Ok((pos, Token::LParen)), ')' => Ok((pos, Token::RParen)), '{' => Ok((pos, Token::LBrace)), @@ -178,20 +185,18 @@ fn get_tok_symbol(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { // Returns a metaprogramming token fn get_tok_meta(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let pos = c.pos(); - let next_word = id_slice(&pos, &c.content, c.offset + 1)?; + let next_word = id_slice(&pos, c.content, c.offset + 1)?; match next_word { "import" => { c.offset += 1 + next_word.len(); Ok((c.pos(), Token::Import)) - }, + } "define" => { c.offset += 1 + next_word.len(); Ok((c.pos(), Token::Define)) - }, - other => { - CompErr::err(&pos, format!("Invalid token: #{}", other)) - }, + } + other => CompErr::err(&pos, format!("Invalid token: #{}", other)), } } @@ -200,8 +205,10 @@ fn get_tok_equals(c: &mut ParseContext) -> (Pos, Token) { // Peek at the next 2 chars let (c1, c2) = unsafe { if c.offset + 2 < c.content.len() { - (*c.content.get_unchecked(c.offset + 1), - *c.content.get_unchecked(c.offset + 2)) + ( + *c.content.get_unchecked(c.offset + 1), + *c.content.get_unchecked(c.offset + 2), + ) } else if c.offset + 1 < c.content.len() { (*c.content.get_unchecked(c.offset + 1), 0) } else { @@ -216,18 +223,18 @@ fn get_tok_equals(c: &mut ParseContext) -> (Pos, Token) { ('<', '=') => (3, Token::EqLe), ('!', '=') => (3, Token::EqNe), ('=', '=') => (3, Token::EqEqEq), - ('=', _) => (2, Token::EqEq), - ('+', _) => (2, Token::EqPlus), - ('-', _) => (2, Token::EqMinus), - ('<', _) => (2, Token::EqLt), - ('>', _) => (2, Token::EqGt), - ('&', _) => (2, Token::EqAmpersand), - ('|', _) => (2, Token::EqPipe), - ('^', _) => (2, Token::EqCaret), - ('%', _) => (2, Token::EqPercent), - ('/', _) => (2, Token::EqSlash), - ('*', _) => (2, Token::EqAsterisk), - _ => (1, Token::Eq), + ('=', _) => (2, Token::EqEq), + ('+', _) => (2, Token::EqPlus), + ('-', _) => (2, Token::EqMinus), + ('<', _) => (2, Token::EqLt), + ('>', 
_) => (2, Token::EqGt), + ('&', _) => (2, Token::EqAmpersand), + ('|', _) => (2, Token::EqPipe), + ('^', _) => (2, Token::EqCaret), + ('%', _) => (2, Token::EqPercent), + ('/', _) => (2, Token::EqSlash), + ('*', _) => (2, Token::EqAsterisk), + _ => (1, Token::Eq), }; let pos = c.pos(); @@ -235,25 +242,21 @@ fn get_tok_equals(c: &mut ParseContext) -> (Pos, Token) { (pos, tok) } -fn get_tok_int_octal( - c: &mut ParseContext -) -> Result<(Pos, Token), CompErr> { +fn get_tok_int_octal(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let pos = c.pos(); - let current_word = id_slice(&pos, &c.content, c.offset)?; + let current_word = id_slice(&pos, c.content, c.offset)?; let mut value = 0; let mut significance = 1; for c in current_word.bytes().rev() { - if c > '7' as u8 || c < '0' as u8 { - return CompErr::err(&pos, format!( - "Invalid int literal: {}", current_word)); + if !(b'0'..=b'7').contains(&c) { + return CompErr::err(&pos, format!("Invalid int literal: {}", current_word)); } let x = c as i64 - '0' as i64; if value > i64::MAX - x * significance { - return CompErr::err(&pos, format!( - "Invalid int literal: {}", current_word)); + return CompErr::err(&pos, format!("Invalid int literal: {}", current_word)); } value += x * significance; @@ -263,25 +266,21 @@ fn get_tok_int_octal( Ok((pos, Token::Int(value))) } -fn get_tok_int_decimal( - c: &mut ParseContext -) -> Result<(Pos, Token), CompErr> { +fn get_tok_int_decimal(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let pos = c.pos(); - let current_word = id_slice(&pos, &c.content, c.offset)?; + let current_word = id_slice(&pos, c.content, c.offset)?; let mut value = 0; let mut significance = 1; for c in current_word.bytes().rev() { - if c > '9' as u8 || c < '0' as u8 { - return CompErr::err(&pos, format!( - "Invalid int literal: {}", current_word)); + if !(b'0'..=b'9').contains(&c) { + return CompErr::err(&pos, format!("Invalid int literal: {}", current_word)); } let x = c as i64 - '0' as i64; if 
value > i64::MAX - x * significance { - return CompErr::err(&pos, format!( - "Invalid int literal: {}", current_word)); + return CompErr::err(&pos, format!("Invalid int literal: {}", current_word)); } value += x * significance; @@ -294,9 +293,7 @@ fn get_tok_int_decimal( fn get_tok_str(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let pos = c.pos(); c.offset += 1; - let values = unsafe { - get_inside_quotes(c, '\"')? - }; + let values = unsafe { get_inside_quotes(c, '\"')? }; c.offset += 1; Ok((pos, Token::Str(values))) } @@ -308,9 +305,7 @@ fn get_tok_str(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { fn get_tok_char(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let pos = c.pos(); c.offset += 1; - let chars = unsafe { - get_inside_quotes(c, '\'')? - }; + let chars = unsafe { get_inside_quotes(c, '\'')? }; if chars.len() > 8 { CompErr::err(&pos, "A wide char may be at most 8 bytes".to_string()) @@ -321,11 +316,9 @@ fn get_tok_char(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { } // Gets chars enclosed in the given terminal character -unsafe fn get_inside_quotes( - c: &mut ParseContext, terminal: char -) -> Result, CompErr> { +unsafe fn get_inside_quotes(c: &mut ParseContext, terminal: char) -> Result, CompErr> { let mut i = c.offset; - let mut chars = vec!(); + let mut chars = vec![]; while i < c.content.len() && *c.content.get_unchecked(i) as char != terminal { let chr = match *c.content.get_unchecked(i) as char { @@ -333,35 +326,35 @@ unsafe fn get_inside_quotes( i += 1; // Hit EOF while parsing char if i >= c.content.len() { - return CompErr::err( - &c.pos(), "Hit EOF while parsing char".to_string()); + return CompErr::err(&c.pos(), "Hit EOF while parsing char".to_string()); } match *c.content.get_unchecked(i) as char { - '*' => '*', - 'n' => '\n', - 'e' => 4 as char, // EOT - '0' => '\0', - 't' => '\t', + '*' => '*', + 'n' => '\n', + 'e' => 4 as char, // EOT + '0' => '\0', + 't' => '\t', '\'' => '\'', '\"' => '\"', // 
For compliance with the B manual // These aren't ever necessary in code compiled with b64 - '{' => '{', - '}' => '}', - other => return CompErr::err( - &Pos::new(i, c.file_id), - format!("Unknown char/str escape char: {}", other)), + '{' => '{', + '}' => '}', + other => { + return CompErr::err( + &Pos::new(i, c.file_id), + format!("Unknown char/str escape char: {}", other), + ) + } } - }, + } chr => { let ichar = chr as i64; - if ichar >= 256 || ichar < 0 { - return CompErr::err( - &c.pos(), - "b64 only supports ASCII chars".to_string()); + if !(0..256).contains(&ichar) { + return CompErr::err(&c.pos(), "b64 only supports ASCII chars".to_string()); } chr - }, + } }; i += 1; chars.push(chr); @@ -374,40 +367,38 @@ unsafe fn get_inside_quotes( // Parsed word-like tokens. Includes keywords and IDs fn get_tok_word(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { let pos = c.pos(); - let slice = id_slice(&pos, &c.content, c.offset)?; + let slice = id_slice(&pos, c.content, c.offset)?; c.offset += slice.len(); // Safe to assume it's valid utf8 since we enforce ASCII let tok = match slice { - "auto" => Token::Auto, - "break" => Token::Break, - "case" => Token::Case, + "auto" => Token::Auto, + "break" => Token::Break, + "case" => Token::Case, "default" => Token::Default, - "else" => Token::Else, - "extrn" => Token::Extern, - "goto" => Token::Goto, - "if" => Token::If, - "return" => Token::Return, - "switch" => Token::Switch, - "while" => Token::While, + "else" => Token::Else, + "extrn" => Token::Extern, + "goto" => Token::Goto, + "if" => Token::If, + "return" => Token::Return, + "switch" => Token::Switch, + "while" => Token::While, word => { let name: String = word.to_string(); if c.offset >= c.content.len() { Token::Id(name) } else { - let ch = unsafe { - *c.content.get_unchecked(c.offset) - }; + let ch = unsafe { *c.content.get_unchecked(c.offset) }; - if ch == ':' as u8 { + if ch == b':' { c.offset += 1; Token::Label(name) } else { Token::Id(name) } } - }, + } }; 
Ok((pos, tok)) @@ -418,9 +409,7 @@ fn get_tok_word(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { * @return An empty slice if the offset is out of bounds, * or if there are no alphanumeric characters at that position */ -fn id_slice<'a>( - pos: &Pos, slice: &'a [u8], offset: usize -) -> Result<&'a str, CompErr> { +fn id_slice<'a>(pos: &Pos, slice: &'a [u8], offset: usize) -> Result<&'a str, CompErr> { let len = id_len(slice, offset); if len == usize::MAX { @@ -429,26 +418,23 @@ fn id_slice<'a>( unsafe { Ok(std::str::from_utf8_unchecked( - slice.get_unchecked(offset..offset + len))) + slice.get_unchecked(offset..offset + len), + )) } } /// Returns usize::MAX if there are invalid ASCII characters -fn id_len( - slice: &[u8], offset: usize -) -> usize { +fn id_len(slice: &[u8], offset: usize) -> usize { unsafe { #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] - return simd_id_len( slice, offset); + return simd_id_len(slice, offset); #[cfg(not(all(target_arch = "x86_64", target_feature = "avx2")))] return non_simd_id_len(slice, offset); } } -unsafe fn non_simd_id_len( - slice: &[u8], offset: usize -) -> usize { +unsafe fn non_simd_id_len(slice: &[u8], offset: usize) -> usize { let mut len = 0; while offset + len < slice.len() { @@ -466,14 +452,12 @@ unsafe fn non_simd_id_len( } fn is_alphanum_underscore(c: u8) -> bool { - (c >= 97 && c <= 122) | (c >= 65 && c <= 90) | (c >= 48 && c <= 57) | (c == 95) + (97..=122).contains(&c) | (65..=90).contains(&c) | (48..=57).contains(&c) | (c == 95) } #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] #[allow(overflowing_literals)] -unsafe fn simd_id_len( - slice: &[u8], offset: usize -) -> usize { +unsafe fn simd_id_len(slice: &[u8], offset: usize) -> usize { let mut tail_offset = offset; let ascii_mask = _mm_set1_epi8(0b01111111); @@ -488,8 +472,7 @@ unsafe fn simd_id_len( let underscore_vec = _mm_set1_epi8('_' as i8); while tail_offset + 16 < slice.len() { - let mut values = _mm_loadu_si128( - 
slice.get_unchecked(tail_offset) as *const u8 as *const _); + let mut values = _mm_loadu_si128(slice.get_unchecked(tail_offset) as *const u8 as *const _); let only_ascii = _mm_movemask_epi8(_mm_cmpgt_epi8(values, ascii_mask)); if only_ascii != 0 { @@ -506,7 +489,7 @@ unsafe fn simd_id_len( _mm_or_si128( _mm_cmpgt_epi8(values, num_max_bound), _mm_cmpgt_epi8(num_min_bound, values), - ) + ), ); // Convert to lowercase @@ -518,7 +501,7 @@ unsafe fn simd_id_len( _mm_cmpgt_epi8(values, alpha_min_bound), _mm_cmpgt_epi8(alpha_max_bound, values), ), - result + result, ); // Compute bitmask of which values are 255 @@ -551,18 +534,14 @@ unsafe fn simd_consume_ws(c: &mut ParseContext) { let tab_nl_stat_vec = _mm_set1_epi8(0b00001111); while c.offset + 16 < c.content.len() { - let values = _mm_loadu_si128( - c.content.get_unchecked(c.offset) as *const u8 as *const _); + let values = _mm_loadu_si128(c.content.get_unchecked(c.offset) as *const u8 as *const _); // Values will be 255 if they're whitespace // andnot(a, b) does ((NOT a) AND b) let result = _mm_andnot_si128( _mm_cmpeq_epi8(values, space_vec), // In this case, gt is the same as neq - _mm_cmpgt_epi8( - _mm_and_si128(values, tab_nl_vec), - tab_nl_stat_vec - ) + _mm_cmpgt_epi8(_mm_and_si128(values, tab_nl_vec), tab_nl_stat_vec), ); // Compute bitmask of which values are 255 @@ -588,10 +567,12 @@ unsafe fn simd_consume_ws(c: &mut ParseContext) { unsafe fn non_simd_consume_ws(c: &mut ParseContext) { while c.offset < c.content.len() { match *c.content.get_unchecked(c.offset) as char { - ' ' | '\n' | '\t' => c.offset += 1, - '/' => if !consume_comment(c) { - break - }, + ' ' | '\n' | '\t' => c.offset += 1, + '/' => { + if !consume_comment(c) { + break; + } + } _ => break, } } @@ -634,14 +615,14 @@ fn consume_comment(c: &mut ParseContext) -> bool { let asterisk_vec = _mm256_set1_epi8('*' as i8); let slash_vec = _mm256_set1_epi8('/' as i8); while c.offset + 32 < c.content.len() { - let values = _mm256_loadu_si256( - 
c.content.get_unchecked(c.offset) as *const u8 as *const _); + let values = + _mm256_loadu_si256(c.content.get_unchecked(c.offset) as *const u8 as *const _); let asterisks = _mm256_cmpeq_epi8(values, asterisk_vec); - let slashes = _mm256_cmpeq_epi8(values, slash_vec); + let slashes = _mm256_cmpeq_epi8(values, slash_vec); let asterisk_mask = _mm256_movemask_epi8(asterisks) as u32; - let slash_mask = _mm256_movemask_epi8(slashes) as u32; + let slash_mask = _mm256_movemask_epi8(slashes) as u32; let mask = asterisk_mask & slash_mask.wrapping_shr(1); @@ -661,12 +642,10 @@ fn consume_comment(c: &mut ParseContext) -> bool { while c.offset < c.content.len() { one = two; - two = unsafe { - *c.content.get_unchecked(c.offset) - }; + two = unsafe { *c.content.get_unchecked(c.offset) }; c.offset += 1; - if one == '*' as u8 && two == '/' as u8 { + if one == b'*' && two == b'/' { break; } }