From b0bb8461562c71bc4d6062db73444636ebb14667 Mon Sep 17 00:00:00 2001 From: Brian Thorne Date: Mon, 28 Aug 2023 11:39:28 +1200 Subject: [PATCH 1/5] Refactor unary operations Revised unary operations in AST, reduced to 'Not' & 'Neg' from 'Not', 'DoubleNot', 'Minus' & 'DoubleMinus', for simplifying the codebase. --- interpreter/src/objects.rs | 9 +-------- parser/src/ast.rs | 4 +--- parser/src/cel.lalrpop | 12 +++++------- parser/src/lib.rs | 13 +++++++++---- 4 files changed, 16 insertions(+), 22 deletions(-) diff --git a/interpreter/src/objects.rs b/interpreter/src/objects.rs index c5d5ca2..686ca89 100644 --- a/interpreter/src/objects.rs +++ b/interpreter/src/objects.rs @@ -357,18 +357,11 @@ impl<'a> Value { let expr = Value::resolve(expr, ctx)?; match op { UnaryOp::Not => Value::Bool(!expr.to_bool()), - UnaryOp::DoubleNot => Value::Bool(expr.to_bool()), - UnaryOp::Minus => match expr { + UnaryOp::Neg => match expr { Value::Int(i) => Value::Int(-i), Value::Float(i) => Value::Float(-i), _ => unimplemented!(), }, - UnaryOp::DoubleMinus => match expr { - Value::Int(_) => expr, - Value::UInt(_) => expr, - Value::Float(_) => expr, - _ => unimplemented!(), - }, } .into() } diff --git a/parser/src/ast.rs b/parser/src/ast.rs index 7dc5aad..b57c548 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -23,9 +23,7 @@ pub enum ArithmeticOp { #[derive(Debug, Eq, PartialEq, Clone)] pub enum UnaryOp { Not, - DoubleNot, - Minus, - DoubleMinus, + Neg, } #[derive(Debug, PartialEq, Clone)] diff --git a/parser/src/cel.lalrpop b/parser/src/cel.lalrpop index 55bfe56..bb159dd 100644 --- a/parser/src/cel.lalrpop +++ b/parser/src/cel.lalrpop @@ -74,9 +74,7 @@ ArithmeticOp: ArithmeticOp = { // (3) UnaryOp: UnaryOp = { "!" => UnaryOp::Not, - "!!" => UnaryOp::DoubleNot, - "-" => UnaryOp::Minus, - "--" => UnaryOp::DoubleMinus, + "-" => UnaryOp::Neg, } RelationOp: RelationOp = { @@ -91,10 +89,10 @@ RelationOp: RelationOp = { Atom: Atom = { // Integer literals. Annoying to parse :/ - r"-?[0-9]+" => Atom::Int(<>.parse().unwrap()), - r"-?0[xX]([0-9a-fA-F]+)" => Atom::Int(i64::from_str_radix(<>, 16).unwrap()), - r"-?[0-9]+ [uU]" => Atom::UInt(<>.parse().unwrap()), - r"-?0[xX]([0-9a-fA-F]+) [uU]" => Atom::UInt(u64::from_str_radix(<>, 16).unwrap()), + r"[0-9]+" => Atom::Int(<>.parse().unwrap()), + r"0[xX]([0-9a-fA-F]+)" => Atom::Int(i64::from_str_radix(<>, 16).unwrap()), + r"[0-9]+ [uU]" => Atom::UInt(<>.parse().unwrap()), + r"0[xX]([0-9a-fA-F]+) [uU]" => Atom::UInt(u64::from_str_radix(<>, 16).unwrap()), // Float with decimals and optional exponent r"([-+]?[0-9]*\.[0-9]+([eE][-+]?[0-9]+)?)" => Atom::Float(<>.parse().unwrap()), diff --git a/parser/src/lib.rs b/parser/src/lib.rs index d0a48aa..a6fbe5f 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -212,10 +212,15 @@ mod tests { #[test] fn test_parser_bool_unary_ops_repeated() { assert_eq!( - parse("!!true"), + parse("!(!true)"), // Parens to help LALRPOP (Unary( - UnaryOp::DoubleNot, - Box::new(Expression::Atom(Atom::Bool(true))), + UnaryOp::Not, + Box::new( + Expression::Unary( + UnaryOp::Not, + Box::new(Expression::Atom(Atom::Bool(true))) + ) + ), )) ); } @@ -224,7 +229,7 @@ mod tests { fn delimited_expressions() { assert_parse_eq( "(-((1)))", - Unary(UnaryOp::Minus, Box::new(Expression::Atom(Atom::Int(1)))), + Unary(UnaryOp::Neg, Box::new(Expression::Atom(Atom::Int(1)))), ); } From 4d9b601ccb94538fb8b5e025ae76ba2beb32b260 Mon Sep 17 00:00:00 2001 From: Brian Thorne Date: Mon, 28 Aug 2023 10:11:38 +1200 Subject: [PATCH 2/5] WIP parsing with Chumsky --- parser/Cargo.toml | 2 + parser/src/chumsky_parser.rs | 660 +++++++++++++++++++++++++++++++++++ parser/src/lib.rs | 1 + 3 files changed, 663 insertions(+) create mode 100644 parser/src/chumsky_parser.rs diff --git a/parser/Cargo.toml b/parser/Cargo.toml index c4f3638..86b4502 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -11,6 +11,8 @@ categories = ["parsing", "cel"] [dependencies] lalrpop-util = { version = "0.19.1", features = ["lexer"] } regex = "1.4.2" +ariadne = { version = "0.3.0", features = ["auto-color"] } +chumsky = "0.9.2" [build-dependencies] lalrpop = { version = "0.19.1", features = ["lexer"] } diff --git a/parser/src/chumsky_parser.rs b/parser/src/chumsky_parser.rs new file mode 100644 index 0000000..172c871 --- /dev/null +++ b/parser/src/chumsky_parser.rs @@ -0,0 +1,660 @@ +use crate::ast::{ArithmeticOp, Atom, Expression, Member, RelationOp, UnaryOp}; +use chumsky::prelude::*; +use chumsky::Parser; +use std::rc::Rc; + +fn boolean() -> impl Parser> { + just("true") + .to(true) + .or(just("false").to(false)) + .map(|b| Expression::Atom(Atom::Bool(b))) +} + +/// Parses floating point and integer numbers and returns them as [`Expr::Atom(Atom::Double(...))`] +/// or [`Expr::Atom(Atom::Int(...))`] types. The following formats are supported: +/// - `1` +/// - `1.` +/// - `1.0` +/// - `-1` +/// - `-1.0` +/// - `1e10` +/// - `1e-10` +/// - `1E10` +/// - `1E-10` +/// - `-1e10` +/// - `1u` +fn numbers() -> impl Parser> { + let digits = text::digits::>(10); + + let frac = just('.').chain::(digits.clone().or_not()); + + let exp = just('e') + .or(just('E')) + .chain::(one_of("+-").or_not()) + .chain::(digits.clone()); + + let float_or_int = text::int::>(10) + .chain::(frac.or_not().flatten()) + .chain::(exp.or_not().flatten()) + .try_map(|chars, span| { + let str = chars.into_iter().collect::(); + + if let Ok(i) = str.parse::() { + Ok(Expression::Atom(Atom::Int(i))) + } else if let Ok(f) = str.parse::() { + Ok(Expression::Atom(Atom::Float(f))) + } else { + Err(Simple::expected_input_found(span, None, None)) + } + }); + + let unsigned_integer = text::int::>(10) + .then_ignore(just('u')) + .map(|s: String| Expression::Atom(Atom::UInt(s.as_str().parse().unwrap()))); + + choice((unsigned_integer, float_or_int)) + .padded() + .labelled("number") +} + +fn str_inner( + delimiter: &str, + escaping: bool, +) -> impl Parser> + '_ { + let unicode = filter::<_, _, Simple>(|c: &char| c.is_ascii_hexdigit()) + .repeated() + .exactly(4) + .collect::() + .validate(|digits, span, emit| { + char::from_u32(u32::from_str_radix(&digits, 16).unwrap()).unwrap_or_else(|| { + emit(Simple::custom(span, "invalid unicode character")); + '\u{FFFD}' // unicode replacement character + }) + }); + + let hex_code_point = filter::<_, _, Simple>(|c: &char| c.is_ascii_hexdigit()) + .repeated() + .exactly(2) + .collect::() + .validate(|digits, span, emit| { + char::from_u32(u32::from_str_radix(&digits, 16).unwrap()).unwrap_or_else(|| { + emit(Simple::custom(span, "invalid unicode character")); + '\u{FFFD}' // unicode replacement character + }) + }); + + let octal_code_point = filter::<_, _, Simple>(|c: &char| c.is_ascii_digit()) + .repeated() + .exactly(3) + .collect::() + .validate(|digits, span, emit| { + char::from_u32(u32::from_str_radix(&digits, 8).unwrap()).unwrap_or_else(|| { + emit(Simple::custom(span, "invalid unicode character")); + '\u{FFFD}' // unicode replacement character + }) + }); + + let escape = just('\\').ignore_then(choice(( + just('\\'), + just('/'), + just('"'), + just('b').to('\x08'), + just('f').to('\x0C'), + just('n').to('\n'), + just('r').to('\r'), + just('t').to('\t'), + just('u').ignore_then(unicode), + just('x').or(just('X')).ignore_then(hex_code_point), + octal_code_point, + ))); + + let mut forbidden = just(delimiter).boxed(); + let mut inner_string = forbidden.not().boxed(); + + if escaping { + forbidden = just(delimiter).or(just("\\")).boxed(); + inner_string = forbidden.not().or(escape).boxed(); + } + + inner_string + .repeated() + .delimited_by(just(delimiter), just(delimiter)) + .collect::() +} + +fn bytes_inner(delimiter: &str) -> impl Parser, Error = Simple> + '_ { + let hex_code_point = filter::<_, _, Simple>(|c: &char| c.is_ascii_hexdigit()) + .repeated() + .exactly(2) + .collect::() + .validate(|digits, span, emit| { + u8::from_str_radix(&digits, 16).unwrap_or_else(|_| { + emit(Simple::custom(span, "invalid hexadecimal character")); + 0u8 + }) + }); + + let octal_code_point = filter::<_, _, Simple>(|c: &char| c.is_ascii_digit()) + .repeated() + .exactly(3) + .collect::() + .validate(|digits, span, emit| { + u8::from_str_radix(&digits, 8).unwrap_or_else(|_| { + emit(Simple::custom(span, "invalid octal code point")); + 0u8 + }) + }); + + let escape = just('\\') + .ignore_then(choice(( + just('\\').to(b'\\'), + just(delimiter).to(delimiter.as_bytes()[0]), + just('n').to(b'\n'), + just('a').to(b'\x07'), + just('b').to(b'\x08'), + just('f').to(b'\x0c'), + just('r').to(b'\r'), + just('t').to(b'\t'), + just('v').to(b'\x0b'), + just('x').or(just('X')).ignore_then(hex_code_point), + octal_code_point, + ))) + .map(|c: u8| vec![c]); + + let forbidden = just(delimiter).or(just("\\")).boxed(); + let not_forbidden = forbidden.not().map(|c: char| c.to_string().into_bytes()); + let inner_string = not_forbidden.or(escape).boxed(); + + inner_string + .repeated() + .delimited_by(just(delimiter), just(delimiter)) + .collect::>>() + .flatten() +} + +// Ref https://github.com/01mf02/jaq/blob/main/jaq-parse/src/token.rs +// See also https://github.com/PRQL/prql/blob/main/prql-compiler/src/parser/lexer.rs#L295-L354 +// A parser for strings; adapted from Chumsky's JSON example parser. +fn str_() -> impl Parser> { + let single_quoted_string = str_inner("'", true).labelled("single quoted string"); + + let double_quoted_string = str_inner("\"", true).labelled("double quoted string"); + + // Byte literals + let single_quoted_bytes = just("b") + .ignore_then(bytes_inner("'")) + .labelled("single quoted byte string"); + let double_quoted_bytes = just("b") + .ignore_then(bytes_inner("\"")) + .labelled("single quoted byte string"); + + // Raw strings don't interpret escape sequences. + + let single_quoted_raw_string = just("r") + .or(just("R")) + .ignore_then(str_inner("'", false)) + .labelled("single quoted raw string"); + + let double_quoted_raw_string = just("r") + .or(just("R")) + .ignore_then(str_inner("\"", false)) + .labelled("double quoted raw string"); + + let triple_single_quoted_raw_string = just("r") + .or(just("R")) + .ignore_then(str_inner("'''", false)) + .labelled("triple ' quoted string"); + + let triple_single_quoted_escaped_string = + str_inner("'''", true).labelled("triple ' quoted escaped string"); + + let triple_double_quoted_string = str_inner("\"\"\"", true).labelled("triple \" quoted string"); + + let strings = choice(( + triple_single_quoted_raw_string, + triple_single_quoted_escaped_string, + triple_double_quoted_string, + single_quoted_raw_string, + single_quoted_string, + double_quoted_raw_string, + double_quoted_string, + )) + .map(|s| Expression::Atom(Atom::String(s.into()))); + + let bytes = choice((single_quoted_bytes, double_quoted_bytes)) + .map(|b| Expression::Atom(Atom::Bytes(b.into()))); + + choice((strings, bytes)) +} + +pub fn parser() -> impl Parser> { + let ident = text::ident::>() + .padded() + .map(Expression::Ident) + .labelled("identifier"); + + let null = just("null") + .padded() + .map(|_| Expression::Atom(Atom::Null)) + .labelled("null"); + + let literal = choice((numbers(), boolean(), str_(), null)).labelled("literal"); + + let attribute_access = just('.').ignore_then(ident.clone()).map(|rhs| match rhs { + Expression::Ident(name) => Box::new(Member::Attribute(name)), + _ => panic!("Expected ident!"), + }); + + let expr = recursive(|expr| { + let expr_in_paren = expr.clone().delimited_by(just('('), just(')')); + + let expr_list = expr + .clone() + .padded() + .separated_by(just(',')) + .then_ignore(just(',').or_not()) + .collect::>(); + + let function_call = just('(') + .ignore_then(expr_list.clone()) + .then_ignore(just(')')) + .map(|args| Box::new(Member::FunctionCall(args))) + .labelled("function call"); + + let index_access = just('[') + .ignore_then(expr.clone()) + .then_ignore(just(']')) + .map(|arg: Expression| Box::new(Member::Index(Box::new(arg)))) + .labelled("index"); + + let list = expr_list + .clone() + // Ignore trailing comma + .delimited_by(just('['), just(']')) + .map(Expression::List) + .labelled("list"); + + let map_item = expr + .clone() + .then_ignore(just(':')) + .then(expr.clone()) + .padded() + .labelled("map item"); + + let map = map_item + .clone() + .separated_by(just(',')) + .delimited_by(just('{'), just('}')) + .padded() + .map(Expression::Map) + .labelled("map"); + + let field_identifier = text::ident::>() + .padded() + .map(|s| { + let ref_counted_field_id: Rc = Rc::from(s); + ref_counted_field_id + }) + .labelled("field identifier"); + + let field_item = field_identifier + .clone() + .then_ignore(just(':')) + .then(expr.clone()); + + let field_items = field_item + .clone() + .separated_by(just(',')) + .delimited_by(just('{'), just('}')) + .padded() + .labelled("field items"); + + let field_inits = ident + .clone() + .then(just('.').ignore_then(ident.clone()).repeated()) + .foldl(|lhs: Expression, rhs: Expression| { + // We convert the Ident Expressions to attribute member expressions except for the left most one + // Ident(A), Ident(B) -> Member(Ident(A), Attribute(B)) + // Member(Ident(A), Attribute(B)), Ident(C) -> Member(Member(Ident(A), Attribute(B)), Attribute(C)) + match rhs { + Expression::Ident(name) => Expression::Member( + Box::new(lhs), // LHS stays as an Ident Expression + Box::new(Member::Attribute(name)), + ), + _ => panic!("Expected ident!"), + } + }) + .then(field_items) + .map(|(lhs, items)| Expression::Member(Box::new(lhs), Box::new(Member::Fields(items)))); + + let primary = choice((literal, field_inits, ident, expr_in_paren, list, map)) + .labelled("primary") + .boxed(); + + let member_chain = primary + .clone() + .then( + choice(( + attribute_access.clone(), + function_call.clone(), + index_access.clone(), + )) + .repeated(), + ) + .map(|(lhs_expression, members)| { + members.into_iter().fold(lhs_expression, |acc, member| { + Expression::Member(Box::new(acc), member) + }) + }) + .labelled("member"); + + let member = choice((member_chain, primary.clone())); + + let op = |c| just::>(c).padded(); + + let unary_op = op('!').to(UnaryOp::Not).or(op('-').to(UnaryOp::Neg)); + + let not_or_negation = unary_op + .repeated() + .at_least(1) + .then(member.clone()) + .foldr(|op, rhs: Expression| Expression::Unary(op, Box::new(rhs))) + .labelled("unary"); + + let unary = choice((not_or_negation, member.clone())).padded(); + + let product_div_op = op('*') + .to(ArithmeticOp::Multiply) + .or(op('/').to(ArithmeticOp::Divide)) + .or(op('%').to(ArithmeticOp::Modulus)); + + let multiplication = unary + .clone() + .then(product_div_op.then(unary.clone()).repeated()) + .foldl(|lhs, (binary_op, rhs)| { + Expression::Arithmetic(Box::new(lhs), binary_op, Box::new(rhs)) + }) + .labelled("product_or_division"); + + let sum_sub_op = op('+') + .to(ArithmeticOp::Add) + .or(op('-').to(ArithmeticOp::Subtract)); + + let addition = multiplication + .clone() + .then(sum_sub_op.then(multiplication.clone()).repeated()) + .foldl(|lhs, (op, rhs)| Expression::Arithmetic(Box::new(lhs), op, Box::new(rhs))) + .labelled("sub_or_sub"); + + let relationship_op = just("==") + .to(RelationOp::Equals) + .or(just("!=").to(RelationOp::NotEquals)) + .or(just(">=").to(RelationOp::GreaterThanEq)) + .or(just("<=").to(RelationOp::LessThanEq)) + .or(just('>').to(RelationOp::GreaterThan)) + .or(just('<').to(RelationOp::LessThan)) + .or(just("in").to(RelationOp::In)); + + let relation = addition + .clone() + .then(relationship_op.then(addition.clone()).repeated()) + .foldl(|lhs, (op, rhs)| Expression::Relation(Box::new(lhs), op, Box::new(rhs))) + .labelled("comparison"); + + let conditional_and = relation + .clone() + .then(just("&&").then(relation.clone()).repeated()) + .foldl(|lhs, (_op, rhs)| Expression::And(Box::new(lhs), Box::new(rhs))) + .labelled("conditional and"); + + let conditional_or = conditional_and + .clone() + .then(just("||").then(conditional_and.clone()).repeated()) + .foldl(|lhs, (_op, rhs)| Expression::Or(Box::new(lhs), Box::new(rhs))) + .labelled("conditional or"); + + let ternary = conditional_or + .clone() + .then( + just("?") + .ignore_then(conditional_or.clone()) + .then_ignore(just(":")) + .then(conditional_or.clone()) + .or_not(), + ) + .map(|(condition, ternary)| match ternary { + Some((true_expression, false_expression)) => Expression::Ternary( + Box::new(condition), + Box::new(true_expression), + Box::new(false_expression), + ), + None => condition, + }) + .labelled("ternary"); + + ternary + }); + + expr.clone() + .padded() + .then_ignore(end()) + .labelled("expression") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_number_parser_unsigned_numbers() { + //let unsigned_integer = text::int::>(10).then_ignore(just('u')).map(|s: String| Expr::Atom(Atom::UInt(s.as_str().parse().unwrap()))); + //assert_eq!(unsigned_integer.parse("1u"), Ok(Expr::Atom(Atom::UInt(1)))); + assert_eq!(numbers().parse("1u"), Ok(Expression::Atom(Atom::UInt(1)))); + assert_eq!(numbers().parse("1up"), Ok(Expression::Atom(Atom::UInt(1)))); + } + + #[test] + fn test_number_parser_int() { + assert_eq!(numbers().parse("1"), Ok(Expression::Atom(Atom::Int(1)))); + + // Debatable if this should be allowed. Ref CEL Spec: + // https://github.com/google/cel-spec/blob/master/doc/langdef.md#numeric-values + // "negative integers are produced by the unary negation operator" + assert_eq!(numbers().parse("100"), Ok(Expression::Atom(Atom::Int(100)))); + } + + #[test] + fn test_boolean_parser_errors() { + assert!(boolean().parse("-true").is_err()); + assert!(boolean().parse("!1").is_err()); + } + + #[test] + fn test_str_inner_parser() { + // Taking the idea from + // REF: https://github.com/PRQL/prql/blob/main/prql-compiler/src/parser/lexer.rs#L295 + + let triple_single_quoted_escaped_string = + str_inner("'''", true).labelled("triple ' quoted escaped string"); + + assert_eq!( + triple_single_quoted_escaped_string.parse(r"''''''"), + Ok(String::from("")) + ); + assert_eq!( + triple_single_quoted_escaped_string.parse(r"'''hello'''"), + Ok(String::from("hello")) + ); + // Check triple quoted strings interpret escape sequences (note this is a rust raw string, not a CEL raw string) + assert_eq!( + triple_single_quoted_escaped_string.parse(r"'''\n'''"), + Ok(String::from("\n")) + ); + assert_eq!( + triple_single_quoted_escaped_string.parse(r"'''x''x'''"), + Ok(String::from("x''x")) + ); + assert_eq!( + triple_single_quoted_escaped_string.parse(r"''' '''"), + Ok(String::from(" ")) + ); + assert_eq!( + triple_single_quoted_escaped_string.parse(r"'''\xFF'''"), + Ok(String::from("ÿ")) + ); + assert_eq!( + triple_single_quoted_escaped_string.parse(r"'''\377'''"), + Ok(String::from("ÿ")) + ); + } + + #[test] + fn test_str_parser() { + assert_eq!( + str_().parse("'Hello!'"), + Ok(Expression::Atom(Atom::String( + String::from("Hello!").into() + ))) + ); + assert_eq!( + str_().parse("\"Hello!\""), + Ok(Expression::Atom(Atom::String( + String::from("Hello!").into() + ))) + ); + assert_eq!( + str_().parse("'\n'"), + Ok(Expression::Atom(Atom::String(String::from("\n").into()))) + ); + assert_eq!( + str_().parse(r"'\n'"), + Ok(Expression::Atom(Atom::String(String::from("\n").into()))) + ); + + assert_eq!( + str_().parse(r"'''hello'''"), + Ok(Expression::Atom(Atom::String(String::from("hello").into()))) + ); + // Check triple quoted strings interpret escape sequences (note this is a rust raw string, not a CEL raw string) + assert_eq!( + str_().parse(r"'''\n'''"), + Ok(Expression::Atom(Atom::String(String::from("\n").into()))) + ); + } + + #[test] + fn test_raw_str_parser() { + assert_eq!( + str_().parse(r"r'\n'"), + Ok(Expression::Atom(Atom::String(String::from("\\n").into()))) + ); + assert_eq!( + str_().parse(r"R'\n'"), + Ok(Expression::Atom(Atom::String(String::from("\\n").into()))) + ); + assert_eq!( + str_().parse("r'1'"), + Ok(Expression::Atom(Atom::String(String::from("1").into()))) + ); + assert_eq!( + str_().parse("r\"Hello!\""), + Ok(Expression::Atom(Atom::String( + String::from("Hello!").into() + ))) + ); + assert_eq!( + str_().parse("R\"Hello!\""), + Ok(Expression::Atom(Atom::String( + String::from("Hello!").into() + ))) + ); + assert_eq!( + str_().parse(r"r'''hello'''"), + Ok(Expression::Atom(Atom::String(String::from("hello").into()))) + ); + assert_eq!( + str_().parse(r"r'''\n'''"), + Ok(Expression::Atom(Atom::String(String::from("\\n").into()))) + ); + } + + #[test] + fn test_raw_bytes_simple() { + let expected: Vec = vec![97, 98, 99]; + + assert_eq!( + str_().parse(r"b'abc'"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } + + #[test] + fn test_raw_bytes_escaped_newlines() { + let expected: Vec = vec![10]; + + assert_eq!( + str_().parse(r"b'\n'"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } + + #[test] + fn test_raw_bytes_escaped_delimiter() { + let expected: Vec = vec![39]; + + assert_eq!( + str_().parse(r"b'\''"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } + + #[test] + fn test_raw_bytes_unicode() { + let expected: Vec = vec![195, 191]; + + assert_eq!( + str_().parse(r"b'ÿ'"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } + + #[test] + fn test_raw_bytes_invalid_utf8() { + let expected: Vec = vec![0, 255]; + + assert_eq!( + str_().parse(r"b'\000\xff'"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } + + #[test] + fn test_raw_bytes_unicode_as_octal_escaped() { + let expected: Vec = vec![195, 191]; + + assert_eq!( + str_().parse(r"b'\303\277'"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } + + #[test] + fn test_raw_bytes_single_octal() { + let expected = vec![0xffu8]; + + assert_eq!( + str_().parse(r"b'\377'"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } + + #[test] + fn test_raw_bytes_single_hexadecimal() { + let expected = vec![0xffu8]; + + assert_eq!( + str_().parse(r"b'\xFF'"), + Ok(Expression::Atom(Atom::Bytes(expected.into()))) + ); + } +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index a6fbe5f..26797c2 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -3,6 +3,7 @@ use lalrpop_util::lalrpop_mod; pub mod ast; pub use ast::*; +mod chumsky_parser; use std::fmt::Display; lalrpop_mod!(#[allow(clippy::all)] pub parser, "/cel.rs"); From 0524054d8a718f0c2b2c45b356ae9920380e8c6d Mon Sep 17 00:00:00 2001 From: Brian Thorne Date: Mon, 28 Aug 2023 11:40:01 +1200 Subject: [PATCH 3/5] Enable chumsky parser --- parser/src/chumsky_parser.rs | 2 +- parser/src/lib.rs | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/parser/src/chumsky_parser.rs b/parser/src/chumsky_parser.rs index 172c871..ecf46f4 100644 --- a/parser/src/chumsky_parser.rs +++ b/parser/src/chumsky_parser.rs @@ -230,7 +230,7 @@ fn str_() -> impl Parser> { pub fn parser() -> impl Parser> { let ident = text::ident::>() .padded() - .map(Expression::Ident) + .map(|name| Expression::Ident(Rc::new(name))) .labelled("identifier"); let null = just("null") diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 26797c2..eea2af6 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -5,6 +5,7 @@ pub use ast::*; mod chumsky_parser; use std::fmt::Display; +use chumsky::Parser; lalrpop_mod!(#[allow(clippy::all)] pub parser, "/cel.rs"); @@ -31,20 +32,19 @@ pub fn parse(input: &str) -> Result { // Wrap the internal parser function - whether larlpop or chumsky // Example for a possible new chumsky based parser... - // parser().parse(input) - // .into_result() - // .map_err(|e| { - // ParseError { - // msg: e.iter().map(|e| format!("{}", e)).collect::>().join("\n") - // } - // }) + chumsky_parser::parser().parse(input) + .map_err(|e| { + ParseError { + msg: e.iter().map(|e| format!("{}", e)).collect::>().join("\n") + } + }) // Existing Larlpop Parser: - crate::parser::ExpressionParser::new() - .parse(input) - .map_err(|e| ParseError { - msg: format!("{}", e), - }) + // crate::parser::ExpressionParser::new() + // .parse(input) + // .map_err(|e| ParseError { + // msg: format!("{}", e), + // }) } #[cfg(test)] From 6a2881a10a081e8ab3fa00361281f78c4a8fb13b Mon Sep 17 00:00:00 2001 From: Brian Thorne Date: Mon, 28 Aug 2023 13:10:51 +1200 Subject: [PATCH 4/5] Fix float parser and add test cases --- parser/src/chumsky_parser.rs | 2 +- parser/src/lib.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/parser/src/chumsky_parser.rs b/parser/src/chumsky_parser.rs index ecf46f4..a5190ea 100644 --- a/parser/src/chumsky_parser.rs +++ b/parser/src/chumsky_parser.rs @@ -26,7 +26,7 @@ fn boolean() -> impl Parser> { fn numbers() -> impl Parser> { let digits = text::digits::>(10); - let frac = just('.').chain::(digits.clone().or_not()); + let frac = just('.').chain::(digits.clone()); let exp = just('e') .or(just('E')) diff --git a/parser/src/lib.rs b/parser/src/lib.rs index eea2af6..0d12937 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -497,4 +497,18 @@ mod tests { ), ); } + + #[test] + fn test_primitive_function_call() { + assert_parse_eq( + "10.double()", + Member( + Box::new(Member( + Box::new(Expression::Atom(Int(10))), + Box::new(Attribute("double".to_string().into())), + )), + Box::new(FunctionCall(vec![])), + ) + ); + } } From f3210259a92012e951855f209619108186704d4e Mon Sep 17 00:00:00 2001 From: Brian Thorne Date: Mon, 28 Aug 2023 13:17:13 +1200 Subject: [PATCH 5/5] Update authors list --- parser/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 86b4502..21ee884 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -3,7 +3,7 @@ name = "cel-parser" description = "A parser for the Common Expression Language (CEL)" repository = "https://github.com/clarkmcc/cel-rust" version = "0.5.0" -authors = ["Tom Forbes ", "Clark McCauley "] +authors = ["Tom Forbes ", "Clark McCauley ", "Brian Thorne "] edition = "2018" license = "MIT" categories = ["parsing", "cel"]