Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement string parser #24

Merged
merged 10 commits into from
Feb 4, 2024
7 changes: 7 additions & 0 deletions parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,12 @@ categories = ["parsing", "cel"]
lalrpop-util = { version = "0.19.1", features = ["lexer"] }
regex = "1.4.2"

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }

[build-dependencies]
lalrpop = { version = "0.19.1", features = ["lexer"] }

[[bench]]
name = "runtime"
harness = false
19 changes: 19 additions & 0 deletions parser/benches/runtime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use cel_parser::parse_string;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// Benchmarks `parse_string` against a small set of representative string
/// literals.
///
/// Every entry is registered as its own Criterion benchmark, named after the
/// kind of literal it exercises: plain text, a raw string, and a single
/// unicode, hex, or octal escape sequence.
pub fn parse_string_benchmark(c: &mut Criterion) {
    // (benchmark name, literal exactly as it appears in source, quotes included).
    // A fixed-size array is enough here: the table is only iterated, so there
    // is no reason to heap-allocate it with `vec!`.
    let expressions = [
        ("text", "\"text\""),
        ("raw", "r\"text\""),
        ("single unicode escape sequence", "\"\\U0001f431\""),
        ("single hex escape sequence", "\"\\x0D\""),
        ("single oct escape sequence", "\"\\015\""),
    ];

    for &(name, expr) in &expressions {
        // Apply `black_box` to the input inside the measured closure so the
        // argument stays opaque to the optimizer on every iteration, instead
        // of hiding only the collection once, outside of the timing loop.
        c.bench_function(name, |b| b.iter(|| parse_string(black_box(expr))));
    }
}

// Collect the benchmark functions into a group named `benches`, then hand that
// group to `criterion_main!`, which supplies the entry point for this bench
// target (the Cargo manifest disables the default harness for it).
criterion_group!(benches, parse_string_benchmark);
criterion_main!(benches);
17 changes: 14 additions & 3 deletions parser/src/cel.lalrpop
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{RelationOp, ArithmeticOp, Expression, UnaryOp, Member, Atom};
use crate::{RelationOp, ArithmeticOp, Expression, UnaryOp, Member, Atom, parse_string};
use std::sync::Arc;

grammar;
Expand Down Expand Up @@ -138,11 +138,22 @@ Atom: Atom = {
// the LALRPOP parser.

// Double quoted string
r#""(\\.|[^"\n])*""# => Atom::String(Arc::from(<>[1..<>.len()-1].to_string())),
// I used ChatGPT to come up with this pattern and the explanation is as follows:
// 1. `"`: Match the opening double quote.
// 2. `([^"\\]*(?:\\.[^"\\]*)*)`: This is the main part of the regex which matches the content inside the double quotes.
// a. `[^"\\]*`: Match any sequence of characters that are neither a double quote nor a backslash.
// b. `(?:\\.[^"\\]*)*`: This part matches an escaped character followed by any sequence of characters that are
// neither a double quote nor a backslash. It uses a non-capturing group (?:...) to repeat the pattern.
// This handles sequences like \", \\, or any other escaped character.
// 3. `"`: Match the closing double quote.
r#""([^"\\]*(?:\\.[^"\\]*)*)""# => Atom::String(parse_string(<>).unwrap().into()),
r#"[rR]"([^"\\]*(?:\\.[^"\\]*)*)""# => Atom::String(parse_string(<>).unwrap().into()),
// r#""""(\\.|[^"{3}])*""""# => Atom::String(<>.to_string().into()),

// Single quoted string
r#"'(\\.|[^'\n])*'"# => Atom::String(Arc::from(<>[1..<>.len()-1].to_string())),
// Uses the same regex as the double-quoted case above, with the double quotes replaced by single quotes
r#"'([^'\\]*(?:\\.[^'\\]*)*)'"# => Atom::String(parse_string(<>).unwrap().into()),
r#"[rR]'([^'\\]*(?:\\.[^'\\]*)*)'"# => Atom::String(parse_string(<>).unwrap().into()),
// r#"'''(\\.|[^'{3}])*'''"# => Atom::String(<>.to_string().into()),

// Double quoted bytes
Expand Down
3 changes: 3 additions & 0 deletions parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ use lalrpop_util::lalrpop_mod;
pub mod ast;
pub use ast::*;

pub mod parse;
pub use parse::*;

use std::fmt::Display;

lalrpop_mod!(#[allow(clippy::all)] pub parser, "/cel.rs");
Expand Down
Loading
Loading