Skip to content

Commit

Permalink
add test for large select
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Feb 5, 2025
1 parent b5ab086 commit cdb3e83
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 18 deletions.
6 changes: 6 additions & 0 deletions parser/src/earley/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ impl XorShift {
x
}

/// Maps the next generator output into the half-open range `r`.
///
/// The result is `r.start` plus the generator output reduced modulo the
/// width of the range, so it always lies in `r.start..r.end`.
///
/// # Panics
///
/// Panics if `r` is empty (`r.start >= r.end`) or if `r.end` is not below
/// `u32::MAX`.
pub fn from_range(&mut self, r: Range<usize>) -> usize {
    assert!(r.start < r.end);
    assert!(r.end < std::u32::MAX as usize);
    // Width of the range; non-zero thanks to the first assertion.
    let span = r.end - r.start;
    let offset = (self.next() as usize) % span;
    r.start + offset
}

/// Returns `true` when the next generator output is divisible by `n`.
///
/// NOTE(review): `n == 0` would make the remainder operation panic;
/// callers presumably never pass zero — confirm.
pub fn one_in(&mut self, n: u32) -> bool {
    let draw = self.next();
    draw % n == 0
}
Expand Down
13 changes: 11 additions & 2 deletions parser/src/factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,27 @@ impl ParserFactory {
}

/// Creates a parser for `grammar` using the factory's own buffer and
/// stderr log levels.
pub fn create_parser(&self, grammar: TopLevelGrammar) -> Result<TokenParser> {
self.create_parser_ext(grammar, self.buffer_log_level)
self.create_parser_ext2(grammar, self.buffer_log_level, self.stderr_log_level)
}

/// Creates a parser for `grammar` with a caller-supplied buffer log level.
///
/// The stderr log level is taken from the factory (`self.stderr_log_level`)
/// and the work is delegated to `create_parser_ext2`.
pub fn create_parser_ext(
&self,
grammar: TopLevelGrammar,
buffer_log_level: u32,
) -> Result<TokenParser> {
self.create_parser_ext2(grammar, buffer_log_level, self.stderr_log_level)
}

pub fn create_parser_ext2(
&self,
grammar: TopLevelGrammar,
buffer_log_level: u32,
stderr_log_level: u32,
) -> Result<TokenParser> {
let mut parser = TokenParser::from_llguidance_json(
self.tok_env.clone(),
grammar,
Logger::new(buffer_log_level, self.stderr_log_level),
Logger::new(buffer_log_level, stderr_log_level),
self.inference_caps.clone(),
self.limits.clone(),
self.extra_lexemes(),
Expand Down
103 changes: 87 additions & 16 deletions sample_parser/tests/test_lark.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
use anyhow::Result;
use llguidance::{api::TopLevelGrammar, TokenParser};
use llguidance::{api::TopLevelGrammar, earley::XorShift, TokenParser};
use sample_parser::*;

/// Builds a `TokenParser` from Lark grammar text and starts it without a
/// prompt.
///
/// When `quiet` is set the parser is created with buffer log level 0 and
/// stderr log level 1; otherwise both levels are 2.
fn make_parser(lark: &str) -> Result<TokenParser> {
fn make_parser(lark: &str, quiet: bool) -> Result<TokenParser> {
let grm = TopLevelGrammar::from_lark(lark.to_string());
let mut parser = get_parser_factory().create_parser(grm)?;
let mut parser = get_parser_factory().create_parser_ext2(
grm,
if quiet { 0 } else { 2 },
if quiet { 1 } else { 2 },
)?;
parser.start_without_prompt();
Ok(parser)
}
Expand All @@ -15,14 +19,14 @@ fn consume(parser: &mut TokenParser, tok: u32) {
}

/// Checks that `lark` can be turned into a parser (non-quiet logging).
///
/// Panics with the error and the grammar text if parser creation fails.
fn lark_ok(lark: &str) {
match make_parser(lark) {
match make_parser(lark, false) {
Err(e) => panic!("unexpected error: {}, grm:\n{}", e, lark),
Ok(_) => {}
}
}

fn lark_err_test(lark: &str, err: &str) {
match make_parser(lark) {
match make_parser(lark, false) {
Err(e) => {
let e = format!("{}", e);
if !e.contains(err) {
Expand All @@ -36,17 +40,21 @@ fn lark_err_test(lark: &str, err: &str) {
}
}

fn lark_str_test(lark: &str, should_accept: bool, s: &str) {
fn lark_str_test(lark: &str, should_accept: bool, s: &str, quiet: bool) {
let trie = get_tok_env().tok_trie();
let tokens = get_tok_env().tokenize(s);
println!(
"\n\ntokens: {}, accpt={}\ngrm:\n{}\n",
trie.tokens_dbg(&tokens),
should_accept,
lark
);
if !quiet {
println!(
"\n\ntokens: {}, accpt={}\ngrm:\n{}\n",
trie.tokens_dbg(&tokens),
should_accept,
lark
);
}

let mut p = make_parser(lark).unwrap();
// let t0 = std::time::Instant::now();
let mut p = make_parser(lark, quiet).unwrap();
// println!("make_parser: {:?}", t0.elapsed());

for tok in tokens.iter() {
let m = p.compute_mask().unwrap();
Expand All @@ -70,15 +78,23 @@ fn lark_str_test(lark: &str, should_accept: bool, s: &str) {
}
}

/// Runs `lark_str_test` against grammar `lark` for every string in
/// `passing` (expected to be accepted) and every string in `failing`
/// (expected to be rejected), forwarding the `quiet` flag to each call.
fn lark_str_test_many(lark: &str, passing: &[&str], failing: &[&str]) {
fn lark_str_test_many_ext(quiet: bool, lark: &str, passing: &[&str], failing: &[&str]) {
for s in passing {
lark_str_test(lark, true, s);
lark_str_test(lark, true, s, quiet);
}
for s in failing {
lark_str_test(lark, false, s);
lark_str_test(lark, false, s, quiet);
}
}

/// Non-quiet variant: calls `lark_str_test_many_ext` with `quiet = false`.
fn lark_str_test_many(lark: &str, passing: &[&str], failing: &[&str]) {
lark_str_test_many_ext(false, lark, passing, failing);
}

/// Quiet variant: calls `lark_str_test_many_ext` with `quiet = true`.
fn lark_str_test_many_quiet(lark: &str, passing: &[&str], failing: &[&str]) {
lark_str_test_many_ext(true, lark, passing, failing);
}

#[test]
fn test_dot_unicode() {
lark_str_test_many(
Expand Down Expand Up @@ -476,3 +492,58 @@ fn test_lexeme_substring_words_unicode() {
&["른 갈색 여우", "여우가 게으", "갈색 여가"],
);
}

/// Builds a space-separated string of random "words" derived from `seed`.
///
/// The generator is seeded with `seed + 1`. The number of words is drawn
/// from `num_words / 2 .. num_words`; each word is 1 to 14 characters long,
/// with characters picked from ASCII letters, digits, `,` and `.`.
fn gen_words(seed: u32, num_words: usize) -> String {
    let alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.".as_bytes();
    // NOTE(review): the `+ 1` presumably keeps seed 0 from producing a zero
    // generator state — confirm against `XorShift::new`.
    let mut rng = XorShift::new(seed + 1);
    let word_count = rng.from_range((num_words / 2)..num_words);

    let mut words = Vec::new();
    for _ in 0..word_count {
        // Draw the length first, then one index per character, in order.
        let len = rng.from_range(1..15);
        let word: String = (0..len)
            .map(|_| alphabet[rng.from_range(0..alphabet.len())] as char)
            .collect();
        words.push(word);
    }
    words.join(" ")
}

/// Serializes `s` with `serde_json::to_string`, i.e. as a JSON string
/// literal, so it can be embedded as quoted text.
///
/// Panics if serialization returns an error.
fn quote_str(s: &str) -> String {
serde_json::to_string(s).unwrap()
}

/// Stress test for a grammar that selects among many long literal options.
///
/// The same randomly generated options are placed into two grammars, one
/// rooted at `OPTS` and one at `opts` (presumably the terminal and rule
/// forms in Lark syntax — confirm). For each grammar, two of the options
/// must be accepted and an unrelated string must be rejected. The elapsed
/// time and the size of the last grammar are printed at the end.
#[test]
fn test_large_select() {
    let num_words = 500;
    // it's kind of slow in non-release mode
    let num_opt = if cfg!(debug_assertions) { 100 } else { 1500 };

    let t0 = std::time::Instant::now();
    let mut grm_sz = 0;

    // The options depend only on their index (the seed), not on the grammar
    // variant, so they are generated once and shared by both iterations.
    let options = (0..num_opt)
        .map(|i| gen_words(i, num_words))
        .collect::<Vec<_>>();

    for start in &["start: OPTS\nOPTS: ", "start: opts\nopts: "] {
        // Head: the alternation `OPT0 | OPT1 | ... | ""`.
        // Tail: one `OPTi: "<quoted text>"` definition per option.
        let mut grm_head = start.to_string();
        let mut grm_tail = String::new();
        for (i, opt) in options.iter().enumerate() {
            grm_head.push_str(&format!("OPT{} | ", i));
            grm_tail.push_str(&format!("OPT{}: {}\n", i, quote_str(opt)));
        }
        grm_head.push_str(" \"\"\n");
        let grm = format!("{}{}", grm_head, grm_tail);
        grm_sz = grm.len();

        lark_str_test_many_quiet(
            &grm,
            // Spot-check two options rather than the whole list.
            &[options[2].as_str(), options[7].as_str()],
            &["something that is unlikely to be in the options"],
        );
    }

    println!("large_select: {:?}; grm={}kB", t0.elapsed(), grm_sz / 1024);
}
1 change: 1 addition & 0 deletions scripts/test-guidance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ if [ "$TEST_RUST" = 1 ] ; then

cargo build --locked
cargo test
cargo test --release

echo "Running sample_parser"
(cd sample_parser && ./run.sh >/dev/null)
Expand Down

0 comments on commit cdb3e83

Please sign in to comment.