Skip to content

Commit

Permalink
1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
akanalytics committed Jul 22, 2024
1 parent e54788d commit eb0b92a
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
120 changes: 120 additions & 0 deletions crates/odonata-base/src/infra/quote.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/// Iterator that splits a string on delimiter characters while keeping
/// quoted sections (delimited by `"` or `'`) together in one token.
///
/// Leading delimiters are skipped before every token, so empty tokens are
/// never produced. A quoted section is closed only by the same quote
/// character that opened it, so `"it's bob"` is a single quoted section.
/// An unterminated quote swallows the remainder of the input.
///
/// Based loosely on Splitty
/// https://github.com/Canop/splitty (license MIT)
pub struct QuoteParser<'t, F> {
    /// The part of the input that has not been tokenized yet.
    text: &'t str,
    /// When true, quote characters are trimmed from both ends of each token.
    strip_quotes: bool,
    /// Predicate returning true for delimiter characters.
    pat: F,
}

impl<'t, F> QuoteParser<'t, F>
where
    F: FnMut(char) -> bool,
{
    /// Creates a parser over `text` that splits wherever `pat` returns true
    /// outside of a quoted section. Quotes are kept in the tokens by default.
    pub fn new(text: &'t str, pat: F) -> Self {
        Self {
            text,
            strip_quotes: false,
            pat,
        }
    }

    /// Makes the parser trim quote characters from both ends of every token.
    pub fn without_quotes(mut self) -> Self {
        self.strip_quotes = true;
        self
    }
}

/// Returns true for the two supported quote characters, `"` and `'`.
#[inline]
fn is_quote(c: char) -> bool {
    matches!(c, '"' | '\'')
}

impl<'t, F> Iterator for QuoteParser<'t, F>
where
    F: FnMut(char) -> bool,
{
    type Item = &'t str;

    /// Returns the next token, or `None` once only delimiters (or nothing)
    /// remain in the input.
    fn next(&mut self) -> Option<&'t str> {
        // Delimiters at the start are ignored.
        self.text = self.text.trim_start_matches(&mut self.pat);
        if self.text.is_empty() {
            return None;
        }

        let text = self.text;
        // The quote character that opened the current quoted section, if any.
        // Remembering it (rather than a plain on/off flag) stops the other
        // quote kind from closing the section prematurely.
        let mut open_quote: Option<char> = None;
        // Byte offset where the token ends; defaults to "no ending delimiter".
        let mut end = text.len();

        for (i, c) in text.char_indices() {
            match open_quote {
                // Inside quotes everything but the matching quote is literal.
                Some(q) if c != q => continue,
                // Matching closing quote: leave quote mode.
                Some(_) => open_quote = None,
                // Opening quote: enter quote mode.
                None if is_quote(c) => {
                    open_quote = Some(c);
                    continue;
                }
                None => {}
            }
            if (self.pat)(c) {
                end = i;
                break;
            }
        }

        // `rest` starts at the delimiter (trimmed on the next call) or is
        // empty when no ending delimiter was found, which ends the iteration.
        let (token, rest) = text.split_at(end);
        self.text = rest;

        Some(if self.strip_quotes {
            token.trim_matches(is_quote)
        } else {
            token
        })
    }
}

/// Splits `text` into `;`-separated tags using [`QuoteParser`].
///
/// Semicolons inside quoted sections do not split, empty tags are skipped,
/// and quote characters are left in the returned tags.
fn split_into_tags(text: &str) -> impl Iterator<Item = &str> {
    let is_separator = |ch: char| ch == ';';
    QuoteParser::new(text, is_separator)
}

#[cfg(test)]
mod tests {
    use itertools::Itertools as _;
    use test_log::test;

    use super::*;

    /// Tokenizes `text` on whitespace, keeping quote characters.
    fn words(text: &str) -> Vec<&str> {
        QuoteParser::new(text, char::is_whitespace).collect_vec()
    }

    /// Tokenizes `text` on `;` via `split_into_tags`.
    fn tags(text: &str) -> Vec<&str> {
        split_into_tags(text).collect_vec()
    }

    #[test]
    fn test_quote_parser() {
        // Whitespace-delimited input, quotes retained.
        assert_eq!(words(r#"bm e4"#), vec!["bm", "e4"]);
        assert_eq!(
            words(r#"id "my name is bob""#),
            vec!["id", "\"my name is bob\""]
        );
        assert_eq!(
            words(r#"id 'my name is bob'"#),
            vec!["id", "'my name is bob'"]
        );

        // Whitespace-delimited input, quotes stripped.
        let stripped = QuoteParser::new(r#"id "my name is bob""#, char::is_whitespace)
            .without_quotes()
            .collect_vec();
        assert_eq!(stripped, vec!["id", "my name is bob"]);

        // Semicolon-delimited tags: delimiters inside quotes do not split.
        assert_eq!(
            tags(r#"cat"meo;w";"mouse";"toad;;;;;;" ;zebra;"#),
            vec!["cat\"meo;w\"", "\"mouse\"", "\"toad;;;;;;\" ", "zebra"]
        );
        assert_eq!(
            tags(r#"cat'meo;w';'mouse';'toad;;;;;;' ;zebra;"#),
            vec!["cat\'meo;w\'", "\'mouse\'", "\'toad;;;;;;\' ", "zebra"]
        );

        // Leading and repeated delimiters never yield empty tags.
        assert_eq!(
            tags(r#";cat;mouse;toad;;;;;;sheep;zebra"#),
            vec!["cat", "mouse", "toad", "sheep", "zebra"]
        );

        // OK, but not desirable (unmatched quote parsing)
        assert_eq!(tags(r#";ca't;mouse;"#), vec!["ca't;mouse;"]);

        let text = r#"id 'PIN.01'; c0 'Pins'; Sq c3 f6;"#;
        assert_eq!(
            tags(text),
            vec!["id 'PIN.01'", " c0 'Pins'", " Sq c3 f6"]
        );
    }
}
Binary file not shown.

0 comments on commit eb0b92a

Please sign in to comment.