diff --git a/CHANGES.md b/CHANGES.md index 5430091879..0e3d1873d5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,8 @@ ## Unreleased * Add new entries here +* Approximately 40% faster according to `cargo bench`. +* Some "numeric" characters like `¾` and `①` were being treated as digits. * Support `POINT EMPTY` in conversion to `geo_types`. Converts to `MultiPoint([])`. * diff --git a/Cargo.toml b/Cargo.toml index 3625affdf0..48ccfc7e62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,4 +26,3 @@ default = ["geo-types"] [[bench]] name = "parse" harness = false - diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 96aa4521bc..83cce87201 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,19 +29,14 @@ where Word(String), } +#[inline] fn is_whitespace(c: char) -> bool { - match c { - '\n' | '\r' | '\t' | ' ' => true, - _ => false, - } + c == ' ' || c == '\n' || c == '\r' || c == '\t' } +#[inline] fn is_numberlike(c: char) -> bool { - match c { - c if c.is_numeric() => true, - '.' | '-' | '+' => true, - _ => false, - } + c == '.' 
|| c == '-' || c == '+' || c.is_ascii_digit() } pub type PeekableTokens<'a, T> = Peekable<Tokens<'a, T>>; @@ -66,7 +61,7 @@ where impl<'a, T> Iterator for Tokens<'a, T> where - T: WktFloat + str::FromStr + Default, + T: WktFloat + str::FromStr, { type Item = Token<T>; @@ -85,56 +80,42 @@ where ')' => Some(Token::ParenClose), ',' => Some(Token::Comma), c if is_numberlike(c) => { - let number = c.to_string() + &self.read_until_whitespace().unwrap_or_default(); - match number.trim_start_matches('+').parse::<T>() { + let number = self.read_until_whitespace(if c == '+' { None } else { Some(c) }); + match number.parse::<T>() { Ok(parsed_num) => Some(Token::Number(parsed_num)), Err(_) => None, } } - c => { - let word = c.to_string() + &self.read_until_whitespace().unwrap_or_default(); - Some(Token::Word(word)) - } + c => Some(Token::Word(self.read_until_whitespace(Some(c)))), } } } impl<'a, T> Tokens<'a, T> where - T: WktFloat + str::FromStr + Default, + T: str::FromStr, { - fn read_until_whitespace(&mut self) -> Option<String> { - let mut result = String::new(); + fn read_until_whitespace(&mut self, first_char: Option<char>) -> String { + let mut result = String::with_capacity(12); // Big enough for most tokens + if let Some(c) = first_char { + result.push(c); + } while let Some(&next_char) = self.chars.peek() { - let marker = match next_char { - '\0' | '(' | ')' | ',' => true, - _ => false, - }; - - // Consume non-markers - if !marker { - let _ = self.chars.next(); - } - - let whitespace = is_whitespace(next_char); - - // Append non-whitespace, non-marker characters - if !marker && !whitespace { - result.push(next_char); - } - - // Stop reading when reached marker or whitespace - if marker || whitespace { - break; + match next_char { + '\0' | '(' | ')' | ',' => break, // Just stop on a marker + c if is_whitespace(c) => { + let _ = self.chars.next(); + break; + } + _ => { + result.push(next_char); + let _ = self.chars.next(); + } } } - if result.is_empty() { - None - } else { - Some(result) - } + result } }
@@ -186,6 +167,13 @@ fn test_tokenizer_invalid_number() { assert_eq!(tokens, vec![]); } +#[test] +fn test_tokenizer_not_a_number() { + let test_str = "¾"; // A number according to char.is_numeric() + let tokens: Vec<Token<f64>> = Tokens::from_str(test_str).collect(); + assert_eq!(tokens, vec![Token::Word("¾".to_owned())]); +} + #[test] fn test_tokenizer_2numbers() { let test_str = ".4 -2";