Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge georust#66
Browse files Browse the repository at this point in the history
66: Approximately 40% faster, not all numeric chars are digits r=michaelkirk a=softdevca

- [X] I agree to follow the project's [code of conduct](https://github.com/georust/geo/blob/master/CODE_OF_CONDUCT.md).
- [X] I added an entry to `CHANGES.md` if knowledge of this change could be valuable to users.
---



Co-authored-by: Sheldon Young <[email protected]>
bors[bot] and softdevca authored Mar 12, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents aa202b3 + b4e0e5c commit 5d938ce
Showing 3 changed files with 34 additions and 45 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
## Unreleased

* Add new entries here
* Approximately 40% faster according to `cargo bench`.
* Some "numeric" characters like `¾` were being treated as digits.
* Support `POINT EMPTY` in conversion to `geo_types`.
Converts to `MultiPoint([])`.
* <https://github.com/georust/wkt/pull/64>
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -26,4 +26,3 @@ default = ["geo-types"]
[[bench]]
name = "parse"
harness = false

76 changes: 32 additions & 44 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
@@ -29,19 +29,14 @@ where
Word(String),
}

#[inline]
/// Returns true if `c` is one of the four whitespace characters the WKT
/// tokenizer treats as a token separator (space, newline, carriage return,
/// tab). Deliberately narrower than `char::is_whitespace`.
fn is_whitespace(c: char) -> bool {
    c == ' ' || c == '\n' || c == '\r' || c == '\t'
}

#[inline]
/// Returns true if `c` can start or continue a numeric token: an ASCII
/// digit, a decimal point, or a sign. Uses `is_ascii_digit` rather than
/// `is_numeric` so characters like `¾` (numeric but not a digit) are
/// tokenized as words instead of being fed to the number parser.
fn is_numberlike(c: char) -> bool {
    c == '.' || c == '-' || c == '+' || c.is_ascii_digit()
}

/// A `Tokens` iterator wrapped in `Peekable`, giving one token of lookahead.
pub type PeekableTokens<'a, T> = Peekable<Tokens<'a, T>>;
@@ -66,7 +61,7 @@ where

impl<'a, T> Iterator for Tokens<'a, T>
where
T: WktFloat + str::FromStr + Default,
T: WktFloat + str::FromStr,
{
type Item = Token<T>;

@@ -85,56 +80,42 @@ where
')' => Some(Token::ParenClose),
',' => Some(Token::Comma),
c if is_numberlike(c) => {
let number = c.to_string() + &self.read_until_whitespace().unwrap_or_default();
match number.trim_start_matches('+').parse::<T>() {
let number = self.read_until_whitespace(if c == '+' { None } else { Some(c) });
match number.parse::<T>() {
Ok(parsed_num) => Some(Token::Number(parsed_num)),
Err(_) => None,
}
}
c => {
let word = c.to_string() + &self.read_until_whitespace().unwrap_or_default();
Some(Token::Word(word))
}
c => Some(Token::Word(self.read_until_whitespace(Some(c)))),
}
}
}

impl<'a, T> Tokens<'a, T>
where
T: WktFloat + str::FromStr + Default,
T: str::FromStr,
{
fn read_until_whitespace(&mut self) -> Option<String> {
let mut result = String::new();
fn read_until_whitespace(&mut self, first_char: Option<char>) -> String {
let mut result = String::with_capacity(12); // Big enough for most tokens
if let Some(c) = first_char {
result.push(c);
}

while let Some(&next_char) = self.chars.peek() {
let marker = match next_char {
'\0' | '(' | ')' | ',' => true,
_ => false,
};

// Consume non-markers
if !marker {
let _ = self.chars.next();
}

let whitespace = is_whitespace(next_char);

// Append non-whitespace, non-marker characters
if !marker && !whitespace {
result.push(next_char);
}

// Stop reading when reached marker or whitespace
if marker || whitespace {
break;
match next_char {
'\0' | '(' | ')' | ',' => break, // Just stop on a marker
c if is_whitespace(c) => {
let _ = self.chars.next();
break;
}
_ => {
result.push(next_char);
let _ = self.chars.next();
}
}
}

if result.is_empty() {
None
} else {
Some(result)
}
result
}
}

@@ -186,6 +167,13 @@ fn test_tokenizer_invalid_number() {
assert_eq!(tokens, vec![]);
}

#[test]
fn test_tokenizer_not_a_number() {
    // `¾` satisfies `char::is_numeric` but is not an ASCII digit, so the
    // tokenizer must emit it as a Word rather than attempt number parsing.
    let input = "¾";
    let tokens: Vec<Token<f64>> = Tokens::from_str(input).collect();
    assert_eq!(tokens, vec![Token::Word("¾".to_owned())]);
}

#[test]
fn test_tokenizer_2numbers() {
let test_str = ".4 -2";

0 comments on commit 5d938ce

Please sign in to comment.