Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge georust#66
Browse files Browse the repository at this point in the history
66: Approximately 40% faster, not all numeric chars are digits r=michaelkirk a=softdevca

- [X] I agree to follow the project's [code of conduct](https://github.com/georust/geo/blob/master/CODE_OF_CONDUCT.md).
- [X] I added an entry to `CHANGES.md` if knowledge of this change could be valuable to users.
---



Co-authored-by: Sheldon Young <[email protected]>
bors[bot] and softdevca authored Mar 12, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents aa202b3 + b4e0e5c commit 5d938ce
Showing 3 changed files with 34 additions and 45 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
## Unreleased

* Add new entries here
* Approximately 40% faster according to `cargo bench`.
* Some "numeric" characters like `¾` were being treated as digits.
* Support `POINT EMPTY` in conversion to `geo_types`.
Converts to `MultiPoint([])`.
* <https://github.com/georust/wkt/pull/64>
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -26,4 +26,3 @@ default = ["geo-types"]
[[bench]]
name = "parse"
harness = false

76 changes: 32 additions & 44 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
@@ -29,19 +29,14 @@ where
Word(String),
}

#[inline]
/// Returns true if `c` is one of the four whitespace characters the WKT
/// tokenizer treats as a token separator (space, newline, carriage return,
/// tab). Deliberately narrower than `char::is_whitespace`.
fn is_whitespace(c: char) -> bool {
    c == ' ' || c == '\n' || c == '\r' || c == '\t'
}

#[inline]
/// Returns true if `c` can start or continue a numeric token: an ASCII
/// digit, a decimal point, or a sign. Uses `is_ascii_digit` rather than
/// `is_numeric` so characters like `¾` (numeric but not a digit) are
/// tokenized as words instead of being fed to the number parser.
fn is_numberlike(c: char) -> bool {
    c == '.' || c == '-' || c == '+' || c.is_ascii_digit()
}

/// A `Tokens` iterator wrapped in `Peekable`, giving one token of lookahead.
pub type PeekableTokens<'a, T> = Peekable<Tokens<'a, T>>;
@@ -66,7 +61,7 @@ where

impl<'a, T> Iterator for Tokens<'a, T>
where
T: WktFloat + str::FromStr + Default,
T: WktFloat + str::FromStr,
{
type Item = Token<T>;

@@ -85,56 +80,42 @@ where
')' => Some(Token::ParenClose),
',' => Some(Token::Comma),
c if is_numberlike(c) => {
let number = c.to_string() + &self.read_until_whitespace().unwrap_or_default();
match number.trim_start_matches('+').parse::<T>() {
let number = self.read_until_whitespace(if c == '+' { None } else { Some(c) });
match number.parse::<T>() {
Ok(parsed_num) => Some(Token::Number(parsed_num)),
Err(_) => None,
}
}
c => {
let word = c.to_string() + &self.read_until_whitespace().unwrap_or_default();
Some(Token::Word(word))
}
c => Some(Token::Word(self.read_until_whitespace(Some(c)))),
}
}
}

impl<'a, T> Tokens<'a, T>
where
T: WktFloat + str::FromStr + Default,
T: str::FromStr,
{
fn read_until_whitespace(&mut self) -> Option<String> {
let mut result = String::new();
fn read_until_whitespace(&mut self, first_char: Option<char>) -> String {
let mut result = String::with_capacity(12); // Big enough for most tokens
if let Some(c) = first_char {
result.push(c);
}

while let Some(&next_char) = self.chars.peek() {
let marker = match next_char {
'\0' | '(' | ')' | ',' => true,
_ => false,
};

// Consume non-markers
if !marker {
let _ = self.chars.next();
}

let whitespace = is_whitespace(next_char);

// Append non-whitespace, non-marker characters
if !marker && !whitespace {
result.push(next_char);
}

// Stop reading when reached marker or whitespace
if marker || whitespace {
break;
match next_char {
'\0' | '(' | ')' | ',' => break, // Just stop on a marker
c if is_whitespace(c) => {
let _ = self.chars.next();
break;
}
_ => {
result.push(next_char);
let _ = self.chars.next();
}
}
}

if result.is_empty() {
None
} else {
Some(result)
}
result
}
}

@@ -186,6 +167,13 @@ fn test_tokenizer_invalid_number() {
assert_eq!(tokens, vec![]);
}

#[test]
fn test_tokenizer_not_a_number() {
    // `¾` satisfies `char::is_numeric` but is not an ASCII digit, so the
    // tokenizer must emit it as a Word rather than attempt number parsing.
    let input = "¾";
    let tokens: Vec<Token<f64>> = Tokens::from_str(input).collect();
    assert_eq!(tokens, vec![Token::Word("¾".to_owned())]);
}

#[test]
fn test_tokenizer_2numbers() {
let test_str = ".4 -2";

0 comments on commit 5d938ce

Please sign in to comment.