aldanor · Alexhuszagh · May 19, 2021
diff --git a/extras/simple-bench/Cargo.toml b/extras/simple-bench/Cargo.toml
@@ -14,3 +14,7 @@ anyhow = "1.0"
 lexical = "5.2"
 lexical-core = "0.7"
 fastrand = "1.4"
+
+[features]
+default = []
+use_tokenized = []
diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs
@@ -108,6 +108,8 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>(
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 enum Method {
     FastFloat,
+    #[cfg(feature = "use_tokenized")]
+    FastFloatTokenized,
     Lexical,
     FromStr,
 }
@@ -120,10 +122,87 @@ fn type_str(float32: bool) -> &'static str {
     }
 }
 
+/// Strips an optional leading sign from `s`.
+///
+/// Returns `(negative, rest)`: `negative` is `true` only when `s` starts with
+/// `-`. A leading `+` is consumed as well; any other input is returned whole.
+#[inline]
+#[cfg(feature = "use_tokenized")]
+fn parse_sign<'a>(s: &'a str) -> (bool, &'a str) {
+    if s.starts_with('-') {
+        (true, &s[1..])
+    } else if s.starts_with('+') {
+        (false, &s[1..])
+    } else {
+        (false, s)
+    }
+}
+
+/// Returns the byte offset of the first `.` in `s`, or `None` if there is none.
+#[inline]
+#[cfg(feature = "use_tokenized")]
+fn decimal_index(s: &str) -> Option<usize> {
+    s.bytes().position(|byte| byte == b'.')
+}
+
+/// Returns the byte offset of the first exponent marker (`e` or `E`) in `s`,
+/// or `None` if there is none.
+#[inline]
+#[cfg(feature = "use_tokenized")]
+fn exponent_index(s: &str) -> Option<usize> {
+    s.bytes().position(|byte| matches!(byte, b'e' | b'E'))
+}
+
+/// Splits `s` around the byte at `index`, dropping that byte.
+///
+/// Returns `(&s[..index], &s[index + 1..])`. Callers pass the offset of an
+/// ASCII separator (`.`, `e` or `E`), so both halves are valid UTF-8.
+///
+/// # Panics
+///
+/// Panics if `index >= s.len()` or if either cut does not fall on a `char`
+/// boundary. Checked slicing is used instead of `from_utf8_unchecked` so a
+/// bad index can never produce an invalid `&str`.
+#[inline]
+#[cfg(feature = "use_tokenized")]
+fn split_index<'a>(s: &'a str, index: usize) -> (&'a str, &'a str) {
+    (&s[..index], &s[index + 1..])
+}
+
+/// Splits `s` at its end: returns all of `s` plus the empty tail that starts
+/// at `s.len()`.
+///
+/// Used when a number has no fractional part, so the "fraction" is an empty
+/// slice. No `unsafe` is needed: slicing at `s.len()` is always a valid
+/// boundary.
+#[inline]
+#[cfg(feature = "use_tokenized")]
+fn split_end<'a>(s: &'a str) -> (&'a str, &'a str) {
+    (s, &s[s.len()..])
+}
+
+/// Parses the text following the exponent marker as a signed integer.
+///
+/// # Panics
+///
+/// Panics if `s` is not a valid `i64` (for example when it is empty). The
+/// benchmark inputs are expected to be well-formed floats, so a failure here
+/// indicates bad input data rather than a recoverable condition.
+#[inline]
+#[cfg(feature = "use_tokenized")]
+fn parse_exponent(s: &str) -> i64 {
+    s.parse::<i64>()
+        .expect("exponent must be a valid i64 decimal integer")
+}
+
+/// Tokenizes a decimal float literal into
+/// `(integral, fractional, exponent, negative)`.
+///
+/// The sign is stripped first. The exponent (after the first `e`/`E`) is then
+/// split off and parsed, and the remaining mantissa is split on its first `.`.
+/// A missing fraction yields an empty string; a missing exponent yields `0`.
+///
+/// Splitting the exponent off before looking for the `.` keeps every index
+/// relative to the slice it is applied to. The previous version located the
+/// exponent marker in the full string but used that index on the text after
+/// the `.`, which panicked or split in the wrong place for inputs such as
+/// `"1.5e3"`.
+///
+/// # Panics
+///
+/// Panics if the exponent text is not a valid `i64` (see `parse_exponent`).
+#[inline]
+#[cfg(feature = "use_tokenized")]
+fn tokenize<'a>(s: &'a str) -> (&'a str, &'a str, i64, bool) {
+    let (negative, s) = parse_sign(s);
+
+    // Peel off the exponent so the mantissa can be handled on its own.
+    let (mantissa, exp) = match exponent_index(s) {
+        Some(index) => {
+            let (mantissa, exp) = split_index(s, index);
+            (mantissa, parse_exponent(exp))
+        }
+        None => (s, 0),
+    };
+
+    // Split the mantissa into integral and fractional digits.
+    let (i, f) = match decimal_index(mantissa) {
+        Some(index) => split_index(mantissa, index),
+        None => split_end(mantissa),
+    };
+
+    (i, f, exp, negative)
+}
+
 impl Method {
     pub fn name(&self) -> &'static str {
         match self {
             Self::FastFloat => "fast-float",
+            #[cfg(feature = "use_tokenized")]
+            Self::FastFloatTokenized => "fast-float-tokenized",
             Self::Lexical => "lexical",
             Self::FromStr => "from_str",
         }
@@ -140,6 +219,11 @@ impl Method {
             Self::FastFloat => run_bench(data, repeat, |s: &str| {
                 fast_float::parse_partial::<T, _>(s).unwrap_or_default().0
             }),
+            #[cfg(feature = "use_tokenized")]
+            Self::FastFloatTokenized => run_bench(data, repeat, |s: &str| {
+                let (i, f, e, n) = tokenize(s);
+                fast_float::parse_from_parts::<T, _>(i, f, e, n)
+            }),
             Self::Lexical => run_bench(data, repeat, |s: &str| {
                 lexical_core::parse_partial::<T>(s.as_bytes())
                     .unwrap_or_default()
@@ -165,7 +249,15 @@ impl Method {
     }
 
     pub fn all() -> &'static [Self] {
-        &[Method::FastFloat, Method::Lexical, Method::FromStr]
+        #[cfg(feature = "use_tokenized")]
+        {
+            &[Method::FastFloat, Method::FastFloatTokenized, Method::Lexical, Method::FromStr]
+        }
+
+        #[cfg(not(feature = "use_tokenized"))]
+        {
+            &[Method::FastFloat, Method::Lexical, Method::FromStr]
+        }
     }
 }
 

diff --git a/src/decimal.rs b/src/decimal.rs
@@ -187,41 +187,37 @@ impl Decimal {
 }
 
 #[inline]
-pub fn parse_decimal(mut s: &[u8]) -> Decimal {
-    // can't fail since it follows a call to parse_number
-    let mut d = Decimal::default();
-    let start = s;
-    let c = s.get_first();
-    d.negative = c == b'-';
-    if c == b'-' || c == b'+' {
-        s = s.advance(1);
+fn parse_fractional<'a>(mut s: &'a [u8], d: &mut Decimal) -> &'a [u8] {
+    let first = s;
+    if d.num_digits == 0 {
+        s = s.skip_chars(b'0');
     }
-    s = s.skip_chars(b'0');
-    parse_digits(&mut s, |digit| d.try_add_digit(digit));
-    if s.check_first(b'.') {
-        s = s.advance(1);
-        let first = s;
-        if d.num_digits == 0 {
-            s = s.skip_chars(b'0');
-        }
-        if cfg!(target_endian = "little") {
-            while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
-                let v = s.read_u64();
-                if !is_8digits_le(v) {
-                    break;
-                }
-                d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
-                d.num_digits += 8;
-                s = s.advance(8);
+    if cfg!(target_endian = "little") {
+        while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
+            let v = s.read_u64();
+            if !is_8digits_le(v) {
+                break;
             }
+            d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
+            d.num_digits += 8;
+            s = s.advance(8);
         }
-        parse_digits(&mut s, |digit| d.try_add_digit(digit));
-        d.decimal_point = s.len() as i32 - first.len() as i32;
     }
+    parse_digits(&mut s, |digit| d.try_add_digit(digit));
+    d.decimal_point = s.len() as i32 - first.len() as i32;
+
+    s
+}
+
+#[inline]
+fn trim_zeros<'a, Iter>(iter: Iter, d: &mut Decimal)
+where
+    Iter: Iterator<Item=&'a u8>
+{
     if d.num_digits != 0 {
         // Ignore the trailing zeros if there are any
         let mut n_trailing_zeros = 0;
-        for &c in start[..(start.len() - s.len())].iter().rev() {
+        for &c in iter {
             if c == b'0' {
                 n_trailing_zeros += 1;
             } else if c != b'.' {
@@ -236,6 +232,51 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
             d.num_digits = Decimal::MAX_DIGITS;
         }
     }
+}
+
+/// Zeroes every digit slot from `d.num_digits` up to, but not including,
+/// `Decimal::MAX_DIGITS_WITHOUT_OVERFLOW`.
+///
+/// Does nothing when `d.num_digits` is already at or past that bound.
+#[inline]
+fn add_zero_digits(d: &mut Decimal) {
+    let first_unused = d.num_digits;
+    let unused_slots = d
+        .digits
+        .iter_mut()
+        .take(Decimal::MAX_DIGITS_WITHOUT_OVERFLOW)
+        .skip(first_unused);
+    for slot in unused_slots {
+        *slot = 0;
+    }
+}
+
+/// Builds a `Decimal` from a pre-tokenized number.
+///
+/// * `i` - integral digits (no sign).
+/// * `f` - fractional digits (without the leading `.`); may be empty.
+/// * `e` - decimal exponent, added to the computed decimal point.
+/// * `negative` - sign of the value.
+///
+/// The digits of `i` and then `f` are accumulated into the decimal, trailing
+/// zeros across both parts are trimmed, the exponent is applied, and the
+/// unused digit slots are zeroed.
+///
+/// The exponent is saturated to the `i32` range before it is applied. A plain
+/// `e as i32` would wrap (e.g. `1 << 32` would become `0`) and silently yield
+/// a wrong magnitude instead of an overflowing or underflowing one.
+#[inline]
+pub fn parse_decimal_from_parts(mut i: &[u8], f: &[u8], e: i64, negative: bool) -> Decimal {
+    // NOTE(review): the non-tokenized parser documents that it "can't fail
+    // since it follows a call to parse_number"; confirm the same precondition
+    // holds for callers of this entry point.
+    let mut d = Decimal::default();
+
+    // Integral
+    let i_start = i;
+    d.negative = negative;
+    i = i.skip_chars(b'0');
+    parse_digits(&mut i, |digit| d.try_add_digit(digit));
+
+    // Fractional: the unparsed remainder returned by the helper is not needed.
+    parse_fractional(f, &mut d);
+    trim_zeros(i_start.iter().chain(f.iter()).rev(), &mut d);
+
+    // Exponent: clamp into `i32` and saturate the addition so huge exponents
+    // cannot wrap around.
+    let exponent = e
+        .max(i64::from(i32::min_value()))
+        .min(i64::from(i32::max_value())) as i32;
+    d.decimal_point = d.decimal_point.saturating_add(exponent);
+    add_zero_digits(&mut d);
+
+    d
+}
+
+#[inline]
+pub fn parse_decimal(mut s: &[u8]) -> Decimal {
+    // can't fail since it follows a call to parse_number
+    let mut d = Decimal::default();
+    let start = s;
+    let c = s.get_first();
+    d.negative = c == b'-';
+    if c == b'-' || c == b'+' {
+        s = s.advance(1);
+    }
+    s = s.skip_chars(b'0');
+    parse_digits(&mut s, |digit| d.try_add_digit(digit));
+    if s.check_first(b'.') {
+        s = s.advance(1);
+        s = parse_fractional(s, &mut d);
+    }
+    trim_zeros(start[..(start.len() - s.len())].iter().rev(), &mut d);
     if s.check_first2(b'e', b'E') {
         s = s.advance(1);
         let mut neg_exp = false;
@@ -253,9 +294,7 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
         });
         d.decimal_point += if neg_exp { -exp_num } else { exp_num };
     }
-    for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
-        d.digits[i] = 0;
-    }
+    add_zero_digits(&mut d);
     d
 }
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -105,6 +105,21 @@ pub trait FastFloat: float::Float {
     fn parse_float_partial<S: AsRef<[u8]>>(s: S) -> Result<(Self, usize)> {
         parse::parse_float(s.as_ref()).ok_or(Error)
     }
+
+    /// Converts an already tokenized decimal number into a float.
+    ///
+    /// The caller supplies the integral digits, the fractional digits, the
+    /// parsed exponent and the sign separately; this method assumes those
+    /// components are valid.
+    ///
+    /// Validation and tokenization are the caller's responsibility. The
+    /// call never returns an error, but the significant digits may be
+    /// truncated at the first invalid digit. Special values such as NaN,
+    /// INF, or Infinity are not handled.
+    #[inline]
+    fn parse_from_parts<S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> Self {
+        let (int_digits, frac_digits) = (integral.as_ref(), fractional.as_ref());
+        parse::parse_from_parts(int_digits, frac_digits, exponent, negative)
+    }
 }
 
 impl FastFloat for f32 {}
@@ -134,3 +149,18 @@ pub fn parse<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<T> {
 pub fn parse_partial<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<(T, usize)> {
     T::parse_float_partial(s)
 }
+
+/// Converts an already tokenized decimal number into a float.
+///
+/// The caller supplies the integral digits, the fractional digits, the
+/// parsed exponent and the sign separately; this function assumes those
+/// components are valid.
+///
+/// Validation and tokenization are the caller's responsibility. The call
+/// never returns an error, but the significant digits may be truncated at
+/// the first invalid digit. Special values such as NaN, INF, or Infinity
+/// are not handled.
+#[inline]
+pub fn parse_from_parts<T: FastFloat, S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> T {
+    // Forward the arguments untouched; the trait method performs `as_ref()`.
+    T::parse_from_parts(integral, fractional, exponent, negative)
+}