Skip to content

Commit

Permalink
Add support for tokenized floats.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexhuszagh committed May 15, 2021
1 parent 0c714bb commit 37390c3
Show file tree
Hide file tree
Showing 7 changed files with 632 additions and 49 deletions.
76 changes: 75 additions & 1 deletion extras/simple-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>(
/// Float-parsing implementation selected for a benchmark run.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Method {
/// Parses the raw string with `fast_float::parse_partial`.
FastFloat,
/// Splits the string with `tokenize` first, then calls
/// `fast_float::parse_from_parts` on the pieces.
FastFloatTokenized,
/// Parses the raw bytes with `lexical_core::parse_partial`.
Lexical,
/// Reported as "from_str"; presumably the standard-library `FromStr`
/// parser — NOTE(review): its call site is not visible here, confirm.
FromStr,
}
Expand All @@ -120,10 +121,79 @@ fn type_str(float32: bool) -> &'static str {
}
}

/// Strips one optional leading sign byte from `s`.
///
/// Returns `(negative, rest)`: `negative` is `true` only when `s` starts
/// with `-`, and `rest` is `s` without the sign. Input that does not start
/// with `+` or `-` (including the empty string) is returned unchanged with
/// `negative == false`.
#[inline]
fn parse_sign<'a>(s: &'a str) -> (bool, &'a str) {
    let lead = s.as_bytes().first().copied();
    let negative = lead == Some(b'-');
    if negative || lead == Some(b'+') {
        // The sign is a single ASCII byte, so offset 1 is a char boundary.
        (negative, &s[1..])
    } else {
        (false, s)
    }
}

/// Returns the byte offset of the first `.` in `s`, or `None` when `s`
/// contains no decimal point.
#[inline]
fn decimal_index(s: &str) -> Option<usize> {
    s.find('.')
}

/// Returns the byte offset of the first exponent marker (`e` or `E`) in
/// `s`, or `None` when there is none.
#[inline]
fn exponent_index(s: &str) -> Option<usize> {
    s.find(|c: char| c == 'e' || c == 'E')
}

/// Splits `s` around the single-byte separator at byte offset `index`.
///
/// Returns `(lead, trail)`: `lead` is everything before `index`, `trail` is
/// everything after it. The byte at `index` itself is dropped.
///
/// Checked slicing is used instead of `from_utf8_unchecked`, so a bad
/// `index` panics rather than producing a `&str` that is not valid UTF-8.
///
/// # Panics
///
/// Panics if `index >= s.len()`, or if `index` or `index + 1` does not lie
/// on a UTF-8 character boundary (i.e. the separator is not one ASCII byte).
#[inline]
fn split_index<'a>(s: &'a str, index: usize) -> (&'a str, &'a str) {
    (&s[..index], &s[index + 1..])
}

/// Splits `s` at its end, returning `(s, "")`.
///
/// Used for inputs without a fractional part, so callers still receive an
/// (empty) fractional slice.
#[inline]
fn split_end<'a>(s: &'a str) -> (&'a str, &'a str) {
    // `s.len()` is always a valid char boundary, so this never panics.
    s.split_at(s.len())
}

/// Parses the exponent digits `s` (optionally signed, base 10) as an `i64`.
///
/// # Panics
///
/// Panics if `s` is not a valid base-10 `i64` (for example, if it is empty).
#[inline]
fn parse_exponent(s: &str) -> i64 {
    i64::from_str_radix(s, 10).unwrap()
}

/// Splits a decimal float literal into the pieces expected by
/// `fast_float::parse_from_parts`.
///
/// Returns `(integral, fractional, exponent, negative)`:
/// * `integral` – the text before the first `.` (or before the exponent
///   marker when there is no `.`), with the sign removed;
/// * `fractional` – the text between the `.` and the exponent marker, or an
///   empty string when there is no `.`;
/// * `exponent` – the parsed exponent, `0` when no `e`/`E` is present;
/// * `negative` – `true` when the input starts with `-`.
///
/// No validation of the digits is performed.
///
/// # Panics
///
/// Panics if an exponent marker is present but the text after it is not a
/// valid `i64`.
#[inline]
fn tokenize<'a>(s: &'a str) -> (&'a str, &'a str, i64, bool) {
    let (negative, s) = parse_sign(s);
    if let Some(index) = decimal_index(s) {
        let (i, rest) = split_index(s, index);
        // The exponent marker must be located within `rest`, not `s`: an
        // offset into `s` is shifted by the integral digits plus the `.`,
        // so using it on `rest` splits at the wrong byte or panics.
        if let Some(index) = exponent_index(rest) {
            let (f, exp) = split_index(rest, index);
            (i, f, parse_exponent(exp), negative)
        } else {
            (i, rest, 0, negative)
        }
    } else if let Some(index) = exponent_index(s) {
        let (i, exp) = split_index(s, index);
        let (i, f) = split_end(i);
        (i, f, parse_exponent(exp), negative)
    } else {
        let (i, f) = split_end(s);
        (i, f, 0, negative)
    }
}

impl Method {
pub fn name(&self) -> &'static str {
match self {
Self::FastFloat => "fast-float",
Self::FastFloatTokenized => "fast-float-tokenized",
Self::Lexical => "lexical",
Self::FromStr => "from_str",
}
Expand All @@ -140,6 +210,10 @@ impl Method {
Self::FastFloat => run_bench(data, repeat, |s: &str| {
fast_float::parse_partial::<T, _>(s).unwrap_or_default().0
}),
Self::FastFloatTokenized => run_bench(data, repeat, |s: &str| {
let (i, f, e, n) = tokenize(s);
fast_float::parse_from_parts::<T, _>(i, f, e, n)
}),
Self::Lexical => run_bench(data, repeat, |s: &str| {
lexical_core::parse_partial::<T>(s.as_bytes())
.unwrap_or_default()
Expand All @@ -165,7 +239,7 @@ impl Method {
}

pub fn all() -> &'static [Self] {
&[Method::FastFloat, Method::Lexical, Method::FromStr]
&[Method::FastFloat, Method::FastFloatTokenized, Method::Lexical, Method::FromStr]
}
}

Expand Down
101 changes: 70 additions & 31 deletions src/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,41 +187,37 @@ impl Decimal {
}

#[inline]
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
// can't fail since it follows a call to parse_number
let mut d = Decimal::default();
let start = s;
let c = s.get_first();
d.negative = c == b'-';
if c == b'-' || c == b'+' {
s = s.advance(1);
fn parse_fractional<'a>(mut s: &'a [u8], d: &mut Decimal) -> &'a [u8] {
let first = s;
if d.num_digits == 0 {
s = s.skip_chars(b'0');
}
s = s.skip_chars(b'0');
parse_digits(&mut s, |digit| d.try_add_digit(digit));
if s.check_first(b'.') {
s = s.advance(1);
let first = s;
if d.num_digits == 0 {
s = s.skip_chars(b'0');
}
if cfg!(target_endian = "little") {
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
let v = s.read_u64();
if !is_8digits_le(v) {
break;
}
d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
d.num_digits += 8;
s = s.advance(8);
if cfg!(target_endian = "little") {
while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
let v = s.read_u64();
if !is_8digits_le(v) {
break;
}
d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
d.num_digits += 8;
s = s.advance(8);
}
parse_digits(&mut s, |digit| d.try_add_digit(digit));
d.decimal_point = s.len() as i32 - first.len() as i32;
}
parse_digits(&mut s, |digit| d.try_add_digit(digit));
d.decimal_point = s.len() as i32 - first.len() as i32;

s
}

#[inline]
fn trim_zeros<'a, Iter>(iter: Iter, d: &mut Decimal)
where
Iter: Iterator<Item=&'a u8>
{
if d.num_digits != 0 {
// Ignore the trailing zeros if there are any
let mut n_trailing_zeros = 0;
for &c in start[..(start.len() - s.len())].iter().rev() {
for &c in iter {
if c == b'0' {
n_trailing_zeros += 1;
} else if c != b'.' {
Expand All @@ -236,6 +232,51 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
d.num_digits = Decimal::MAX_DIGITS;
}
}
}

/// Zeroes every digit slot of `d` from index `d.num_digits` up to, but not
/// including, `Decimal::MAX_DIGITS_WITHOUT_OVERFLOW`.
///
/// Does nothing when `d.num_digits` is already at or beyond that bound.
#[inline]
fn add_zero_digits(d: &mut Decimal) {
    let mut slot = d.num_digits;
    while slot < Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
        d.digits[slot] = 0;
        slot += 1;
    }
}

/// Builds a `Decimal` from pre-tokenized components.
///
/// * `i` – bytes of the integral part (leading zeros are skipped);
/// * `f` – bytes of the fractional part, without the `.`;
/// * `e` – decimal exponent, added to the computed decimal point;
/// * `negative` – sign of the value.
///
/// An exponent outside the `i32` range is clamped to `i32::MIN`/`i32::MAX`
/// and added with saturation, rather than being truncated by an `as` cast
/// (which would turn e.g. `2^32` into `0`).
///
/// NOTE(review): the comment inherited from `parse_decimal` said this
/// "can't fail since it follows a call to parse_number" — confirm that the
/// from-parts entry point is also only reached after such a call.
#[inline]
pub fn parse_decimal_from_parts(mut i: &[u8], f: &[u8], e: i64, negative: bool) -> Decimal {
    let mut d = Decimal::default();
    d.negative = negative;

    // Integral part: remember the untrimmed start so trailing zeros can be
    // scanned across the whole integral + fractional input afterwards.
    let i_start = i;
    i = i.skip_chars(b'0');
    parse_digits(&mut i, |digit| d.try_add_digit(digit));

    // Fractional part, then trailing-zero trimming over both parts in
    // reverse order.
    parse_fractional(f, &mut d);
    trim_zeros(i_start.iter().chain(f.iter()).rev(), &mut d);

    // Clamp before narrowing so an oversized exponent keeps its sign and
    // magnitude instead of wrapping around.
    let exponent = e
        .max(i64::from(i32::min_value()))
        .min(i64::from(i32::max_value())) as i32;
    d.decimal_point = d.decimal_point.saturating_add(exponent);
    add_zero_digits(&mut d);

    d
}

#[inline]
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
// can't fail since it follows a call to parse_number
let mut d = Decimal::default();
let start = s;
let c = s.get_first();
d.negative = c == b'-';
if c == b'-' || c == b'+' {
s = s.advance(1);
}
s = s.skip_chars(b'0');
parse_digits(&mut s, |digit| d.try_add_digit(digit));
if s.check_first(b'.') {
s = s.advance(1);
s = parse_fractional(s, &mut d);
}
trim_zeros(start[..(start.len() - s.len())].iter().rev(), &mut d);
if s.check_first2(b'e', b'E') {
s = s.advance(1);
let mut neg_exp = false;
Expand All @@ -253,9 +294,7 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
});
d.decimal_point += if neg_exp { -exp_num } else { exp_num };
}
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
d.digits[i] = 0;
}
add_zero_digits(&mut d);
d
}

Expand Down
30 changes: 30 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,21 @@ pub trait FastFloat: float::Float {
fn parse_float_partial<S: AsRef<[u8]>>(s: S) -> Result<(Self, usize)> {
parse::parse_float(s.as_ref()).ok_or(Error)
}

/// Builds a float from a decimal number that has already been tokenized.
///
/// The caller supplies the integral digits, the fractional digits, the
/// already-parsed exponent and the sign separately.
///
/// Validating and tokenizing the input is the caller's responsibility:
/// this never returns an error, but the significant digits may be
/// truncated at the first invalid digit. Special values such as NaN,
/// INF or Infinity are not handled.
#[inline]
fn parse_from_parts<S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> Self {
    let int_digits: &[u8] = integral.as_ref();
    let frac_digits: &[u8] = fractional.as_ref();
    parse::parse_from_parts(int_digits, frac_digits, exponent, negative)
}
}

impl FastFloat for f32 {}
Expand Down Expand Up @@ -134,3 +149,18 @@ pub fn parse<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<T> {
pub fn parse_partial<T: FastFloat, S: AsRef<[u8]>>(s: S) -> Result<(T, usize)> {
T::parse_float_partial(s)
}

/// Builds a float of type `T` from a decimal number that has already been
/// tokenized.
///
/// The caller supplies the integral digits, the fractional digits, the
/// already-parsed exponent and the sign separately.
///
/// Validating and tokenizing the input is the caller's responsibility:
/// this never returns an error, but the significant digits may be
/// truncated at the first invalid digit. Special values such as NaN,
/// INF or Infinity are not handled.
#[inline]
pub fn parse_from_parts<T: FastFloat, S: AsRef<[u8]>>(integral: S, fractional: S, exponent: i64, negative: bool) -> T {
    let int_digits: &[u8] = integral.as_ref();
    let frac_digits: &[u8] = fractional.as_ref();
    T::parse_from_parts(int_digits, frac_digits, exponent, negative)
}
Loading

0 comments on commit 37390c3

Please sign in to comment.