Skip to content

Commit fdef013

Browse files
committed
Add support for requiring base prefixes and suffixes.
Required base prefixes and suffixes are enforced when parsing, and our float and integer writers now emit them when writing. This is useful for cases like hex floats, where the floats only make sense when they have a literal `0x` prefixing them.
1 parent 933a8da commit fdef013

File tree

18 files changed

+882
-122
lines changed

18 files changed

+882
-122
lines changed

CHANGELOG

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515
- Added `build_checked` to our `Options` API (#204).
1616
- Added `has_digit_separator` to `NumberFormat` (#204).
1717
- Re-export `NumberFormat` to our other crates (#204).
18-
- Add `Options::from_radix` for all options for similar APIs for each (#208).
18+
- Added `Options::from_radix` to all options types, so each provides a similar API (#208).
19+
- Added support for requiring both integer and fraction digits with exponents, that is, `1.e5` and `.1e5`, as opposed to just requiring `1e5` (#215).
20+
- Added `supports_parsing_integers`, `supports_parsing_floats`, `supports_writing_integers`, and `supports_writing_floats` for our number formats (#215).
21+
- Added `required_base_prefix` and `required_base_suffix` for our number formats, requiring base prefixes and/or suffixes when parsing, and allowing writing base prefixes and/or suffixes (#215).
1922

2023
### Changed
2124

lexical-parse-float/src/parse.rs

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -536,25 +536,31 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
536536
// INTEGER
537537

538538
// Check to see if we have a valid base prefix.
539+
// NOTE: `lz_prefix` records whether a leading zero was consumed
540+
// while checking for a base prefix; it does not indicate whether
541+
// the base prefix itself was found.
539542
#[allow(unused_variables)]
540-
let mut is_prefix = false;
541-
#[cfg(feature = "format")]
543+
let mut lz_prefix = false;
544+
#[cfg(all(feature = "format", feature = "power-of-two"))]
542545
{
543546
let base_prefix = format.base_prefix();
547+
let mut has_prefix = false;
544548
let mut iter = byte.integer_iter();
545549
if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() {
546550
// Check to see if the next character is the base prefix.
547551
// We must have a format like `0x`, `0d`, `0o`.
548552
// NOTE: The check for empty integer digits happens below so
549553
// we don't need a redundant check here.
550-
is_prefix = true;
551-
if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some()
552-
&& iter.is_buffer_empty()
553-
&& format.required_integer_digits()
554-
{
554+
lz_prefix = true;
555+
let prefix = iter.read_if_value(base_prefix, format.case_sensitive_base_prefix());
556+
has_prefix = prefix.is_some();
557+
if has_prefix && iter.is_buffer_empty() && format.required_integer_digits() {
555558
return Err(Error::EmptyInteger(iter.cursor()));
556559
}
557560
}
561+
if format.required_base_prefix() && !has_prefix {
562+
return Err(Error::MissingBasePrefix(iter.cursor()));
563+
}
558564
}
559565

560566
// Parse our integral digits.
@@ -600,7 +606,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
600606

601607
// Check if integer leading zeros are disabled.
602608
#[cfg(feature = "format")]
603-
if !is_prefix && format.no_float_leading_zeros() {
609+
if !lz_prefix && format.no_float_leading_zeros() {
604610
if integer_digits.len() > 1 && integer_digits.first() == Some(&b'0') {
605611
return Err(Error::InvalidLeadingZeros(start.cursor()));
606612
}
@@ -741,11 +747,14 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
741747
// that the first character **is not** a digit separator.
742748
#[allow(unused_variables)]
743749
let base_suffix = format.base_suffix();
744-
#[cfg(feature = "format")]
750+
#[cfg(all(feature = "format", feature = "power-of-two"))]
745751
if base_suffix != 0 {
746-
if byte.first_is(base_suffix, format.case_sensitive_base_suffix()) {
752+
let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix());
753+
if is_suffix {
747754
// SAFETY: safe since `byte.len() >= 1`.
748755
unsafe { byte.step_unchecked() };
756+
} else if format.required_base_suffix() {
757+
return Err(Error::MissingBaseSuffix(byte.cursor()));
749758
}
750759
}
751760

lexical-parse-float/tests/api_tests.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,3 +1307,39 @@ fn supported_test() {
13071307
let value = f64::from_lexical_partial_with_options::<FORMAT>(float.as_bytes(), &OPTIONS);
13081308
assert_eq!(value, Ok((12345.0, 7)));
13091309
}
1310+
1311+
#[test]
1312+
#[cfg(all(feature = "format", feature = "power-of-two"))]
1313+
fn require_base_prefix_test() {
1314+
use core::num;
1315+
1316+
const PREFIX: u128 = NumberFormatBuilder::new()
1317+
.base_prefix(num::NonZeroU8::new(b'd'))
1318+
.required_base_prefix(true)
1319+
.build_strict();
1320+
const OPTIONS: Options = Options::new();
1321+
1322+
let value = f64::from_lexical_with_options::<PREFIX>(b"0d12345", &OPTIONS);
1323+
assert_eq!(value, Ok(12345.0));
1324+
let value = f64::from_lexical_with_options::<PREFIX>(b"12345", &OPTIONS);
1325+
assert_eq!(value, Err(Error::MissingBasePrefix(0)));
1326+
1327+
let value = f64::from_lexical_with_options::<PREFIX>(b"-0d12345", &OPTIONS);
1328+
assert_eq!(value, Ok(-12345.0));
1329+
let value = f64::from_lexical_with_options::<PREFIX>(b"-12345", &OPTIONS);
1330+
assert_eq!(value, Err(Error::MissingBasePrefix(1)));
1331+
1332+
const SUFFIX: u128 = NumberFormatBuilder::rebuild(PREFIX)
1333+
.base_suffix(num::NonZeroU8::new(b'z'))
1334+
.required_base_suffix(true)
1335+
.build_strict();
1336+
let value = f64::from_lexical_with_options::<SUFFIX>(b"0d12345z", &OPTIONS);
1337+
assert_eq!(value, Ok(12345.0));
1338+
let value = f64::from_lexical_with_options::<SUFFIX>(b"0d12345", &OPTIONS);
1339+
assert_eq!(value, Err(Error::MissingBaseSuffix(7)));
1340+
1341+
let value = f64::from_lexical_with_options::<SUFFIX>(b"-0d12345z", &OPTIONS);
1342+
assert_eq!(value, Ok(-12345.0));
1343+
let value = f64::from_lexical_with_options::<SUFFIX>(b"-0d12345", &OPTIONS);
1344+
assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
1345+
}

lexical-parse-integer/src/algorithm.rs

Lines changed: 121 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -120,19 +120,26 @@ macro_rules! into_error {
120120
#[cfg(feature = "format")]
121121
macro_rules! fmt_invalid_digit {
122122
(
123-
$value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
123+
$value:ident,
124+
$iter:ident,
125+
$c:expr,
126+
$start_index:ident,
127+
$invalid_digit:ident,
128+
$has_suffix:ident,
129+
$is_end:expr $(,)?
124130
) => {{
125131
// NOTE: If we have non-contiguous iterators, we could have a skip character
126132
// here at the boundary. This does not affect safety but it does affect
127133
// correctness.
128134
debug_assert!($iter.is_contiguous() || $is_end);
129135

130-
let base_suffix = NumberFormat::<FORMAT>::BASE_SUFFIX;
131-
let uncased_base_suffix = NumberFormat::<FORMAT>::CASE_SENSITIVE_BASE_SUFFIX;
136+
let format = NumberFormat::<FORMAT> {};
137+
let base_suffix = format.base_suffix();
138+
let uncased_base_suffix = format.case_sensitive_base_suffix();
132139
// Need to check for a base suffix, if so, return a valid value.
133140
// We can't have a base suffix at the first value (need at least
134141
// 1 digit).
135-
if base_suffix != 0 && $iter.cursor() - $start_index > 1 {
142+
if cfg!(feature = "power-of-two") && base_suffix != 0 && $iter.cursor() - $start_index > 1 {
136143
let is_suffix = if uncased_base_suffix {
137144
$c == base_suffix
138145
} else {
@@ -144,6 +151,7 @@ macro_rules! fmt_invalid_digit {
144151
// contiguous iterators.
145152
if is_suffix && $is_end && $iter.is_buffer_empty() {
146153
// Break out of the loop, we've finished parsing.
154+
$has_suffix = true;
147155
break;
148156
} else if !$iter.is_buffer_empty() {
149157
// Haven't finished parsing, so we're going to call
@@ -165,7 +173,13 @@ macro_rules! fmt_invalid_digit {
165173
#[cfg(not(feature = "format"))]
166174
macro_rules! fmt_invalid_digit {
167175
(
168-
$value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
176+
$value:ident,
177+
$iter:ident,
178+
$c:expr,
179+
$start_index:ident,
180+
$invalid_digit:ident,
181+
$has_suffix:ident,
182+
$is_end:expr $(,)?
169183
) => {{
170184
$invalid_digit!($value, $iter.cursor(), $iter.current_count());
171185
}};
@@ -393,6 +407,7 @@ where
393407
/// * `add_op` - The unchecked add/sub op.
394408
/// * `start_index` - The offset where parsing started.
395409
/// * `invalid_digit` - Behavior when an invalid digit is found.
410+
/// * `has_suffix` - If a base suffix was found at the end of the buffer.
396411
/// * `is_end` - If iter corresponds to the full input.
397412
///
398413
/// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1480>
@@ -403,15 +418,24 @@ macro_rules! parse_1digit_unchecked {
403418
$add_op:ident,
404419
$start_index:ident,
405420
$invalid_digit:ident,
406-
$is_end:expr
421+
$has_suffix:ident,
422+
$is_end:expr $(,)?
407423
) => {{
408424
// This is a slower parsing algorithm, going 1 digit at a time, but doing it in
409425
// an unchecked loop.
410426
let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
411427
while let Some(&c) = $iter.next() {
412428
let digit = match char_to_digit_const(c, radix) {
413429
Some(v) => v,
414-
None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, $is_end),
430+
None => fmt_invalid_digit!(
431+
$value,
432+
$iter,
433+
c,
434+
$start_index,
435+
$invalid_digit,
436+
$has_suffix,
437+
$is_end,
438+
),
415439
};
416440
// multiply first since compilers are good at optimizing things out and will do
417441
// a fused mul/add. We must do this after getting the digit for
@@ -431,6 +455,7 @@ macro_rules! parse_1digit_unchecked {
431455
/// * `add_op` - The checked add/sub op.
432456
/// * `start_index` - The offset where parsing started.
433457
/// * `invalid_digit` - Behavior when an invalid digit is found.
458+
/// * `has_suffix` - If a base suffix was found at the end of the buffer.
434459
/// * `overflow` - If the error is overflow or underflow.
435460
///
436461
/// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1505>
@@ -441,15 +466,24 @@ macro_rules! parse_1digit_checked {
441466
$add_op:ident,
442467
$start_index:ident,
443468
$invalid_digit:ident,
444-
$overflow:ident
469+
$has_suffix:ident,
470+
$overflow:ident $(,)?
445471
) => {{
446472
// This is a slower parsing algorithm, going 1 digit at a time, but doing it in
447473
// an unchecked loop.
448474
let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
449475
while let Some(&c) = $iter.next() {
450476
let digit = match char_to_digit_const(c, radix) {
451477
Some(v) => v,
452-
None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, true),
478+
None => fmt_invalid_digit!(
479+
$value,
480+
$iter,
481+
c,
482+
$start_index,
483+
$invalid_digit,
484+
$has_suffix,
485+
true,
486+
),
453487
};
454488
// multiply first since compilers are good at optimizing things out and will do
455489
// a fused mul/add
@@ -477,6 +511,7 @@ macro_rules! parse_1digit_checked {
477511
/// * `start_index` - The offset where parsing started.
478512
/// * `invalid_digit` - Behavior when an invalid digit is found.
479513
/// * `no_multi_digit` - If to disable multi-digit optimizations.
514+
/// * `has_suffix` - If a base suffix was found at the end of the buffer.
480515
/// * `is_end` - If iter corresponds to the full input.
481516
macro_rules! parse_digits_unchecked {
482517
(
@@ -486,7 +521,8 @@ macro_rules! parse_digits_unchecked {
486521
$start_index:ident,
487522
$invalid_digit:ident,
488523
$no_multi_digit:expr,
489-
$is_end:expr
524+
$has_suffix:ident,
525+
$is_end:expr $(,)?
490526
) => {{
491527
let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter);
492528
let use_multi = can_multi && !$no_multi_digit;
@@ -510,7 +546,15 @@ macro_rules! parse_digits_unchecked {
510546
$value = $value.wrapping_mul(radix4).$add_op(value);
511547
}
512548
}
513-
parse_1digit_unchecked!($value, $iter, $add_op, $start_index, $invalid_digit, $is_end)
549+
parse_1digit_unchecked!(
550+
$value,
551+
$iter,
552+
$add_op,
553+
$start_index,
554+
$invalid_digit,
555+
$has_suffix,
556+
$is_end
557+
)
514558
}};
515559
}
516560

@@ -528,6 +572,7 @@ macro_rules! parse_digits_unchecked {
528572
/// * `invalid_digit` - Behavior when an invalid digit is found.
529573
/// * `overflow` - If the error is overflow or underflow.
530574
/// * `no_multi_digit` - If to disable multi-digit optimizations.
575+
/// * `has_suffix` - If a base suffix was found at the end of the buffer.
531576
/// * `overflow_digits` - The number of digits before we need to consider
532577
/// checked ops.
533578
macro_rules! parse_digits_checked {
@@ -540,7 +585,8 @@ macro_rules! parse_digits_checked {
540585
$invalid_digit:ident,
541586
$overflow:ident,
542587
$no_multi_digit:expr,
543-
$overflow_digits:expr
588+
$has_suffix:ident,
589+
$overflow_digits:expr $(,)?
544590
) => {{
545591
// Can use the unchecked for the `max_digits` here. If we
546592
// have a non-contiguous iterator, we could have a case like
@@ -557,13 +603,22 @@ macro_rules! parse_digits_checked {
557603
$start_index,
558604
$invalid_digit,
559605
$no_multi_digit,
606+
$has_suffix,
560607
false
561608
);
562609
}
563610
}
564611

565612
// NOTE: all our multi-digit optimizations have been done here: skip this
566-
parse_1digit_checked!($value, $iter, $add_op, $start_index, $invalid_digit, $overflow)
613+
parse_1digit_checked!(
614+
$value,
615+
$iter,
616+
$add_op,
617+
$start_index,
618+
$invalid_digit,
619+
$has_suffix,
620+
$overflow
621+
)
567622
}};
568623
}
569624

@@ -650,6 +705,9 @@ macro_rules! algorithm {
650705
}
651706
}
652707
}
708+
if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_prefix() && !is_prefix {
709+
return Err(Error::MissingBasePrefix(iter.cursor()));
710+
}
653711

654712
// If we have a format that doesn't accept leading zeros,
655713
// check if the next value is invalid. It's invalid if the
@@ -684,14 +742,60 @@ macro_rules! algorithm {
684742
// culminates in **way** slower performance overall for simple
685743
// integers, and no improvement for large integers.
686744
let mut value = T::ZERO;
745+
#[allow(unused_mut)]
746+
let mut has_suffix = false;
687747
if cannot_overflow && is_negative {
688-
parse_digits_unchecked!(value, iter, wrapping_sub, start_index, $invalid_digit, $no_multi_digit, true);
748+
parse_digits_unchecked!(
749+
value,
750+
iter,
751+
wrapping_sub,
752+
start_index,
753+
$invalid_digit,
754+
$no_multi_digit,
755+
has_suffix,
756+
true,
757+
);
689758
} if cannot_overflow {
690-
parse_digits_unchecked!(value, iter, wrapping_add, start_index, $invalid_digit, $no_multi_digit, true);
759+
parse_digits_unchecked!(
760+
value,
761+
iter,
762+
wrapping_add,
763+
start_index,
764+
$invalid_digit,
765+
$no_multi_digit,
766+
has_suffix,
767+
true,
768+
);
691769
} else if is_negative {
692-
parse_digits_checked!(value, iter, checked_sub, wrapping_sub, start_index, $invalid_digit, Underflow, $no_multi_digit, overflow_digits);
770+
parse_digits_checked!(
771+
value,
772+
iter,
773+
checked_sub,
774+
wrapping_sub,
775+
start_index,
776+
$invalid_digit,
777+
Underflow,
778+
$no_multi_digit,
779+
has_suffix,
780+
overflow_digits,
781+
);
693782
} else {
694-
parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits);
783+
parse_digits_checked!(
784+
value,
785+
iter,
786+
checked_add,
787+
wrapping_add,
788+
start_index,
789+
$invalid_digit,
790+
Overflow,
791+
$no_multi_digit,
792+
has_suffix,
793+
overflow_digits,
794+
);
795+
}
796+
797+
if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix {
798+
return Err(Error::MissingBaseSuffix(iter.cursor()));
695799
}
696800

697801
$into_ok!(value, iter.buffer_length(), iter.current_count())

0 commit comments

Comments
 (0)