Skip to content

Commit

Permalink
Improve the formatting API.
Browse files Browse the repository at this point in the history
This addresses #96 and #97. It fixes the lack of processing of
consecutive digit separators by enhancing the internal logic, adds logic
for internal and first digit separators to simplify the code and improve
performance, fixes the unit tests, and makes the errors consistent by
adding checks when formatting is enabled to ensure the correct logic is
used.

Closes #96
Closes #97
  • Loading branch information
Alexhuszagh committed Sep 22, 2024
1 parent c102122 commit b2de8a2
Show file tree
Hide file tree
Showing 15 changed files with 2,107 additions and 236 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Higher performance when parsing floats with digit separators.

### Fixed

- Inlining inconsistency between public API methods (credit to @zheland)
- Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled.
- Have consistent errors when an invalid leading digit is found for floating point numbers to always be `Error::InvalidDigit`.
- Incorrect parsing of consecutive digit separators.
- Inaccuracies when parsing digit separators at various positions leading to incorrect errors being returned.

## [1.0.1] 2024-09-16

Expand Down
2 changes: 2 additions & 0 deletions clippy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ disallowed-macros = [
{ path = "std::println", reason = "no IO allowed" },
{ path = "std::format", reason = "no string allocation allowed" },
{ path = "std::debug", reason = "debugging macros should not be present in any release" },
# NOTE: unimplemented is fine because this can be for intentionally disabled methods
{ path = "std::todo", reason = "should never have TODO macros in releases" },
]
disallowed-methods = [
{ path = "std::io::stdout", reason = "no IO allowed" },
Expand Down
68 changes: 55 additions & 13 deletions lexical-parse-float/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,18 @@ pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
return Err(Error::Empty(byte.cursor()));
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok(F::ZERO);
}
}

// Parse out a small representation of our number.
let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special);
let num: Number<'_> =
parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
// Try the fast-path algorithm.
if let Some(value) = num.try_fast_path::<_, FORMAT>() {
return Ok(value);
Expand Down Expand Up @@ -281,11 +288,18 @@ pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
return Err(Error::Empty(byte.cursor()));
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok(F::ZERO);
}
}

// Parse out a small representation of our number.
let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special);
let num =
parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
Ok(num.force_fast_path::<_, FORMAT>())
}

Expand All @@ -298,7 +312,13 @@ pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
return Err(Error::Empty(byte.cursor()));
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok((F::ZERO, byte.cursor()));
}
}

// Parse out a small representation of our number.
Expand Down Expand Up @@ -340,7 +360,13 @@ pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
let mut byte = bytes.bytes::<{ FORMAT }>();
let is_negative = parse_mantissa_sign(&mut byte)?;
if byte.integer_iter().is_consumed() {
return Err(Error::Empty(byte.cursor()));
if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
|| NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
{
return Err(Error::Empty(byte.cursor()));
} else {
return Ok((F::ZERO, byte.cursor()));
}
}

// Parse out a small representation of our number.
Expand Down Expand Up @@ -458,7 +484,7 @@ pub fn slow_path<F: LemireFloat, const FORMAT: u128>(
#[allow(clippy::collapsible_if)] // reason = "more readable uncollapsed"
#[allow(clippy::cast_possible_wrap)] // reason = "no hardware supports buffers >= i64::MAX"
#[allow(clippy::too_many_lines)] // reason = "function is one logical entity"
pub fn parse_partial_number<'a, const FORMAT: u128>(
pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
mut byte: Bytes<'a, FORMAT>,
is_negative: bool,
options: &Options,
Expand Down Expand Up @@ -510,12 +536,15 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(
let mut iter = byte.integer_iter();
if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() {
// Check to see if the next character is the base prefix.
// We must have a format like `0x`, `0d`, `0o`. Note:
// We must have a format like `0x`, `0d`, `0o`.
// NOTE: The check for empty integer digits happens below so
// we don't need a redundant check here.
is_prefix = true;
if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some()
&& iter.is_buffer_empty()
&& format.required_integer_digits()
{
return Err(Error::Empty(iter.cursor()));
return Err(Error::EmptyInteger(iter.cursor()));
}
}
}
Expand Down Expand Up @@ -607,11 +636,13 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(

// check to see if we have any invalid leading zeros
n_digits += n_after_dot;
if format.required_mantissa_digits() && n_digits == 0 {
if format.required_mantissa_digits()
&& (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0))
{
let any_digits = start.clone().integer_iter().peek().is_some();
// NOTE: This is because numbers like `_12.34` have significant digits,
// they just don't have a valid digit (#97).
if has_decimal || has_exponent || !any_digits {
if has_decimal || has_exponent || !any_digits || IS_PARTIAL {
return Err(Error::EmptyMantissa(byte.cursor()));
} else {
return Err(Error::InvalidDigit(start.cursor()));
Expand Down Expand Up @@ -770,15 +801,24 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(
))
}

/// Parse a floating point number in partial mode.
///
/// Thin wrapper that forwards all arguments to `parse_number` with the
/// `IS_PARTIAL` const parameter set to `true`.
///
/// Returns the parsed `Number` together with a `usize` count.
/// NOTE(review): the count is presumably the number of bytes consumed from
/// the buffer (the complete variant compares it against the buffer length);
/// confirm against `parse_number`.
pub fn parse_partial_number<'a, const FORMAT: u128>(
byte: Bytes<'a, FORMAT>,
is_negative: bool,
options: &Options,
) -> Result<(Number<'a>, usize)> {
parse_number::<FORMAT, true>(byte, is_negative, options)
}

/// Try to parse a non-special floating point number.
#[inline(always)]
pub fn parse_number<'a, const FORMAT: u128>(
pub fn parse_complete_number<'a, const FORMAT: u128>(
byte: Bytes<'a, FORMAT>,
is_negative: bool,
options: &Options,
) -> Result<Number<'a>> {
// Then have a const `IsPartial` as well
let length = byte.buffer_length();
let (float, count) = parse_partial_number::<FORMAT>(byte, is_negative, options)?;
let (float, count) = parse_number::<FORMAT, false>(byte, is_negative, options)?;
if count == length {
Ok(float)
} else {
Expand Down Expand Up @@ -807,6 +847,7 @@ where
// NOTE: Because of the match statement, this would optimize poorly with
// read_if.
unsafe { iter.step_unchecked() };
iter.increment_count();
}
}

Expand Down Expand Up @@ -869,6 +910,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
*step -= 1;
// SAFETY: safe, since `iter` cannot be empty due to `iter.peek()`.
unsafe { iter.step_unchecked() };
iter.increment_count();
} else {
break;
}
Expand Down
Loading

0 comments on commit b2de8a2

Please sign in to comment.