Improve the formatting API.

This addresses #96 and #97: it fixes the incorrect handling of consecutive digit separators by enhancing the internal logic, adds dedicated logic for internal and leading digit separators to simplify the code and improve performance, fixes the unit tests, and makes the returned errors consistent by adding checks, when formatting is enabled, to ensure the correct logic is used. Closes #96 Closes #97
Alexhuszagh · Sep 22, 2024 · b2de8a2 · b2de8a2
1 parent c102122
commit b2de8a2
Show file tree

Hide file tree

Showing 15 changed files with 2,107 additions and 236 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Changed
+
+- Higher performance when parsing floats with digit separators.
+
 ### Fixed
 
 - Inlining inconsistency between public API methods (credit to @zheland)
 - Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled.
 - Have consistent errors when an invalid leading digit is found for floating point numbers to always be `Error::InvalidDigit`.
+- Incorrect parsing of consecutive digit separators.
+- Inaccuracies when parsing digit separators at various positions leading to incorrect errors being returned.
 
 ## [1.0.1] 2024-09-16
 

diff --git a/clippy.toml b/clippy.toml
@@ -5,6 +5,8 @@ disallowed-macros = [
     { path = "std::println", reason = "no IO allowed" },
     { path = "std::format", reason = "no string allocation allowed" },
     { path = "std::debug", reason = "debugging macros should not be present in any release" },
+    # NOTE: unimplemented is fine because this can be for intentionally disabled methods
+    { path = "std::todo", reason = "should never have TODO macros in releases" },
 ]
 disallowed-methods = [
     { path = "std::io::stdout", reason = "no IO allowed" },

diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs
@@ -246,11 +246,18 @@ pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
     let mut byte = bytes.bytes::<{ FORMAT }>();
     let is_negative = parse_mantissa_sign(&mut byte)?;
     if byte.integer_iter().is_consumed() {
-        return Err(Error::Empty(byte.cursor()));
+        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
+            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
+        {
+            return Err(Error::Empty(byte.cursor()));
+        } else {
+            return Ok(F::ZERO);
+        }
     }
 
     // Parse our a small representation of our number.
-    let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special);
+    let num: Number<'_> =
+        parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
     // Try the fast-path algorithm.
     if let Some(value) = num.try_fast_path::<_, FORMAT>() {
         return Ok(value);
@@ -281,11 +288,18 @@ pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
     let mut byte = bytes.bytes::<{ FORMAT }>();
     let is_negative = parse_mantissa_sign(&mut byte)?;
     if byte.integer_iter().is_consumed() {
-        return Err(Error::Empty(byte.cursor()));
+        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
+            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
+        {
+            return Err(Error::Empty(byte.cursor()));
+        } else {
+            return Ok(F::ZERO);
+        }
     }
 
     // Parse our a small representation of our number.
-    let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special);
+    let num =
+        parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
     Ok(num.force_fast_path::<_, FORMAT>())
 }
 
@@ -298,7 +312,13 @@ pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
     let mut byte = bytes.bytes::<{ FORMAT }>();
     let is_negative = parse_mantissa_sign(&mut byte)?;
     if byte.integer_iter().is_consumed() {
-        return Err(Error::Empty(byte.cursor()));
+        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
+            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
+        {
+            return Err(Error::Empty(byte.cursor()));
+        } else {
+            return Ok((F::ZERO, byte.cursor()));
+        }
     }
 
     // Parse our a small representation of our number.
@@ -340,7 +360,13 @@ pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
     let mut byte = bytes.bytes::<{ FORMAT }>();
     let is_negative = parse_mantissa_sign(&mut byte)?;
     if byte.integer_iter().is_consumed() {
-        return Err(Error::Empty(byte.cursor()));
+        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
+            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
+        {
+            return Err(Error::Empty(byte.cursor()));
+        } else {
+            return Ok((F::ZERO, byte.cursor()));
+        }
     }
 
     // Parse our a small representation of our number.
@@ -458,7 +484,7 @@ pub fn slow_path<F: LemireFloat, const FORMAT: u128>(
 #[allow(clippy::collapsible_if)] // reason = "more readable uncollapsed"
 #[allow(clippy::cast_possible_wrap)] // reason = "no hardware supports buffers >= i64::MAX"
 #[allow(clippy::too_many_lines)] // reason = "function is one logical entity"
-pub fn parse_partial_number<'a, const FORMAT: u128>(
+pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
     mut byte: Bytes<'a, FORMAT>,
     is_negative: bool,
     options: &Options,
@@ -510,12 +536,15 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(
         let mut iter = byte.integer_iter();
         if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() {
             // Check to see if the next character is the base prefix.
-            // We must have a format like `0x`, `0d`, `0o`. Note:
+            // We must have a format like `0x`, `0d`, `0o`.
+            // NOTE: The check for empty integer digits happens below so
+            // we don't need a redundant check here.
             is_prefix = true;
             if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some()
                 && iter.is_buffer_empty()
+                && format.required_integer_digits()
             {
-                return Err(Error::Empty(iter.cursor()));
+                return Err(Error::EmptyInteger(iter.cursor()));
             }
         }
     }
@@ -607,11 +636,13 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(
 
     // check to see if we have any invalid leading zeros
     n_digits += n_after_dot;
-    if format.required_mantissa_digits() && n_digits == 0 {
+    if format.required_mantissa_digits()
+        && (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0))
+    {
         let any_digits = start.clone().integer_iter().peek().is_some();
         // NOTE: This is because numbers like `_12.34` have significant digits,
         // they just don't have a valid digit (#97).
-        if has_decimal || has_exponent || !any_digits {
+        if has_decimal || has_exponent || !any_digits || IS_PARTIAL {
             return Err(Error::EmptyMantissa(byte.cursor()));
         } else {
             return Err(Error::InvalidDigit(start.cursor()));
@@ -770,15 +801,24 @@ pub fn parse_partial_number<'a, const FORMAT: u128>(
     ))
 }
 
+pub fn parse_partial_number<'a, const FORMAT: u128>(
+    byte: Bytes<'a, FORMAT>,
+    is_negative: bool,
+    options: &Options,
+) -> Result<(Number<'a>, usize)> {
+    parse_number::<FORMAT, true>(byte, is_negative, options)
+}
+
 /// Try to parse a non-special floating point number.
 #[inline(always)]
-pub fn parse_number<'a, const FORMAT: u128>(
+pub fn parse_complete_number<'a, const FORMAT: u128>(
     byte: Bytes<'a, FORMAT>,
     is_negative: bool,
     options: &Options,
 ) -> Result<Number<'a>> {
+    // Then have a const `IsPartial` as well
     let length = byte.buffer_length();
-    let (float, count) = parse_partial_number::<FORMAT>(byte, is_negative, options)?;
+    let (float, count) = parse_number::<FORMAT, false>(byte, is_negative, options)?;
     if count == length {
         Ok(float)
     } else {
@@ -807,6 +847,7 @@ where
         // NOTE: Because of the match statement, this would optimize poorly with
         // read_if.
         unsafe { iter.step_unchecked() };
+        iter.increment_count();
     }
 }
 
@@ -869,6 +910,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
             *step -= 1;
             // SAFETY: safe, since `iter` cannot be empty due to `iter.peek()`.
             unsafe { iter.step_unchecked() };
+            iter.increment_count();
         } else {
             break;
         }