From b2de8a2111f25a38862b9f9742fded5031195f1e Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sun, 22 Sep 2024 11:00:37 -0500 Subject: [PATCH] Improve the formatting API. This addressed #96 and #97, fixing the lack of processing with consecutive digit separators by enhancing the internal logic, adds logic for internal and first digit separators to simplify logic and improve performance, fix unittests, and also make it so the errors are consistent by adding checks when formatting is enabled to ensure the correct logic is used. Closes #96 Closes #97 --- CHANGELOG | 6 + clippy.toml | 2 + lexical-parse-float/src/parse.rs | 68 +- lexical-parse-float/tests/issue_96_tests.rs | 366 +++++++ lexical-parse-float/tests/parse_tests.rs | 6 +- lexical-parse-integer/src/algorithm.rs | 94 +- lexical-parse-integer/tests/issue_96_tests.rs | 371 +++++++ lexical-util/src/feature_format.rs | 92 ++ lexical-util/src/format_builder.rs | 70 ++ lexical-util/src/iterator.rs | 39 +- lexical-util/src/noskip.rs | 17 +- lexical-util/src/not_feature_format.rs | 92 ++ lexical-util/src/skip.rs | 970 ++++++++++++++++-- lexical-util/tests/iterator_tests.rs | 4 +- lexical-util/tests/skip_tests.rs | 146 +-- 15 files changed, 2107 insertions(+), 236 deletions(-) create mode 100644 lexical-parse-float/tests/issue_96_tests.rs create mode 100644 lexical-parse-integer/tests/issue_96_tests.rs diff --git a/CHANGELOG b/CHANGELOG index 3ef94ac1..0df7778a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Higher performance when parsing floats with digit separators. + ### Fixed - Inlining inconsistency between public API methods (credit to @zheland) - Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled. - Have consistent errors when an invalid leading digit is found for floating point numbers to always be `Error::InvalidDigit`. 
+- Incorrect parsing of consecutive digit separators. +- Inaccuracies when parsing digit separators at various positions leading to incorrect errors being returned. ## [1.0.1] 2024-09-16 diff --git a/clippy.toml b/clippy.toml index fac585ae..8bd81b7e 100644 --- a/clippy.toml +++ b/clippy.toml @@ -5,6 +5,8 @@ disallowed-macros = [ { path = "std::println", reason = "no IO allowed" }, { path = "std::format", reason = "no string allocation allowed" }, { path = "std::debug", reason = "debugging macros should not be present in any release" }, + # NOTE: unimplemented is fine because this can be for intentionally disabled methods + { path = "std::todo", reason = "should never have TODO macros in releases" }, ] disallowed-methods = [ { path = "std::io::stdout", reason = "no IO allowed" }, diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index e667cc0d..53bcd789 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -246,11 +246,18 @@ pub fn parse_complete( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok(F::ZERO); + } } // Parse our a small representation of our number. - let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special); + let num: Number<'_> = + parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special); // Try the fast-path algorithm. 
if let Some(value) = num.try_fast_path::<_, FORMAT>() { return Ok(value); @@ -281,11 +288,18 @@ pub fn fast_path_complete( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok(F::ZERO); + } } // Parse our a small representation of our number. - let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special); + let num = + parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special); Ok(num.force_fast_path::<_, FORMAT>()) } @@ -298,7 +312,13 @@ pub fn parse_partial( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok((F::ZERO, byte.cursor())); + } } // Parse our a small representation of our number. @@ -340,7 +360,13 @@ pub fn fast_path_partial( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok((F::ZERO, byte.cursor())); + } } // Parse our a small representation of our number. 
@@ -458,7 +484,7 @@ pub fn slow_path( #[allow(clippy::collapsible_if)] // reason = "more readable uncollapsed" #[allow(clippy::cast_possible_wrap)] // reason = "no hardware supports buffers >= i64::MAX" #[allow(clippy::too_many_lines)] // reason = "function is one logical entity" -pub fn parse_partial_number<'a, const FORMAT: u128>( +pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( mut byte: Bytes<'a, FORMAT>, is_negative: bool, options: &Options, @@ -510,12 +536,15 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( let mut iter = byte.integer_iter(); if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() { // Check to see if the next character is the base prefix. - // We must have a format like `0x`, `0d`, `0o`. Note: + // We must have a format like `0x`, `0d`, `0o`. + // NOTE: The check for empty integer digits happens below so + // we don't need a redundant check here. is_prefix = true; if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() && iter.is_buffer_empty() + && format.required_integer_digits() { - return Err(Error::Empty(iter.cursor())); + return Err(Error::EmptyInteger(iter.cursor())); } } } @@ -607,11 +636,13 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( // check to see if we have any inval;id leading zeros n_digits += n_after_dot; - if format.required_mantissa_digits() && n_digits == 0 { + if format.required_mantissa_digits() + && (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0)) + { let any_digits = start.clone().integer_iter().peek().is_some(); // NOTE: This is because numbers like `_12.34` have significant digits, // they just don't have a valid digit (#97). 
- if has_decimal || has_exponent || !any_digits { + if has_decimal || has_exponent || !any_digits || IS_PARTIAL { return Err(Error::EmptyMantissa(byte.cursor())); } else { return Err(Error::InvalidDigit(start.cursor())); @@ -770,15 +801,24 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( )) } +pub fn parse_partial_number<'a, const FORMAT: u128>( + byte: Bytes<'a, FORMAT>, + is_negative: bool, + options: &Options, +) -> Result<(Number<'a>, usize)> { + parse_number::(byte, is_negative, options) +} + /// Try to parse a non-special floating point number. #[inline(always)] -pub fn parse_number<'a, const FORMAT: u128>( +pub fn parse_complete_number<'a, const FORMAT: u128>( byte: Bytes<'a, FORMAT>, is_negative: bool, options: &Options, ) -> Result> { + // Then have a const `IsPartial` as well let length = byte.buffer_length(); - let (float, count) = parse_partial_number::(byte, is_negative, options)?; + let (float, count) = parse_number::(byte, is_negative, options)?; if count == length { Ok(float) } else { @@ -807,6 +847,7 @@ where // NOTE: Because of the match statement, this would optimize poorly with // read_if. unsafe { iter.step_unchecked() }; + iter.increment_count(); } } @@ -869,6 +910,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>( *step -= 1; // SAFETY: safe, since `iter` cannot be empty due to `iter.peek()`. 
unsafe { iter.step_unchecked() }; + iter.increment_count(); } else { break; } diff --git a/lexical-parse-float/tests/issue_96_tests.rs b/lexical-parse-float/tests/issue_96_tests.rs new file mode 100644 index 00000000..4e015420 --- /dev/null +++ b/lexical-parse-float/tests/issue_96_tests.rs @@ -0,0 +1,366 @@ +#![cfg(feature = "format")] + +use core::num; + +use lexical_parse_float::{ + Error, + FromLexical, + FromLexicalWithOptions, + NumberFormatBuilder, + Options, +}; + +#[test] +fn issue_96_test() { + let opts = Options::new(); + const NO_CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + const CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + const NO_LEADING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(false) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + + let result = f64::from_lexical(b"_-1234"); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = f64::from_lexical_with_options::(b"^-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + // NOTE: This is correct, since it's "trailing" + let result = f64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = f64::from_lexical_with_options::(b"_1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"X1234", &opts); + 
assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Ok(1234f64)); +} + +#[test] +fn issue_96_i_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"_1_", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_23", &opts); + assert_eq!(result, Ok((1123f64, 6))); + + let result = f64::from_lexical_partial_with_options::(b"1_1__23", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_23_", &opts); + assert_eq!(result, Ok((1123f64, 6))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_23.", &opts); + assert_eq!(result, Ok((1123f64, 7))); +} + +#[test] +fn 
issue_96_l_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); +} + +#[test] +fn issue_96_t_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, 
Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_il_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((123f64, 6))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = 
f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_it_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result: Result<(f64, usize), Error> = + f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_lt_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + 
.leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_no_required_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .required_digits(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Ok((0f64, 0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + 
assert_eq!(result, Ok((0f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Ok((0f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Ok((0f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} diff --git a/lexical-parse-float/tests/parse_tests.rs b/lexical-parse-float/tests/parse_tests.rs index d1946e28..2bbd8659 100644 --- a/lexical-parse-float/tests/parse_tests.rs +++ b/lexical-parse-float/tests/parse_tests.rs @@ -78,7 +78,7 @@ fn parse_number_test() { let options = Options::new(); let string = b"1.2345e10"; let byte = string.bytes::<{ FORMAT }>(); - let result = parse::parse_number(byte, false, &options); + let result = parse::parse_complete_number(byte, false, &options); assert!(result.is_ok()); let num = result.unwrap(); assert_eq!(num.mantissa, 12345); @@ -87,12 +87,12 @@ fn parse_number_test() { let string = b"1.2345e"; let byte = string.bytes::<{ FORMAT }>(); - let result = parse::parse_number(byte, false, &options); + let result = parse::parse_complete_number(byte, false, &options); 
assert!(result.is_err()); let string = b"1.2345 "; let byte = string.bytes::<{ FORMAT }>(); - let result = parse::parse_number(byte, false, &options); + let result = parse::parse_complete_number(byte, false, &options); assert!(result.is_err()); } diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 255484fd..a5b081de 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -49,23 +49,48 @@ const fn can_try_parse_multidigits<'a, Iter: DigitsIter<'a>, const FORMAT: u128> Iter::IS_CONTIGUOUS && (cfg!(not(feature = "power-of-two")) || format.mantissa_radix() <= 10) } +// Get if digits are required for the format. +#[cfg_attr(not(feature = "format"), allow(unused_macros))] +macro_rules! required_digits { + () => { + NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + }; +} + /// Return an value for a complete parser. macro_rules! into_ok_complete { - ($value:expr, $index:expr) => { - Ok(as_cast($value)) - }; + ($value:expr, $index:expr, $count:expr) => {{ + #[cfg(not(feature = "format"))] + return Ok(as_cast($value)); + + #[cfg(feature = "format")] + if required_digits!() && $count == 0 { + into_error!(Empty, $index); + } else { + return Ok(as_cast($value)); + } + }}; } /// Return an value and index for a partial parser. macro_rules! into_ok_partial { - ($value:expr, $index:expr) => { - Ok((as_cast($value), $index)) - }; + ($value:expr, $index:expr, $count:expr) => {{ + #[cfg(not(feature = "format"))] + return Ok((as_cast($value), $index)); + + #[cfg(feature = "format")] + if required_digits!() && $count == 0 { + into_error!(Empty, $index); + } else { + return Ok((as_cast($value), $index)); + } + }}; } /// Return an error for a complete parser upon an invalid digit. macro_rules! invalid_digit_complete { - ($value:expr, $index:expr) => { + ($value:expr, $index:expr, $count:expr) => { // Don't do any overflow checking here: we don't need it. 
into_error!(InvalidDigit, $index - 1) }; @@ -74,17 +99,17 @@ macro_rules! invalid_digit_complete { /// Return a value for a partial parser upon an invalid digit. /// This checks for numeric overflow, and returns the appropriate error. macro_rules! invalid_digit_partial { - ($value:expr, $index:expr) => { + ($value:expr, $index:expr, $count:expr) => { // NOTE: The value is already positive/negative - into_ok_partial!($value, $index - 1) + into_ok_partial!($value, $index - 1, $count) }; } /// Return an error, returning the index and the error. macro_rules! into_error { - ($code:ident, $index:expr) => { - Err(Error::$code($index)) - }; + ($code:ident, $index:expr) => {{ + return Err(Error::$code($index)); + }}; } /// Handle an invalid digit if the format feature is enabled. @@ -115,7 +140,8 @@ macro_rules! fmt_invalid_digit { }; // NOTE: If we're using the `take_n` optimization where it can't // be the end, then the iterator cannot be done. So, in that case, - // we need to end. + // we need to end. `take_n` also can never be used for non- + // contiguous iterators. if is_suffix && $is_end && $iter.is_buffer_empty() { // Break out of the loop, we've finished parsing. break; @@ -131,7 +157,7 @@ macro_rules! fmt_invalid_digit { } } // Might have handled our base-prefix here. - return $invalid_digit!($value, $iter.cursor()); + $invalid_digit!($value, $iter.cursor(), $iter.current_count()) }}; } @@ -141,7 +167,7 @@ macro_rules! fmt_invalid_digit { ( $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr ) => {{ - return $invalid_digit!($value, $iter.cursor()); + $invalid_digit!($value, $iter.cursor(), $iter.current_count()); }}; } @@ -169,7 +195,7 @@ macro_rules! 
parse_sign { $missing:ident ) => { // NOTE: read_if optimizes poorly since we then match after - match $byte.integer_iter().peek() { + match $byte.integer_iter().first() { Some(&b'+') if !$no_positive => { // SAFETY: We have at least 1 item left since we peaked a value unsafe { $byte.step_unchecked() }; @@ -429,7 +455,7 @@ macro_rules! parse_1digit_checked { $value = match $value.checked_mul(as_cast(radix)).and_then(|x| x.$add_op(as_cast(digit))) { Some(value) => value, - None => return into_error!($overflow, $iter.cursor() - 1), + None => into_error!($overflow, $iter.cursor() - 1), } } }}; @@ -580,30 +606,44 @@ macro_rules! algorithm { let is_negative = parse_sign::(&mut byte)?; let mut iter = byte.integer_iter(); if iter.is_buffer_empty() { - return into_error!(Empty, iter.cursor()); + // Our default format **ALWAYS** requires significant digits, however, + // we can have formats that do not require them with the `format` feature. + #[cfg(not(feature = "format"))] + into_error!(Empty, iter.cursor()); + + #[cfg(feature = "format")] + if required_digits!() { + into_error!(Empty, iter.cursor()); + } else { + $into_ok!(T::ZERO, iter.cursor(), 0) + } } // Feature-gate a lot of format-only code here to simplify analysis with our branching + // We only want to skip the zeros if we either have a base prefix or we don't + // allow integer leading zeros, since the skip is expensive #[allow(unused_variables, unused_mut)] let mut start_index = iter.cursor(); + #[cfg_attr(not(feature = "format"), allow(unused_variables))] + let format = NumberFormat:: {}; #[cfg(feature = "format")] - { + if format.has_base_prefix() || format.no_integer_leading_zeros() { // Skip any leading zeros. We want to do our check if it can't possibly overflow after. // For skipping digit-based formats, this approximation is a way over estimate. // NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature let zeros = iter.skip_zeros(); start_index += zeros; + // Now, check to see if we have a valid base prefix. 
- let format = NumberFormat:: {}; - let base_prefix = format.base_prefix(); let mut is_prefix = false; + let base_prefix = format.base_prefix(); if base_prefix != 0 && zeros == 1 { // Check to see if the next character is the base prefix. // We must have a format like `0x`, `0d`, `0o`. Note: if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() { is_prefix = true; if iter.is_buffer_empty() { - return into_error!(Empty, iter.cursor()); + into_error!(Empty, iter.cursor()); } else { start_index += 1; } @@ -617,16 +657,16 @@ macro_rules! algorithm { // Cannot have a base prefix and no leading zeros. let index = iter.cursor() - zeros; if zeros > 1 { - return into_error!(InvalidLeadingZeros, index); + into_error!(InvalidLeadingZeros, index); } // NOTE: Zeros has to be 0 here, so our index == 1 or 2 (depending on sign) match iter.peek().map(|&c| char_to_digit_const(c, format.radix())) { // Valid digit, we have an invalid value. - Some(Some(_)) => return into_error!(InvalidLeadingZeros, index), + Some(Some(_)) => into_error!(InvalidLeadingZeros, index), // Have a non-digit character that follows. - Some(None) => return $invalid_digit!(::ZERO, iter.cursor() + 1), + Some(None) => $invalid_digit!(::ZERO, iter.cursor() + 1, iter.current_count()), // No digits following, has to be ok - None => return $into_ok!(::ZERO, index), + None => $into_ok!(::ZERO, index, iter.current_count()), }; } } @@ -653,7 +693,7 @@ macro_rules! 
algorithm { parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits); } - $into_ok!(value, iter.buffer_length()) + $into_ok!(value, iter.buffer_length(), iter.current_count()) }}; } diff --git a/lexical-parse-integer/tests/issue_96_tests.rs b/lexical-parse-integer/tests/issue_96_tests.rs new file mode 100644 index 00000000..d6c994b0 --- /dev/null +++ b/lexical-parse-integer/tests/issue_96_tests.rs @@ -0,0 +1,371 @@ +#![cfg(feature = "format")] + +use core::num; + +use lexical_parse_integer::{ + Error, + FromLexical, + FromLexicalWithOptions, + NumberFormatBuilder, + Options, +}; + +#[test] +fn issue_96_test() { + let opts = Options::new(); + const NO_CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + const CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + const NO_LEADING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(false) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + + let result = i64::from_lexical(b"_-1234"); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + // NOTE: We need to make sure we're not skipping digit separators before the + // sign, which is never allowed. 
+ let result = u64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = i64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = i64::from_lexical_with_options::(b"^-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + // NOTE: This is correct, since it's "trailing" + let result = i64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = i64::from_lexical_with_options::(b"_1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = i64::from_lexical_with_options::(b"X1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = i64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = i64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Ok(1234)); +} + +#[test] +fn issue_96_i_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .consecutive_digit_separator(false) + .required_digits(true) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"_1_", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = 
i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_23", &opts); + assert_eq!(result, Ok((1123, 6))); + + let result = i64::from_lexical_partial_with_options::(b"1_1__23", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_23_", &opts); + assert_eq!(result, Ok((1123, 6))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_23.", &opts); + assert_eq!(result, Ok((1123, 6))); +} + +#[test] +fn issue_96_l_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1, 2))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); +} + +#[test] +fn issue_96_t_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + 
.trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn issue_96_il_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = 
i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((123, 6))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 4))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn issue_96_it_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11, 4))); + + let result = 
i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn issue_96_lt_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1, 2))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11, 4))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn 
issue_96_no_required_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .required_digits(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Ok((0, 0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Ok((0, 1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Ok((0, 2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1, 2))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11, 4))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Ok((0, 1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index 350eac50..9f8fe190 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -947,108 +947,188 @@ impl NumberFormat { // DIGIT SEPARATOR FLAGS & MASKS // If digit separators are allowed between integer digits. 
+ /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTEGER_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn integer_internal_digit_separator(&self) -> bool { Self::INTEGER_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const FRACTION_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, FRACTION_INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn fraction_internal_digit_separator(&self) -> bool { Self::FRACTION_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const EXPONENT_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. 
#[inline(always)] pub const fn exponent_internal_digit_separator(&self) -> bool { Self::EXPONENT_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn internal_digit_separator(&self) -> bool { Self::INTERNAL_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const INTEGER_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn integer_leading_digit_separator(&self) -> bool { Self::INTEGER_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const FRACTION_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, FRACTION_LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn fraction_leading_digit_separator(&self) -> bool { Self::FRACTION_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any exponent digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const EXPONENT_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn exponent_leading_digit_separator(&self) -> bool { Self::EXPONENT_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn leading_digit_separator(&self) -> bool { Self::LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const INTEGER_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn integer_trailing_digit_separator(&self) -> bool { Self::INTEGER_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const FRACTION_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, FRACTION_TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any fraction digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn fraction_trailing_digit_separator(&self) -> bool { Self::FRACTION_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const EXPONENT_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn exponent_trailing_digit_separator(&self) -> bool { Self::EXPONENT_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn trailing_digit_separator(&self) -> bool { Self::TRAILING_DIGIT_SEPARATOR @@ -1126,6 +1206,12 @@ impl NumberFormat { Self::BASE_PREFIX } + /// Get if the format has a base prefix. + #[inline(always)] + pub const fn has_base_prefix(&self) -> bool { + self.base_prefix() != 0 + } + /// The base suffix character in the packed struct. pub const BASE_SUFFIX: u8 = flags::base_suffix(FORMAT); @@ -1140,6 +1226,12 @@ impl NumberFormat { Self::BASE_SUFFIX } + /// Get if the format has a base suffix. + #[inline(always)] + pub const fn has_base_suffix(&self) -> bool { + self.base_suffix() != 0 + } + // RADIX /// The radix for the significant digits in the packed struct. 
diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 706eace0..7ea5c09a 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -464,54 +464,84 @@ impl NumberFormatBuilder { } /// Get if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn get_integer_internal_digit_separator(&self) -> bool { self.integer_internal_digit_separator } /// Get if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn get_fraction_internal_digit_separator(&self) -> bool { self.fraction_internal_digit_separator } /// Get if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn get_exponent_internal_digit_separator(&self) -> bool { self.exponent_internal_digit_separator } /// Get if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_integer_leading_digit_separator(&self) -> bool { self.integer_leading_digit_separator } /// Get if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_fraction_leading_digit_separator(&self) -> bool { self.fraction_leading_digit_separator } /// Get if a digit separator is allowed before any exponent digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_exponent_leading_digit_separator(&self) -> bool { self.exponent_leading_digit_separator } /// Get if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_integer_trailing_digit_separator(&self) -> bool { self.integer_trailing_digit_separator } /// Get if a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_fraction_trailing_digit_separator(&self) -> bool { self.fraction_trailing_digit_separator } /// Get if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_exponent_trailing_digit_separator(&self) -> bool { self.exponent_trailing_digit_separator @@ -754,6 +784,10 @@ impl NumberFormatBuilder { } /// Set if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] #[cfg(feature = "format")] pub const fn integer_internal_digit_separator(mut self, flag: bool) -> Self { @@ -762,6 +796,10 @@ impl NumberFormatBuilder { } /// Set if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. 
#[inline(always)] #[cfg(feature = "format")] pub const fn fraction_internal_digit_separator(mut self, flag: bool) -> Self { @@ -770,6 +808,10 @@ impl NumberFormatBuilder { } /// Set if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] #[cfg(feature = "format")] pub const fn exponent_internal_digit_separator(mut self, flag: bool) -> Self { @@ -778,6 +820,10 @@ impl NumberFormatBuilder { } /// Set all internal digit separator flags. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] #[cfg(feature = "format")] pub const fn internal_digit_separator(mut self, flag: bool) -> Self { @@ -788,6 +834,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn integer_leading_digit_separator(mut self, flag: bool) -> Self { @@ -796,6 +845,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn fraction_leading_digit_separator(mut self, flag: bool) -> Self { @@ -804,6 +856,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. 
#[inline(always)] #[cfg(feature = "format")] pub const fn exponent_leading_digit_separator(mut self, flag: bool) -> Self { @@ -812,6 +867,9 @@ impl NumberFormatBuilder { } /// Set all leading digit separator flags. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn leading_digit_separator(mut self, flag: bool) -> Self { @@ -822,6 +880,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn integer_trailing_digit_separator(mut self, flag: bool) -> Self { @@ -830,6 +891,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn fraction_trailing_digit_separator(mut self, flag: bool) -> Self { @@ -838,6 +902,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn exponent_trailing_digit_separator(mut self, flag: bool) -> Self { @@ -846,6 +913,9 @@ impl NumberFormatBuilder { } /// Set all trailing digit separator flags. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. 
#[inline(always)] #[cfg(feature = "format")] pub const fn trailing_digit_separator(mut self, flag: bool) -> Self { diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index 7f4cb98e..2f00e013 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -98,10 +98,14 @@ pub unsafe trait Iter<'a> { /// pass if the cursor was set between the two. unsafe fn set_cursor(&mut self, index: usize); - /// Get the current number of values returned by the iterator. + /// Get the current number of digits returned by the iterator. /// - /// For contiguous iterators, this is always the cursor, for - /// non-contiguous iterators this can be smaller. + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be only the number of + /// digits returned. + /// + /// This is never used for indexing but will be used for API detection. fn current_count(&self) -> usize; // PROPERTIES @@ -140,7 +144,7 @@ pub unsafe trait Iter<'a> { /// Check if the next item in buffer is a given value with optional case /// sensitivity. #[inline(always)] - fn first_is(&mut self, value: u8, is_cased: bool) -> bool { + fn first_is(&self, value: u8, is_cased: bool) -> bool { if is_cased { self.first_is_cased(value) } else { @@ -158,6 +162,14 @@ pub unsafe trait Iter<'a> { /// underlying buffer. This is useful for multi-digit optimizations /// for contiguous iterators. /// + /// This does not increment the count of items returned: this only + /// increments the index, not the total digits returned. You must use + /// this carefully: if stepping over a digit, you must then call + /// [`increment_count`] afterwards or else the internal count will + /// be incorrect. 
+ /// + /// [`increment_count`]: DigitsIter::increment_count + /// /// # Panics /// /// This will panic if the buffer advances for non-contiguous @@ -172,6 +184,15 @@ pub unsafe trait Iter<'a> { /// Advance the internal slice by 1 element. /// + /// + /// This does not increment the count of items returned: this only + /// increments the index, not the total digits returned. You must + /// use this carefully: if stepping over a digit, you must then call + /// [`increment_count`] afterwards or else the internal count will + /// be incorrect. + /// + /// [`increment_count`]: DigitsIter::increment_count + /// /// # Panics /// /// This will panic if the buffer advances for non-contiguous @@ -277,10 +298,18 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { self.peek().is_none() } + /// Increment the number of digits that have been returned by the iterator. + /// + /// For contiguous iterators, this is a no-op. For non-contiguous iterators, + /// this increments the count by 1. + fn increment_count(&mut self); + /// Peek the next value of the iterator, without consuming it. /// /// Note that this can modify the internal state, by skipping digits - /// for iterators that find the first non-zero value, etc. + /// for iterators that find the first non-zero value, etc. We optimize + /// this for the case where we have contiguous iterators, since + /// non-contiguous iterators already have a major performance penalty. fn peek(&mut self) -> Option; /// Peek the next value of the iterator, and step only if it exists. diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs index e8209a77..b58937e9 100644 --- a/lexical-util/src/noskip.rs +++ b/lexical-util/src/noskip.rs @@ -125,17 +125,17 @@ unsafe impl<'a, const __: u128> Iter<'a> for Bytes<'a, __> { self.index = index; } - /// Get the current number of values returned by the iterator. + /// Get the current number of digits returned by the iterator. 
+ /// + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be the only the number of + /// digits returned. #[inline(always)] fn current_count(&self) -> usize { self.index } - #[inline(always)] - fn first(&self) -> Option<&'a u8> { - self.slc.get(self.index) - } - #[inline(always)] #[allow(clippy::assertions_on_constants)] // reason="ensuring safety invariants are valid" unsafe fn step_by_unchecked(&mut self, count: usize) { @@ -243,6 +243,11 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, F self.is_buffer_empty() } + // Always a no-op + #[inline(always)] + fn increment_count(&mut self) { + } + #[inline(always)] fn peek(&mut self) -> Option<::Item> { self.byte.slc.get(self.byte.index) diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index 84855a28..23f9a2f6 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -274,108 +274,188 @@ impl NumberFormat { // DIGIT SEPARATOR FLAGS & MASKS // If digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTEGER_INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn integer_internal_digit_separator(&self) -> bool { Self::INTEGER_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between fraction digits. 
+ /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const FRACTION_INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn fraction_internal_digit_separator(&self) -> bool { Self::FRACTION_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const EXPONENT_INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn exponent_internal_digit_separator(&self) -> bool { Self::EXPONENT_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn internal_digit_separator(&self) -> bool { Self::INTERNAL_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. 
pub const INTEGER_LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn integer_leading_digit_separator(&self) -> bool { Self::INTEGER_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const FRACTION_LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn fraction_leading_digit_separator(&self) -> bool { Self::FRACTION_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const EXPONENT_LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn exponent_leading_digit_separator(&self) -> bool { Self::EXPONENT_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn leading_digit_separator(&self) -> bool { Self::LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any integer digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const INTEGER_TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn integer_trailing_digit_separator(&self) -> bool { Self::INTEGER_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const FRACTION_TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn fraction_trailing_digit_separator(&self) -> bool { Self::FRACTION_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const EXPONENT_TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn exponent_trailing_digit_separator(&self) -> bool { Self::EXPONENT_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. 
#[inline(always)] pub const fn trailing_digit_separator(&self) -> bool { Self::TRAILING_DIGIT_SEPARATOR @@ -453,6 +533,12 @@ impl NumberFormat { Self::BASE_PREFIX } + /// Get if the format has a base prefix. + #[inline(always)] + pub const fn has_base_prefix(&self) -> bool { + false + } + /// The base suffix character in the packed struct. pub const BASE_SUFFIX: u8 = 0; @@ -467,6 +553,12 @@ impl NumberFormat { Self::BASE_SUFFIX } + /// Get if the format has a base suffix. + #[inline(always)] + pub const fn has_base_suffix(&self) -> bool { + false + } + // RADIX /// The radix for the significant digits in the packed struct. diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index 17e8b8e0..987d1f2e 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -48,117 +48,806 @@ use crate::format::NumberFormat; use crate::format_flags as flags; use crate::iterator::{DigitsIter, Iter}; -// PEEK -// ---- +// IS_ILTC +// ------- + +// NOTE: The compiler optimizes all these methods pretty well: it's as +// efficient or almost as efficient as optimized assembly without unsafe +// code, especially since we have to do bounds checking +// before and the compiler can determine all cases correctly. + +/// Helpers to get the next or previous elements for checks. +/// +/// This has the non-consecutive iterator variants as well +/// as the consecutive ones. The consecutive ones will iteratively +/// process all digits. +macro_rules! 
indexing { + (@next $self:ident, $index:expr) => { + $index.wrapping_add(1) + }; + + (@nextc $self:ident, $index:expr) => {{ + let mut index = $index; + let slc = $self.byte.slc; + while slc.get(index.wrapping_add(1)).map_or(false, |&x| $self.is_digit_separator(x)) { + index = index.wrapping_add(1); + } + index.wrapping_add(1) + }}; + + (@prev $self:ident, $index:expr) => { + $index.wrapping_sub(1) + }; + + (@prevc $self:ident, $index:expr) => {{ + let mut index = $index; + let slc = $self.byte.slc; + while slc.get(index.wrapping_sub(1)).map_or(false, |&x| $self.is_digit_separator(x)) { + index = index.wrapping_sub(1); + } + index.wrapping_sub(1) + }}; +} -/// Determine if the digit separator is internal. +/// Determine if a single digit separator is internal. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. -/// The compiler optimizes this pretty well: it's almost as efficient as -/// optimized assembly without bounds checking. +/// # Examples +/// +/// - `1__1_23`- invalid +/// - `1_1__23`- invalid +/// - `1_1_23`- valid +/// - `11_x23`- invalid +/// - `_1123`- invalid +/// - `+_1123`- invalid +/// - `_+1123`- invalid +/// - `1123_`- invalid +/// - `1123_.`- invalid +/// - `112_3.`- valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_i { - ($self:ident) => { - !is_l!($self) && !is_t!($self) + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is a digit + // - `index + 1` is a digit + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(false, |&x| $self.is_digit(x)) && + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_i!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. 
+ // NOTE: The conditions here then are that: + // - `index + 1` is a digit + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_i!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are internal. +/// +/// # Examples +/// +/// - `1__1_23`- valid +/// - `1_1__23`- valid +/// - `1_1_23`- valid +/// - `11_x23`- invalid +/// - `_1123`- invalid +/// - `+_1123`- invalid +/// - `_+1123`- invalid +/// - `1123_`- invalid +/// - `1123_.`- invalid +/// - `112_3.`- valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_ic { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is a digit after consuming digit separators + // - `index + 1` is a digit after consuming digit separators + + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(false, |&x| $self.is_digit(x)) && + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_ic!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is a digit after consuming digit separators + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_ic!(@internal $self, $self.byte.index) }; } -/// Determine if the digit separator is leading. +/// Determine if a single digit separator is leading. 
+/// +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// +/// Having a subsequent sign character is fine since it might +/// be part of a partial parser. +/// +/// # Preconditions /// -/// Preconditions: Assumes `slc[index]` is a digit separator. -/// The compiler optimizes this pretty well: it's almost as efficient as -/// optimized assembly without bounds checking. +/// Assumes `slc[index]` is a digit separator. macro_rules! is_l { - ($self:ident) => {{ - // Consume any digit separators before the current one. - let mut index = $self.byte.index; - while index > 0 - && $self.byte.slc.get(index - 1).map_or(false, |&x| $self.is_digit_separator(x)) - { - index -= 1; - } + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is not a digit + // - `index - 1` is not a digit separator + // - `index + 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) && + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; + + (@first $self:ident) => { + is_l!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: Previous must have been a digit so this cannot be valid. + false + }}; + + (@internal $self:ident) => { + is_l!(@internal $self, $self.byte.index) + }; +} + +/// Determine if one or more digit separators are leading. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `+__+123`- valid +/// - `+__.123`- valid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// +/// Having a subsequent sign character is fine since it might +/// be part of a partial parser. 
+/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_lc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is not a digit after removing digit separators + + let prev = indexing!(@prevc $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_lc!(@first $self, $self.byte.index) + }; - // True if there are no items before the digit separator, or character - // before the digit separators is not a digit. - index == 0 || !$self.byte.slc.get(index - 1).map_or(false, |&x| $self.is_digit(x)) + (@internal $self:ident, $index:expr) => {{ + // NOTE: Previous must have been a digit so this cannot be valid. + false }}; + + (@internal $self:ident) => { + is_lc!(@internal $self, $self.byte.index) + }; } -/// Determine if the digit separator is trailing. +/// Determine if a single digit separator is trailing. +/// +/// # Examples +/// +/// - `123_`- valid +/// - `123__`- invalid +/// - `123_.`- valid +/// - `123__.`- invalid +/// - `123_1`- invalid +/// - `123__1`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid /// -/// Preconditions: Assumes `slc[index]` is a digit separator. -/// The compiler optimizes this pretty well: it's almost as efficient as -/// optimized assembly without bounds checking. +/// Having a subsequent sign character is fine since it might +/// be part of a partial parser. +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_t { - ($self:ident) => {{ - // Consume any digit separators after the current one. 
- let mut index = $self.byte.index; - while index < $self.byte.slc.len() - && $self.byte.slc.get(index + 1).map_or(false, |&x| $self.is_digit_separator(x)) - { - index += 1; - } + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + // - `index + 1` is not a digit separator + // - `index - 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) && + slc.get(prev).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; - index == $self.byte.slc.len() - || !$self.byte.slc.get(index + 1).map_or(false, |&x| $self.is_digit(x)) + (@first $self:ident) => { + is_t!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + // - `index + 1` is not a digit separator + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) }}; + + (@internal $self:ident) => { + is_t!(@internal $self, $self.byte.index) + }; +} + +/// Determine if one or more digit separators are trailing. +/// +/// # Examples +/// +/// - `123_`- valid +/// - `123__`- valid +/// - `123_.`- valid +/// - `123__.`- valid +/// - `123_1`- invalid +/// - `123__1`- invalid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! 
is_tc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_tc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => { + // NOTE: This is already optimized for the first case. + is_tc!(@first $self, $index) + }; + + (@internal $self:ident) => { + is_tc!(@internal $self, $self.byte.index) + }; } /// Determine if the digit separator is leading or internal. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- invalid +/// - `+123_`- invalid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: invalid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_il { - ($self:ident) => { - is_l!($self) || !is_t!($self) + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is a digit + // - `index + 1` is not a digit separator + // - `index - 1` is not a digit separator + // + // # Logic + // + // If the previous character is a digit, then the + // next character must be a digit. 
If the previous + // character is not a digit, then the subsequent character can + // be anything besides a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + + if slc.get(prev).map_or(false, |&x| $self.is_digit(x)) { + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + } else { + slc.get(prev).map_or(true, |&x| !$self.is_digit_separator(x)) + } + }}; + + (@first $self:ident) => { + is_il!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is a digit + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_il!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are leading or internal. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- valid +/// - `+123_`- invalid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: invalid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_ilc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is a digit after consuming digit separators + // + // # Logic + // + // We also need to consider the case where it's empty, + // that is, the previous one wasn't a digit if we don't + // have a digit. 
+ + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) || + slc.get(prev).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_ilc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is a digit after consuming digit separators + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_ilc!(@internal $self, $self.byte.index) }; } /// Determine if the digit separator is internal or trailing. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- invalid +/// - `_+123`- valid +/// - `_123`- invalid +/// - `+_123`- invalid +/// - `+1_23`- valid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_it { - ($self:ident) => { - is_t!($self) || !is_l!($self) + (@first$self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is a digit + // - `index - 1` is not a digit separator + // - `index + 1` is not a digit separator + // + // # Logic + // + // If the previous character is not a digit, there cannot + // be a digit for a following character. If the previous + // character is a digit, then the following one can be + // anything besides a digit separator. 
+ + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + if slc.get(prev).map_or(false, |&x| $self.is_digit(x)) { + // Have a digit, any character besides a digit separator is valid + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + } else { + // Not a digit, so we cannot have a digit or a digit separator + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) + } + }}; + + (@first$self:ident) => { + is_it!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit separator + // Since we've previously had a digit, this is guaranteed to + // be internal or trailing. + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; + + (@internal $self:ident) => { + is_it!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are internal or trailing. +/// +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- invalid +/// - `_+123`- invalid +/// - `_123`- invalid +/// - `+_123`- invalid +/// - `+1_23`- valid +/// - `+1__23`- valid +/// - `+123_`- valid +/// - `+123__`- valid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_itc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is not a digit after consuming digit separators + // + // # Logic + // + // We also need to consider the case where it's empty, + // that is, the previous one wasn't a digit if we don't + // have a digit. 
+ + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(false, |&x| !$self.is_digit(x)) || + slc.get(next).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_itc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => { + // NOTE: Previous must have been a digit so this must be valid. + true + }; + + (@internal $self:ident) => { + is_itc!(@internal $self, $self.byte.index) }; } /// Determine if the digit separator is leading or trailing. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- invalid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! 
is_lt { - ($self:ident) => { - is_l!($self) || is_t!($self) + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - not (`index - 1` is a digit and `index + 1` is a digit) + // - `index - 1` is not a digit separator + // - `index + 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + let prev_value = slc.get(prev); + let next_value = slc.get(next); + + let is_prev_sep = prev_value.map_or(false, |&x| $self.is_digit_separator(x)); + let is_prev_dig = prev_value.map_or(false, |&x| $self.is_digit(x)); + let is_next_sep = next_value.map_or(false, |&x| $self.is_digit_separator(x)); + let is_next_dig = next_value.map_or(false, |&x| $self.is_digit(x)); + + !is_prev_sep && !is_next_sep && !(is_prev_dig && is_next_dig) + }}; + + (@first $self:ident) => { + is_lt!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + // - `index + 1` is not a digit separator + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) + }}; + + (@internal $self:ident) => { + is_lt!(@internal $self, $self.byte.index) }; } -/// Determine if the digit separator is internal, leading, or trailing. +/// Determine if consecutive digit separators are leading or trailing. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- invalid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- valid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! 
is_ltc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that (after consuming separators): + // - not (`index - 1` is a digit and `index + 1` is a digit) + + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + !(slc.get(prev).map_or(false, |&x| $self.is_digit(x)) && slc.get(next).map_or(false, |&x| $self.is_digit(x))) + }}; + + (@first $self:ident) => { + is_ltc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_ltc!(@internal $self, $self.byte.index) + }; +} + +/// Determine if a single digit separator is internal, leading, or trailing. +/// +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! 
is_ilt { - ($self:ident) => { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit separator + // - `index - 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + !slc.get(next).map_or(false, |&x| $self.is_digit_separator(x)) && + !slc.get(prev).map_or(false, |&x| $self.is_digit_separator(x)) + }}; + + (@first $self:ident) => { + is_ilt!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit separator + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; + + (@internal $self:ident) => { + is_ilt!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are internal, leading, or +/// trailing. +/// +/// This is always true. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- valid +/// - `+123_`- valid +/// - `+123__`- valid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_iltc { + (@first $self:ident, $index:expr) => { true }; + + (@first $self:ident) => { + is_iltc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => { + true + }; + + (@internal $self:ident) => { + is_iltc!(@internal $self, $self.byte.index) + }; } +// PEEK +// ---- + /// Consumes 1 or more digit separators. /// Peeks the next token that's not a digit separator. macro_rules! 
peek_1 { ($self:ident, $is_skip:ident) => {{ // This will consume a single, non-consecutive digit separators. - let mut index = $self.cursor(); + let index = $self.cursor(); let buffer = $self.get_buffer(); let value = buffer.get(index)?; let is_digit_separator = $self.is_digit_separator(*value); - if is_digit_separator && $is_skip!($self) { - // Have a skippable digit separator: keep incrementing until we find - // a non-digit separator character. Don't need any complex checks - // here, since we've already done them above. - index += 1; - while index < buffer.len() - && buffer.get(index).map_or(false, |&x| $self.is_digit_separator(x)) - { - index += 1; + // NOTE: We can do some pretty major optimizations for internal values, + // since we can check the location and don't need to check previous values. + if is_digit_separator { + // NOTE: This cannot iteratively search for the next value, + // or else the consecutive digit separator has no effect (#96). + let is_skip = if $self.current_count() == 0 { + $is_skip!(@first $self) + } else { + $is_skip!(@internal $self) + }; + if is_skip { + // SAFETY: Safe since `index < buffer.len()`, so `index + 1 <= buffer.len()`` + unsafe { $self.set_cursor(index + 1) }; + buffer.get(index + 1) + } else { + Some(value) } - // SAFETY: Safe since `index < buffer.len()`. - unsafe { $self.set_cursor(index) }; - buffer.get(index) } else { // Have 1 of 2 conditions: // 1. A non-digit separator character. @@ -177,19 +866,30 @@ macro_rules! peek_n { let buffer = $self.get_buffer(); let value = buffer.get(index)?; let is_digit_separator = $self.is_digit_separator(*value); - if is_digit_separator && $is_skip!($self) { - // Have a skippable digit separator: keep incrementing until we find - // a non-digit separator character. Don't need any complex checks - // here, since we've already done them above. 
- index += 1; - while index < buffer.len() - && buffer.get(index).map_or(false, |&x| $self.is_digit_separator(x)) - { + // NOTE: We can do some pretty major optimizations for internal values, + // since we can check the location and don't need to check previous values. + if is_digit_separator { + let is_skip = if $self.current_count() == 0 { + $is_skip!(@first $self) + } else { + $is_skip!(@internal $self) + }; + if is_skip { + // Have a skippable digit separator: keep incrementing until we find + // a non-digit separator character. Don't need any complex checks + // here, since we've already done them above. index += 1; + while index < buffer.len() + && buffer.get(index).map_or(false, |&x| $self.is_digit_separator(x)) + { + index += 1; + } + // SAFETY: Safe since `index <= buffer.len()`. + unsafe { $self.set_cursor(index) }; + buffer.get(index) + } else { + Some(value) } - // SAFETY: Safe since `index < buffer.len()`. - unsafe { $self.set_cursor(index) }; - buffer.get(index) } else { // Have 1 of 2 conditions: // 1. A non-digit separator character. @@ -261,21 +961,21 @@ macro_rules! peek_ilt { /// Consumes 1 or more leading digit separators and peeks the next value. macro_rules! peek_lc { ($self:ident) => { - peek_n!($self, is_l) + peek_n!($self, is_lc) }; } /// Consumes 1 or more internal digit separators and peeks the next value. macro_rules! peek_ic { ($self:ident) => { - peek_n!($self, is_i) + peek_n!($self, is_ic) }; } /// Consumes 1 or more trailing digit separators and peeks the next value. macro_rules! peek_tc { ($self:ident) => { - peek_n!($self, is_t) + peek_n!($self, is_tc) }; } @@ -283,7 +983,7 @@ macro_rules! peek_tc { /// value. macro_rules! peek_ilc { ($self:ident) => { - peek_n!($self, is_il) + peek_n!($self, is_ilc) }; } @@ -291,7 +991,7 @@ macro_rules! peek_ilc { /// value. macro_rules! peek_itc { ($self:ident) => { - peek_n!($self, is_it) + peek_n!($self, is_itc) }; } @@ -299,21 +999,15 @@ macro_rules! peek_itc { /// value. macro_rules! 
peek_ltc { ($self:ident) => { - peek_n!($self, is_lt) + peek_n!($self, is_ltc) }; } /// Consumes 1 or more digit separators and peeks the next value. macro_rules! peek_iltc { - ($self:ident) => {{ - loop { - let value = $self.byte.slc.get($self.byte.index)?; - if !$self.is_digit_separator(*value) { - return Some(value); - } - $self.byte.index += 1; - } - }}; + ($self:ident) => { + peek_n!($self, is_iltc) + }; } // AS DIGITS @@ -354,9 +1048,15 @@ pub struct Bytes<'a, const FORMAT: u128> { slc: &'a [u8], /// Current index of the iterator in the slice. index: usize, - /// The current count of values returned by the iterator. + /// The current count of integer digits returned by the iterator. /// This is only used if the iterator is not contiguous. - count: usize, + integer_count: usize, + /// The current count of fraction digits returned by the iterator. + /// This is only used if the iterator is not contiguous. + fraction_count: usize, + /// The current count of exponent digits returned by the iterator. + /// This is only used if the iterator is not contiguous. + exponent_count: usize, } impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { @@ -366,7 +1066,9 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { Self { slc, index: 0, - count: 0, + integer_count: 0, + fraction_count: 0, + exponent_count: 0, } } @@ -386,7 +1088,9 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { Self { slc, index, - count: 0, + integer_count: 0, + fraction_count: 0, + exponent_count: 0, } } @@ -447,11 +1151,6 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { ); } self.index += count; - if !is_contiguous { - // Only increment the count if it's not contiguous, otherwise, - // this is an unnecessary performance penalty. - self.count += count; - } } /// Internal implementation that handles if it's contiguous. @@ -501,7 +1200,12 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { self.index = index; } - /// Get the current number of values returned by the iterator. 
+ /// Get the current number of digits returned by the iterator. + /// + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be only the number of + /// digits returned. #[inline(always)] fn current_count(&self) -> usize { // If the buffer is contiguous, then we don't need to track the @@ -509,7 +1213,7 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { if Self::IS_CONTIGUOUS { self.index } else { - self.count + self.integer_count + self.fraction_count + self.exponent_count } } @@ -540,6 +1244,14 @@ macro_rules! skip_iterator { }; } +macro_rules! is_sign { + () => { + pub const fn is_sign(&self, value: u8) -> bool { + matches!(value, b'+' | b'-') + } + }; +} + macro_rules! is_digit_separator { ($format:ident) => { /// Determine if the character is a digit separator. @@ -561,9 +1273,11 @@ macro_rules! is_digit_separator { macro_rules! skip_iterator_impl { ($iterator:ident, $radix_cb:ident) => { impl<'a: 'b, 'b, const FORMAT: u128> $iterator<'a, 'b, FORMAT> { + is_sign!(); is_digit_separator!(FORMAT); /// Create a new digits iterator from the bytes underlying item. + #[inline(always)] pub fn new(byte: &'b mut Bytes<'a, FORMAT>) -> Self { Self { byte, @@ -612,10 +1326,13 @@ macro_rules! skip_iterator_iterator_impl { let value = self.peek()?; // Increment the index so we know not to re-fetch it. self.byte.index += 1; - if !Self::IS_CONTIGUOUS { - // Only increment the count if it's not contiguous, otherwise, - // this is an unnecessary performance penalty. - self.byte.count += 1; + // NOTE: Only increment the count if it's not contiguous, otherwise, + // this is an unnecessary performance penalty. We also need + // to check if it's a digit, which adds on additional cost but + // there's not much else we can do. Hopefully the previous inlining + // checks will minimize the performance hit.
+ if !Self::IS_CONTIGUOUS && self.is_digit(*value) { + self.increment_count(); } Some(value) } @@ -625,7 +1342,7 @@ macro_rules! skip_iterator_iterator_impl { /// Create base methods for the Iter block of a skip iterator. macro_rules! skip_iterator_iter_base { - ($format:ident, $mask:ident) => { + ($format:ident, $mask:ident, $count:ident) => { // It's contiguous if we don't skip over any values. // IE, the digit separator flags for the iterator over // the digits doesn't skip any values. @@ -648,9 +1365,19 @@ macro_rules! skip_iterator_iter_base { unsafe { self.byte.set_cursor(index) }; } + /// Get the current number of digits returned by the iterator. + /// + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be only the number of + /// digits returned. #[inline(always)] fn current_count(&self) -> usize { - self.byte.current_count() + if Self::IS_CONTIGUOUS { + self.byte.current_count() + } else { + self.byte.$count + } } #[inline(always)] @@ -679,15 +1406,35 @@ macro_rules! skip_iterator_digits_iter_base { /// Create impl `ByteIter` block for skip iterator. macro_rules! skip_iterator_bytesiter_impl { - ($iterator:ident, $mask:ident, $i:ident, $l:ident, $t:ident, $c:ident) => { + ($iterator:ident, $mask:ident, $count:ident, $i:ident, $l:ident, $t:ident, $c:ident) => { unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for $iterator<'a, 'b, FORMAT> { - skip_iterator_iter_base!(FORMAT, $mask); + skip_iterator_iter_base!(FORMAT, $mask, $count); } impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for $iterator<'a, 'b, FORMAT> { skip_iterator_digits_iter_base!(); + /// Increment the number of digits that have been returned by the iterator. + /// + /// For contiguous iterators, this is a no-op. For non-contiguous iterators, + /// this increments the count by 1.
+ #[inline(always)] + fn increment_count(&mut self) { + self.byte.$count += 1; + } + /// Peek the next value of the iterator, without consuming it. + /// + /// Note that this can modify the internal state, by skipping digits + /// for iterators that find the first non-zero value, etc. We optimize + /// this for the case where we have contiguous iterators, since + /// non-contiguous iterators already have a major performance penalty. + /// + /// Checking if the character is a digit in the `next()` implementation + /// after skipping characters: + /// 1. Can likely be optimized out due to the use of macros and inlining. + /// 2. Is a small amount of overhead compared to the branching on + /// characters. #[inline(always)] fn peek(&mut self) -> Option<<Self as Iterator>::Item> { let format = NumberFormat::<{ FORMAT }> {}; @@ -746,6 +1493,7 @@ skip_iterator_iterator_impl!(IntegerDigitsIterator); skip_iterator_bytesiter_impl!( IntegerDigitsIterator, INTEGER_DIGIT_SEPARATOR_FLAG_MASK, + integer_count, INTEGER_INTERNAL_DIGIT_SEPARATOR, INTEGER_LEADING_DIGIT_SEPARATOR, INTEGER_TRAILING_DIGIT_SEPARATOR, @@ -764,6 +1512,7 @@ skip_iterator_iterator_impl!(FractionDigitsIterator); skip_iterator_bytesiter_impl!( FractionDigitsIterator, FRACTION_DIGIT_SEPARATOR_FLAG_MASK, + fraction_count, FRACTION_INTERNAL_DIGIT_SEPARATOR, FRACTION_LEADING_DIGIT_SEPARATOR, FRACTION_TRAILING_DIGIT_SEPARATOR, @@ -782,6 +1531,7 @@ skip_iterator_iterator_impl!(ExponentDigitsIterator); skip_iterator_bytesiter_impl!( ExponentDigitsIterator, EXPONENT_DIGIT_SEPARATOR_FLAG_MASK, + exponent_count, EXPONENT_INTERNAL_DIGIT_SEPARATOR, EXPONENT_LEADING_DIGIT_SEPARATOR, EXPONENT_TRAILING_DIGIT_SEPARATOR, @@ -798,16 +1548,22 @@ skip_iterator!( skip_iterator_iterator_impl!(SpecialDigitsIterator); impl<'a: 'b, 'b, const FORMAT: u128> SpecialDigitsIterator<'a, 'b, FORMAT> { + is_sign!(); is_digit_separator!(FORMAT); } unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for SpecialDigitsIterator<'a, 'b, FORMAT> { -
skip_iterator_iter_base!(FORMAT, SPECIAL_DIGIT_SEPARATOR); + skip_iterator_iter_base!(FORMAT, SPECIAL_DIGIT_SEPARATOR, integer_count); } impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for SpecialDigitsIterator<'a, 'b, FORMAT> { skip_iterator_digits_iter_base!(); + // Always a no-op. + #[inline(always)] + fn increment_count(&mut self) { + } + /// Peek the next value of the iterator, without consuming it. #[inline(always)] fn peek(&mut self) -> Option<<Self as Iterator>::Item> { diff --git a/lexical-util/tests/iterator_tests.rs b/lexical-util/tests/iterator_tests.rs index 1882ea6b..46fe0df4 100644 --- a/lexical-util/tests/iterator_tests.rs +++ b/lexical-util/tests/iterator_tests.rs @@ -89,10 +89,10 @@ fn skip_iterator_test() { assert_eq!(iter.current_count(), 0); unsafe { iter.step_unchecked() }; assert_eq!(iter.cursor(), 1); - assert_eq!(iter.current_count(), 1); + assert_eq!(iter.current_count(), 0); iter.next(); assert_eq!(iter.cursor(), 2); - assert_eq!(iter.current_count(), 2); + assert_eq!(iter.current_count(), 1); let mut byte = digits.bytes::<{ FORMAT }>(); let mut iter = byte.integer_iter(); diff --git a/lexical-util/tests/skip_tests.rs b/lexical-util/tests/skip_tests.rs index 7fa6354c..f3e88bf2 100644 --- a/lexical-util/tests/skip_tests.rs +++ b/lexical-util/tests/skip_tests.rs @@ -31,27 +31,27 @@ fn test_skip_iter_i() { skip_iter_eq::<{ FORMAT }>(b"_.45", b"_.45"); skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4_"); skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4_."); skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"_455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b"_.455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5",
b"__.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5__"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45_.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5__.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"_45_"); skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"_45_.56"); skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"_45_"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4_5__"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"_45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4_5__.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -68,9 +68,9 @@ fn test_skip_iter_l() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"4_5"); skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4_"); @@ -78,21 +78,21 @@ fn test_skip_iter_l() { skip_iter_eq::<{ FORMAT }>(b"4_.", b"4_."); skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"45_5"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45__5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".45_5"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45__5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"4_5_"); skip_iter_eq::<{ FORMAT }>(b"4__5__", 
b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"4_5_.5"); skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45__"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45__.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"4_5_"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4__5__"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"4_5_.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4__5__.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -111,29 +111,29 @@ fn test_skip_iter_t() { skip_iter_eq::<{ FORMAT }>(b"_45", b"_45"); skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"4_5"); skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"_45_5"); skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".45_5"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45__5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"4_5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"4_5.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ 
FORMAT }>(b"_45_", b"_45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"_45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"_4_5"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"_4_5.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -151,31 +151,31 @@ fn test_skip_iter_il() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4_"); skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4_."); skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5__"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45_.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5__.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); 
skip_iter_eq::<{ FORMAT }>(b"_45_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45__"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45__.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4_5__"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4_5__.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -195,29 +195,29 @@ fn test_skip_iter_it() { skip_iter_eq::<{ FORMAT }>(b"_45", b"_45"); skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"_455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", 
b"_45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"_45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"_45"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4_5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"_45.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4_5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -235,31 +235,31 @@ fn test_skip_iter_lt() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"4_5"); skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"45_5"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45__5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".45_5"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45__5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"4_5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"4_5.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", 
b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"4_5"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4__5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"4_5.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4__5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -278,31 +278,31 @@ fn test_skip_iter_ilt() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45.5"); - 
skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4_5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"45.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4_5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test]