diff --git a/CHANGELOG b/CHANGELOG index 3ef94ac1..0df7778a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Higher performance when parsing floats with digit separators. + ### Fixed - Inlining inconsistency between public API methods (credit to @zheland) - Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled. - Have consistent errors when an invalid leading digit is found for floating point numbers to always be `Error::InvalidDigit`. +- Incorrect parsing of consecutive digit separators. +- Inaccuracies when parsing digit separators at various positions leading to incorrect errors being returned. ## [1.0.1] 2024-09-16 diff --git a/clippy.toml b/clippy.toml index fac585ae..8bd81b7e 100644 --- a/clippy.toml +++ b/clippy.toml @@ -5,6 +5,8 @@ disallowed-macros = [ { path = "std::println", reason = "no IO allowed" }, { path = "std::format", reason = "no string allocation allowed" }, { path = "std::debug", reason = "debugging macros should not be present in any release" }, + # NOTE: unimplemented is fine because this can be for intentionally disabled methods + { path = "std::todo", reason = "should never have TODO macros in releases" }, ] disallowed-methods = [ { path = "std::io::stdout", reason = "no IO allowed" }, diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index e667cc0d..53bcd789 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -246,11 +246,18 @@ pub fn parse_complete( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok(F::ZERO); + } } // Parse our a small
representation of our number. - let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special); + let num: Number<'_> = + parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special); // Try the fast-path algorithm. if let Some(value) = num.try_fast_path::<_, FORMAT>() { return Ok(value); @@ -281,11 +288,18 @@ pub fn fast_path_complete( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok(F::ZERO); + } } // Parse our a small representation of our number. - let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special); + let num = + parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special); Ok(num.force_fast_path::<_, FORMAT>()) } @@ -298,7 +312,13 @@ pub fn parse_partial( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok((F::ZERO, byte.cursor())); + } } // Parse our a small representation of our number. @@ -340,7 +360,13 @@ pub fn fast_path_partial( let mut byte = bytes.bytes::<{ FORMAT }>(); let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - return Err(Error::Empty(byte.cursor())); + if NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + { + return Err(Error::Empty(byte.cursor())); + } else { + return Ok((F::ZERO, byte.cursor())); + } } // Parse our a small representation of our number. 
@@ -458,7 +484,7 @@ pub fn slow_path( #[allow(clippy::collapsible_if)] // reason = "more readable uncollapsed" #[allow(clippy::cast_possible_wrap)] // reason = "no hardware supports buffers >= i64::MAX" #[allow(clippy::too_many_lines)] // reason = "function is one logical entity" -pub fn parse_partial_number<'a, const FORMAT: u128>( +pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( mut byte: Bytes<'a, FORMAT>, is_negative: bool, options: &Options, @@ -510,12 +536,15 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( let mut iter = byte.integer_iter(); if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() { // Check to see if the next character is the base prefix. - // We must have a format like `0x`, `0d`, `0o`. Note: + // We must have a format like `0x`, `0d`, `0o`. + // NOTE: The check for empty integer digits happens below so + // we don't need a redundant check here. is_prefix = true; if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() && iter.is_buffer_empty() + && format.required_integer_digits() { - return Err(Error::Empty(iter.cursor())); + return Err(Error::EmptyInteger(iter.cursor())); } } } @@ -607,11 +636,13 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( // check to see if we have any inval;id leading zeros n_digits += n_after_dot; - if format.required_mantissa_digits() && n_digits == 0 { + if format.required_mantissa_digits() + && (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0)) + { let any_digits = start.clone().integer_iter().peek().is_some(); // NOTE: This is because numbers like `_12.34` have significant digits, // they just don't have a valid digit (#97).
- if has_decimal || has_exponent || !any_digits { + if has_decimal || has_exponent || !any_digits || IS_PARTIAL { return Err(Error::EmptyMantissa(byte.cursor())); } else { return Err(Error::InvalidDigit(start.cursor())); @@ -770,15 +801,24 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( )) } +pub fn parse_partial_number<'a, const FORMAT: u128>( + byte: Bytes<'a, FORMAT>, + is_negative: bool, + options: &Options, +) -> Result<(Number<'a>, usize)> { + parse_number::(byte, is_negative, options) +} + /// Try to parse a non-special floating point number. #[inline(always)] -pub fn parse_number<'a, const FORMAT: u128>( +pub fn parse_complete_number<'a, const FORMAT: u128>( byte: Bytes<'a, FORMAT>, is_negative: bool, options: &Options, ) -> Result> { + // Then have a const `IsPartial` as well let length = byte.buffer_length(); - let (float, count) = parse_partial_number::(byte, is_negative, options)?; + let (float, count) = parse_number::(byte, is_negative, options)?; if count == length { Ok(float) } else { @@ -807,6 +847,7 @@ where // NOTE: Because of the match statement, this would optimize poorly with // read_if. unsafe { iter.step_unchecked() }; + iter.increment_count(); } } @@ -869,6 +910,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>( *step -= 1; // SAFETY: safe, since `iter` cannot be empty due to `iter.peek()`. 
unsafe { iter.step_unchecked() }; + iter.increment_count(); } else { break; } diff --git a/lexical-parse-float/tests/issue_96_tests.rs b/lexical-parse-float/tests/issue_96_tests.rs new file mode 100644 index 00000000..4e015420 --- /dev/null +++ b/lexical-parse-float/tests/issue_96_tests.rs @@ -0,0 +1,366 @@ +#![cfg(feature = "format")] + +use core::num; + +use lexical_parse_float::{ + Error, + FromLexical, + FromLexicalWithOptions, + NumberFormatBuilder, + Options, +}; + +#[test] +fn issue_96_test() { + let opts = Options::new(); + const NO_CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + const CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + const NO_LEADING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(false) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + + let result = f64::from_lexical(b"_-1234"); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = f64::from_lexical_with_options::(b"^-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + // NOTE: This is correct, since it's "trailing" + let result = f64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = f64::from_lexical_with_options::(b"_1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"X1234", &opts); +
assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = f64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Ok(1234f64)); +} + +#[test] +fn issue_96_i_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"_1_", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_23", &opts); + assert_eq!(result, Ok((1123f64, 6))); + + let result = f64::from_lexical_partial_with_options::(b"1_1__23", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_23_", &opts); + assert_eq!(result, Ok((1123f64, 6))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_23.", &opts); + assert_eq!(result, Ok((1123f64, 7))); +} + +#[test] +fn 
issue_96_l_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); +} + +#[test] +fn issue_96_t_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, 
Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_il_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((123f64, 6))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = 
f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_it_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(0))); + + let result: Result<(f64, usize), Error> = + f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_lt_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + 
.leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::EmptyMantissa(1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} + +#[test] +fn issue_96_no_required_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .required_digits(false) + .build(); + + let result = f64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Ok((0f64, 0))); + + let result = f64::from_lexical_partial_with_options::(b"_", &opts); + 
assert_eq!(result, Ok((0f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Ok((0f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1f64, 2))); + + let result = f64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1f64, 3))); + + let result = f64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11f64, 4))); + + let result = f64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Ok((0f64, 1))); + + let result = f64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123f64, 5))); + + let result = f64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123f64, 4))); +} diff --git a/lexical-parse-float/tests/parse_tests.rs b/lexical-parse-float/tests/parse_tests.rs index d1946e28..2bbd8659 100644 --- a/lexical-parse-float/tests/parse_tests.rs +++ b/lexical-parse-float/tests/parse_tests.rs @@ -78,7 +78,7 @@ fn parse_number_test() { let options = Options::new(); let string = b"1.2345e10"; let byte = string.bytes::<{ FORMAT }>(); - let result = parse::parse_number(byte, false, &options); + let result = parse::parse_complete_number(byte, false, &options); assert!(result.is_ok()); let num = result.unwrap(); assert_eq!(num.mantissa, 12345); @@ -87,12 +87,12 @@ fn parse_number_test() { let string = b"1.2345e"; let byte = string.bytes::<{ FORMAT }>(); - let result = parse::parse_number(byte, false, &options); + let result = parse::parse_complete_number(byte, false, &options); 
assert!(result.is_err()); let string = b"1.2345 "; let byte = string.bytes::<{ FORMAT }>(); - let result = parse::parse_number(byte, false, &options); + let result = parse::parse_complete_number(byte, false, &options); assert!(result.is_err()); } diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 255484fd..a5b081de 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -49,23 +49,48 @@ const fn can_try_parse_multidigits<'a, Iter: DigitsIter<'a>, const FORMAT: u128> Iter::IS_CONTIGUOUS && (cfg!(not(feature = "power-of-two")) || format.mantissa_radix() <= 10) } +// Get if digits are required for the format. +#[cfg_attr(not(feature = "format"), allow(unused_macros))] +macro_rules! required_digits { + () => { + NumberFormat::::REQUIRED_INTEGER_DIGITS + || NumberFormat::::REQUIRED_MANTISSA_DIGITS + }; +} + /// Return an value for a complete parser. macro_rules! into_ok_complete { - ($value:expr, $index:expr) => { - Ok(as_cast($value)) - }; + ($value:expr, $index:expr, $count:expr) => {{ + #[cfg(not(feature = "format"))] + return Ok(as_cast($value)); + + #[cfg(feature = "format")] + if required_digits!() && $count == 0 { + into_error!(Empty, $index); + } else { + return Ok(as_cast($value)); + } + }}; } /// Return an value and index for a partial parser. macro_rules! into_ok_partial { - ($value:expr, $index:expr) => { - Ok((as_cast($value), $index)) - }; + ($value:expr, $index:expr, $count:expr) => {{ + #[cfg(not(feature = "format"))] + return Ok((as_cast($value), $index)); + + #[cfg(feature = "format")] + if required_digits!() && $count == 0 { + into_error!(Empty, $index); + } else { + return Ok((as_cast($value), $index)); + } + }}; } /// Return an error for a complete parser upon an invalid digit. macro_rules! invalid_digit_complete { - ($value:expr, $index:expr) => { + ($value:expr, $index:expr, $count:expr) => { // Don't do any overflow checking here: we don't need it. 
into_error!(InvalidDigit, $index - 1) }; @@ -74,17 +99,17 @@ macro_rules! invalid_digit_complete { /// Return a value for a partial parser upon an invalid digit. /// This checks for numeric overflow, and returns the appropriate error. macro_rules! invalid_digit_partial { - ($value:expr, $index:expr) => { + ($value:expr, $index:expr, $count:expr) => { // NOTE: The value is already positive/negative - into_ok_partial!($value, $index - 1) + into_ok_partial!($value, $index - 1, $count) }; } /// Return an error, returning the index and the error. macro_rules! into_error { - ($code:ident, $index:expr) => { - Err(Error::$code($index)) - }; + ($code:ident, $index:expr) => {{ + return Err(Error::$code($index)); + }}; } /// Handle an invalid digit if the format feature is enabled. @@ -115,7 +140,8 @@ macro_rules! fmt_invalid_digit { }; // NOTE: If we're using the `take_n` optimization where it can't // be the end, then the iterator cannot be done. So, in that case, - // we need to end. + // we need to end. `take_n` also can never be used for non- + // contiguous iterators. if is_suffix && $is_end && $iter.is_buffer_empty() { // Break out of the loop, we've finished parsing. break; @@ -131,7 +157,7 @@ macro_rules! fmt_invalid_digit { } } // Might have handled our base-prefix here. - return $invalid_digit!($value, $iter.cursor()); + $invalid_digit!($value, $iter.cursor(), $iter.current_count()) }}; } @@ -141,7 +167,7 @@ macro_rules! fmt_invalid_digit { ( $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr ) => {{ - return $invalid_digit!($value, $iter.cursor()); + $invalid_digit!($value, $iter.cursor(), $iter.current_count()); }}; } @@ -169,7 +195,7 @@ macro_rules! 
parse_sign { $missing:ident ) => { // NOTE: read_if optimizes poorly since we then match after - match $byte.integer_iter().peek() { + match $byte.integer_iter().first() { Some(&b'+') if !$no_positive => { // SAFETY: We have at least 1 item left since we peaked a value unsafe { $byte.step_unchecked() }; @@ -429,7 +455,7 @@ macro_rules! parse_1digit_checked { $value = match $value.checked_mul(as_cast(radix)).and_then(|x| x.$add_op(as_cast(digit))) { Some(value) => value, - None => return into_error!($overflow, $iter.cursor() - 1), + None => into_error!($overflow, $iter.cursor() - 1), } } }}; @@ -580,30 +606,44 @@ macro_rules! algorithm { let is_negative = parse_sign::(&mut byte)?; let mut iter = byte.integer_iter(); if iter.is_buffer_empty() { - return into_error!(Empty, iter.cursor()); + // Our default format **ALWAYS** requires significant digits, however, + // we can have cases where we don't require them with the `format` feature. + #[cfg(not(feature = "format"))] + into_error!(Empty, iter.cursor()); + + #[cfg(feature = "format")] + if required_digits!() { + into_error!(Empty, iter.cursor()); + } else { + $into_ok!(T::ZERO, iter.cursor(), 0) + } } // Feature-gate a lot of format-only code here to simplify analysis with our branching + // We only want to skip the zeros if we either require a base prefix or we don't + // allow integer leading zeros, since the skip is expensive #[allow(unused_variables, unused_mut)] let mut start_index = iter.cursor(); + #[cfg_attr(not(feature = "format"), allow(unused_variables))] + let format = NumberFormat:: {}; #[cfg(feature = "format")] - { + if format.has_base_prefix() || format.no_integer_leading_zeros() { // Skip any leading zeros. We want to do our check if it can't possibly overflow after. // For skipping digit-based formats, this approximation is a way over estimate. // NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature let zeros = iter.skip_zeros(); start_index += zeros; + // Now, check to see if we have a valid base prefix.
- let format = NumberFormat:: {}; - let base_prefix = format.base_prefix(); let mut is_prefix = false; + let base_prefix = format.base_prefix(); if base_prefix != 0 && zeros == 1 { // Check to see if the next character is the base prefix. // We must have a format like `0x`, `0d`, `0o`. Note: if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() { is_prefix = true; if iter.is_buffer_empty() { - return into_error!(Empty, iter.cursor()); + into_error!(Empty, iter.cursor()); } else { start_index += 1; } @@ -617,16 +657,16 @@ macro_rules! algorithm { // Cannot have a base prefix and no leading zeros. let index = iter.cursor() - zeros; if zeros > 1 { - return into_error!(InvalidLeadingZeros, index); + into_error!(InvalidLeadingZeros, index); } // NOTE: Zeros has to be 0 here, so our index == 1 or 2 (depending on sign) match iter.peek().map(|&c| char_to_digit_const(c, format.radix())) { // Valid digit, we have an invalid value. - Some(Some(_)) => return into_error!(InvalidLeadingZeros, index), + Some(Some(_)) => into_error!(InvalidLeadingZeros, index), // Have a non-digit character that follows. - Some(None) => return $invalid_digit!(::ZERO, iter.cursor() + 1), + Some(None) => $invalid_digit!(::ZERO, iter.cursor() + 1, iter.current_count()), // No digits following, has to be ok - None => return $into_ok!(::ZERO, index), + None => $into_ok!(::ZERO, index, iter.current_count()), }; } } @@ -653,7 +693,7 @@ macro_rules! 
algorithm { parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits); } - $into_ok!(value, iter.buffer_length()) + $into_ok!(value, iter.buffer_length(), iter.current_count()) }}; } diff --git a/lexical-parse-integer/tests/issue_96_tests.rs b/lexical-parse-integer/tests/issue_96_tests.rs new file mode 100644 index 00000000..d6c994b0 --- /dev/null +++ b/lexical-parse-integer/tests/issue_96_tests.rs @@ -0,0 +1,371 @@ +#![cfg(feature = "format")] + +use core::num; + +use lexical_parse_integer::{ + Error, + FromLexical, + FromLexicalWithOptions, + NumberFormatBuilder, + Options, +}; + +#[test] +fn issue_96_test() { + let opts = Options::new(); + const NO_CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + const CONSECUTIVE: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + const NO_LEADING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(false) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build(); + + let result = i64::from_lexical(b"_-1234"); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + // NOTE: We need to make sure we're not skipping digit separators before the + // sign, which is never allowed. 
+ let result = u64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = i64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = i64::from_lexical_with_options::(b"^-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + // NOTE: This is correct, since it's "trailing" + let result = i64::from_lexical_with_options::(b"_-1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(1))); + + let result = i64::from_lexical_with_options::(b"_1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = i64::from_lexical_with_options::(b"X1234", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = i64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Err(Error::InvalidDigit(0))); + + let result = i64::from_lexical_with_options::(b"__1__234__", &opts); + assert_eq!(result, Ok(1234)); +} + +#[test] +fn issue_96_i_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .consecutive_digit_separator(false) + .required_digits(true) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"_1_", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result =
i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_23", &opts); + assert_eq!(result, Ok((1123, 6))); + + let result = i64::from_lexical_partial_with_options::(b"1_1__23", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_23_", &opts); + assert_eq!(result, Ok((1123, 6))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_23.", &opts); + assert_eq!(result, Ok((1123, 6))); +} + +#[test] +fn issue_96_l_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1, 2))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); +} + +#[test] +fn issue_96_t_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + 
.trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn issue_96_il_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .leading_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = 
i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((123, 6))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 4))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn issue_96_it_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((11, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((11, 4))); + + let result = 
i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn issue_96_lt_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Err(Error::Empty(0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Err(Error::Empty(2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1, 2))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11, 4))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Err(Error::Empty(1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} + +#[test] +fn 
issue_96_no_required_test() { + let opts = Options::new(); + const FMT: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(false) + .required_digits(false) + .build(); + + let result = i64::from_lexical_partial_with_options::(b"", &opts); + assert_eq!(result, Ok((0, 0))); + + let result = i64::from_lexical_partial_with_options::(b"_", &opts); + assert_eq!(result, Ok((0, 1))); + + let result = i64::from_lexical_partial_with_options::(b"+_", &opts); + assert_eq!(result, Ok((0, 2))); + + let result = i64::from_lexical_partial_with_options::(b"_1_23", &opts); + assert_eq!(result, Ok((1, 2))); + + let result = i64::from_lexical_partial_with_options::(b"+_1_23", &opts); + assert_eq!(result, Ok((1, 3))); + + let result = i64::from_lexical_partial_with_options::(b"1__1_23", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"1_1_", &opts); + assert_eq!(result, Ok((1, 1))); + + let result = i64::from_lexical_partial_with_options::(b"_11_", &opts); + assert_eq!(result, Ok((11, 4))); + + let result = i64::from_lexical_partial_with_options::(b"_+1_23", &opts); + assert_eq!(result, Ok((0, 1))); + + let result = i64::from_lexical_partial_with_options::(b"+123_", &opts); + assert_eq!(result, Ok((123, 5))); + + let result = i64::from_lexical_partial_with_options::(b"+123__", &opts); + assert_eq!(result, Ok((123, 4))); +} diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index 350eac50..9f8fe190 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -947,108 +947,188 @@ impl NumberFormat { // DIGIT SEPARATOR FLAGS & MASKS // If digit separators are allowed between integer digits. 
+ /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTEGER_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn integer_internal_digit_separator(&self) -> bool { Self::INTEGER_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const FRACTION_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, FRACTION_INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn fraction_internal_digit_separator(&self) -> bool { Self::FRACTION_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const EXPONENT_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. 
#[inline(always)] pub const fn exponent_internal_digit_separator(&self) -> bool { Self::EXPONENT_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTERNAL_DIGIT_SEPARATOR); /// Get if digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn internal_digit_separator(&self) -> bool { Self::INTERNAL_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const INTEGER_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn integer_leading_digit_separator(&self) -> bool { Self::INTEGER_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const FRACTION_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, FRACTION_LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn fraction_leading_digit_separator(&self) -> bool { Self::FRACTION_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any exponent digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const EXPONENT_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn exponent_leading_digit_separator(&self) -> bool { Self::EXPONENT_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, LEADING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn leading_digit_separator(&self) -> bool { Self::LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const INTEGER_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn integer_trailing_digit_separator(&self) -> bool { Self::INTEGER_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const FRACTION_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, FRACTION_TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any fraction digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn fraction_trailing_digit_separator(&self) -> bool { Self::FRACTION_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const EXPONENT_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn exponent_trailing_digit_separator(&self) -> bool { Self::EXPONENT_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, TRAILING_DIGIT_SEPARATOR); /// Get if a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn trailing_digit_separator(&self) -> bool { Self::TRAILING_DIGIT_SEPARATOR @@ -1126,6 +1206,12 @@ impl NumberFormat { Self::BASE_PREFIX } + /// Get if the format has a base prefix. + #[inline(always)] + pub const fn has_base_prefix(&self) -> bool { + self.base_prefix() != 0 + } + /// The base suffix character in the packed struct. pub const BASE_SUFFIX: u8 = flags::base_suffix(FORMAT); @@ -1140,6 +1226,12 @@ impl NumberFormat { Self::BASE_SUFFIX } + /// Get if the format has a base suffix. + #[inline(always)] + pub const fn has_base_suffix(&self) -> bool { + self.base_suffix() != 0 + } + // RADIX /// The radix for the significant digits in the packed struct. 
diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 706eace0..7ea5c09a 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -464,54 +464,84 @@ impl NumberFormatBuilder { } /// Get if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn get_integer_internal_digit_separator(&self) -> bool { self.integer_internal_digit_separator } /// Get if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn get_fraction_internal_digit_separator(&self) -> bool { self.fraction_internal_digit_separator } /// Get if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn get_exponent_internal_digit_separator(&self) -> bool { self.exponent_internal_digit_separator } /// Get if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_integer_leading_digit_separator(&self) -> bool { self.integer_leading_digit_separator } /// Get if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_fraction_leading_digit_separator(&self) -> bool { self.fraction_leading_digit_separator } /// Get if a digit separator is allowed before any exponent digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_exponent_leading_digit_separator(&self) -> bool { self.exponent_leading_digit_separator } /// Get if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_integer_trailing_digit_separator(&self) -> bool { self.integer_trailing_digit_separator } /// Get if a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_fraction_trailing_digit_separator(&self) -> bool { self.fraction_trailing_digit_separator } /// Get if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn get_exponent_trailing_digit_separator(&self) -> bool { self.exponent_trailing_digit_separator @@ -754,6 +784,10 @@ impl NumberFormatBuilder { } /// Set if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] #[cfg(feature = "format")] pub const fn integer_internal_digit_separator(mut self, flag: bool) -> Self { @@ -762,6 +796,10 @@ impl NumberFormatBuilder { } /// Set if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. 
#[inline(always)] #[cfg(feature = "format")] pub const fn fraction_internal_digit_separator(mut self, flag: bool) -> Self { @@ -770,6 +808,10 @@ impl NumberFormatBuilder { } /// Set if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] #[cfg(feature = "format")] pub const fn exponent_internal_digit_separator(mut self, flag: bool) -> Self { @@ -778,6 +820,10 @@ impl NumberFormatBuilder { } /// Set all internal digit separator flags. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] #[cfg(feature = "format")] pub const fn internal_digit_separator(mut self, flag: bool) -> Self { @@ -788,6 +834,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn integer_leading_digit_separator(mut self, flag: bool) -> Self { @@ -796,6 +845,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn fraction_leading_digit_separator(mut self, flag: bool) -> Self { @@ -804,6 +856,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. 
#[inline(always)] #[cfg(feature = "format")] pub const fn exponent_leading_digit_separator(mut self, flag: bool) -> Self { @@ -812,6 +867,9 @@ impl NumberFormatBuilder { } /// Set all leading digit separator flags. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn leading_digit_separator(mut self, flag: bool) -> Self { @@ -822,6 +880,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn integer_trailing_digit_separator(mut self, flag: bool) -> Self { @@ -830,6 +891,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn fraction_trailing_digit_separator(mut self, flag: bool) -> Self { @@ -838,6 +902,9 @@ impl NumberFormatBuilder { } /// Set if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] #[cfg(feature = "format")] pub const fn exponent_trailing_digit_separator(mut self, flag: bool) -> Self { @@ -846,6 +913,9 @@ impl NumberFormatBuilder { } /// Set all trailing digit separator flags. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. 
#[inline(always)] #[cfg(feature = "format")] pub const fn trailing_digit_separator(mut self, flag: bool) -> Self { diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index 7f4cb98e..2f00e013 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -98,10 +98,14 @@ pub unsafe trait Iter<'a> { /// pass if the cursor was set between the two. unsafe fn set_cursor(&mut self, index: usize); - /// Get the current number of values returned by the iterator. + /// Get the current number of digits returned by the iterator. /// - /// For contiguous iterators, this is always the cursor, for - /// non-contiguous iterators this can be smaller. + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be only the number of + /// digits returned. + /// + /// This is never used for indexing but will be used for API detection. fn current_count(&self) -> usize; // PROPERTIES @@ -140,7 +144,7 @@ pub unsafe trait Iter<'a> { /// Check if the next item in buffer is a given value with optional case /// sensitivity. #[inline(always)] - fn first_is(&mut self, value: u8, is_cased: bool) -> bool { + fn first_is(&self, value: u8, is_cased: bool) -> bool { if is_cased { self.first_is_cased(value) } else { @@ -158,6 +162,14 @@ pub unsafe trait Iter<'a> { /// underlying buffer. This is useful for multi-digit optimizations /// for contiguous iterators. /// + /// This does not increment the count of items returned: this only + /// increments the index, not the total digits returned. You must use + /// this carefully: if stepping over a digit, you must then call + /// [`increment_count`] afterwards or else the internal count will + /// be incorrect. 
+ /// + /// [`increment_count`]: DigitsIter::increment_count + /// /// # Panics /// /// This will panic if the buffer advances for non-contiguous @@ -172,6 +184,15 @@ pub unsafe trait Iter<'a> { /// Advance the internal slice by 1 element. /// + /// + /// This does not increment the count of items returned: this only + /// increments the index, not the total digits returned. You must + /// use this carefully: if stepping over a digit, you must then call + /// [`increment_count`] afterwards or else the internal count will + /// be incorrect. + /// + /// [`increment_count`]: DigitsIter::increment_count + /// /// # Panics /// /// This will panic if the buffer advances for non-contiguous @@ -277,10 +298,18 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { self.peek().is_none() } + /// Increment the number of digits that have been returned by the iterator. + /// + /// For contiguous iterators, this is a no-op. For non-contiguous iterators, + /// this increments the count by 1. + fn increment_count(&mut self); + /// Peek the next value of the iterator, without consuming it. /// /// Note that this can modify the internal state, by skipping digits - /// for iterators that find the first non-zero value, etc. + /// for iterators that find the first non-zero value, etc. We optimize + /// this for the case where we have contiguous iterators, since + /// non-contiguous iterators already have a major performance penalty. fn peek(&mut self) -> Option; /// Peek the next value of the iterator, and step only if it exists. diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs index e8209a77..b58937e9 100644 --- a/lexical-util/src/noskip.rs +++ b/lexical-util/src/noskip.rs @@ -125,17 +125,17 @@ unsafe impl<'a, const __: u128> Iter<'a> for Bytes<'a, __> { self.index = index; } - /// Get the current number of values returned by the iterator. + /// Get the current number of digits returned by the iterator. 
+ /// + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be only the number of + /// digits returned. #[inline(always)] fn current_count(&self) -> usize { self.index } - #[inline(always)] - fn first(&self) -> Option<&'a u8> { - self.slc.get(self.index) - } - #[inline(always)] #[allow(clippy::assertions_on_constants)] // reason="ensuring safety invariants are valid" unsafe fn step_by_unchecked(&mut self, count: usize) { @@ -243,6 +243,11 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, F self.is_buffer_empty() } + // Always a no-op + #[inline(always)] + fn increment_count(&mut self) { + } + #[inline(always)] fn peek(&mut self) -> Option<::Item> { self.byte.slc.get(self.byte.index) diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index 84855a28..23f9a2f6 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -274,108 +274,188 @@ impl NumberFormat { // DIGIT SEPARATOR FLAGS & MASKS // If digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTEGER_INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn integer_internal_digit_separator(&self) -> bool { Self::INTEGER_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between fraction digits. 
+ /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const FRACTION_INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between fraction digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn fraction_internal_digit_separator(&self) -> bool { Self::FRACTION_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const EXPONENT_INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between exponent digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn exponent_internal_digit_separator(&self) -> bool { Self::EXPONENT_INTERNAL_DIGIT_SEPARATOR } /// If digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. pub const INTERNAL_DIGIT_SEPARATOR: bool = false; /// Get if digit separators are allowed between digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. #[inline(always)] pub const fn internal_digit_separator(&self) -> bool { Self::INTERNAL_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. 
pub const INTEGER_LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn integer_leading_digit_separator(&self) -> bool { Self::INTEGER_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const FRACTION_LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn fraction_leading_digit_separator(&self) -> bool { Self::FRACTION_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const EXPONENT_LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn exponent_leading_digit_separator(&self) -> bool { Self::EXPONENT_LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. pub const LEADING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed before any digits. + /// + /// This will consider an input of only the digit separator + /// to be a identical to empty input. #[inline(always)] pub const fn leading_digit_separator(&self) -> bool { Self::LEADING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any integer digits. 
+ /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const INTEGER_TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any integer digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn integer_trailing_digit_separator(&self) -> bool { Self::INTEGER_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const FRACTION_TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any fraction digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn fraction_trailing_digit_separator(&self) -> bool { Self::FRACTION_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const EXPONENT_TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any exponent digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. #[inline(always)] pub const fn exponent_trailing_digit_separator(&self) -> bool { Self::EXPONENT_TRAILING_DIGIT_SEPARATOR } /// If a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input. pub const TRAILING_DIGIT_SEPARATOR: bool = false; /// Get if a digit separator is allowed after any digits. + /// + /// This will consider an input of only the digit separator + /// to be identical to empty input.
#[inline(always)] pub const fn trailing_digit_separator(&self) -> bool { Self::TRAILING_DIGIT_SEPARATOR @@ -453,6 +533,12 @@ impl NumberFormat { Self::BASE_PREFIX } + /// Get if the format has a base prefix. + #[inline(always)] + pub const fn has_base_prefix(&self) -> bool { + false + } + /// The base suffix character in the packed struct. pub const BASE_SUFFIX: u8 = 0; @@ -467,6 +553,12 @@ impl NumberFormat { Self::BASE_SUFFIX } + /// Get if the format has a base suffix. + #[inline(always)] + pub const fn has_base_suffix(&self) -> bool { + false + } + // RADIX /// The radix for the significant digits in the packed struct. diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index 17e8b8e0..987d1f2e 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -48,117 +48,806 @@ use crate::format::NumberFormat; use crate::format_flags as flags; use crate::iterator::{DigitsIter, Iter}; -// PEEK -// ---- +// IS_ILTC +// ------- + +// NOTE: The compiler optimizes all these methods pretty well: it's as +// efficient or almost as efficient as optimized assembly without unsafe +// code, especially since we have to do bounds checking +// before and the compiler can determine all cases correctly. + +/// Helpers to get the next or previous elements for checks. +/// +/// This has the non-consecutive iterator variants as well +/// as the consecutive ones. The consecutive ones will iteratively +/// process all digits. +macro_rules! 
indexing { + (@next $self:ident, $index:expr) => { + $index.wrapping_add(1) + }; + + (@nextc $self:ident, $index:expr) => {{ + let mut index = $index; + let slc = $self.byte.slc; + while slc.get(index.wrapping_add(1)).map_or(false, |&x| $self.is_digit_separator(x)) { + index = index.wrapping_add(1); + } + index.wrapping_add(1) + }}; + + (@prev $self:ident, $index:expr) => { + $index.wrapping_sub(1) + }; + + (@prevc $self:ident, $index:expr) => {{ + let mut index = $index; + let slc = $self.byte.slc; + while slc.get(index.wrapping_sub(1)).map_or(false, |&x| $self.is_digit_separator(x)) { + index = index.wrapping_sub(1); + } + index.wrapping_sub(1) + }}; +} -/// Determine if the digit separator is internal. +/// Determine if a single digit separator is internal. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. -/// The compiler optimizes this pretty well: it's almost as efficient as -/// optimized assembly without bounds checking. +/// # Examples +/// +/// - `1__1_23`- invalid +/// - `1_1__23`- invalid +/// - `1_1_23`- valid +/// - `11_x23`- invalid +/// - `_1123`- invalid +/// - `+_1123`- invalid +/// - `_+1123`- invalid +/// - `1123_`- invalid +/// - `1123_.`- invalid +/// - `112_3.`- valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_i { - ($self:ident) => { - !is_l!($self) && !is_t!($self) + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is a digit + // - `index + 1` is a digit + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(false, |&x| $self.is_digit(x)) && + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_i!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. 
+ // NOTE: The conditions here then are that: + // - `index + 1` is a digit + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_i!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are internal. +/// +/// # Examples +/// +/// - `1__1_23`- valid +/// - `1_1__23`- valid +/// - `1_1_23`- valid +/// - `11_x23`- invalid +/// - `_1123`- invalid +/// - `+_1123`- invalid +/// - `_+1123`- invalid +/// - `1123_`- invalid +/// - `1123_.`- invalid +/// - `112_3.`- valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_ic { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is a digit after consuming digit separators + // - `index + 1` is a digit after consuming digit separators + + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(false, |&x| $self.is_digit(x)) && + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_ic!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is a digit after consuming digit separators + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_ic!(@internal $self, $self.byte.index) }; } -/// Determine if the digit separator is leading. +/// Determine if a single digit separator is leading. 
+/// +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// +/// Having a subsequent sign character is fine since it might +/// be part of a partial parser. +/// +/// # Preconditions /// -/// Preconditions: Assumes `slc[index]` is a digit separator. -/// The compiler optimizes this pretty well: it's almost as efficient as -/// optimized assembly without bounds checking. +/// Assumes `slc[index]` is a digit separator. macro_rules! is_l { - ($self:ident) => {{ - // Consume any digit separators before the current one. - let mut index = $self.byte.index; - while index > 0 - && $self.byte.slc.get(index - 1).map_or(false, |&x| $self.is_digit_separator(x)) - { - index -= 1; - } + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is not a digit + // - `index - 1` is not a digit separator + // - `index + 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) && + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; + + (@first $self:ident) => { + is_l!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: Previous must have been a digit so this cannot be valid. + false + }}; + + (@internal $self:ident) => { + is_l!(@internal $self, $self.byte.index) + }; +} + +/// Determine if one or more digit separators are leading. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `+__+123`- valid +/// - `+__.123`- valid +/// - `._123`- valid +/// - `_+123`- invalid +/// - `_123`- valid +/// - `+_123`- valid +/// +/// Having a subsequent sign character is fine since it might +/// be part of a partial parser. 
+/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_lc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is not a digit after removing digit separators + + let prev = indexing!(@prevc $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_lc!(@first $self, $self.byte.index) + }; - // True if there are no items before the digit separator, or character - // before the digit separators is not a digit. - index == 0 || !$self.byte.slc.get(index - 1).map_or(false, |&x| $self.is_digit(x)) + (@internal $self:ident, $index:expr) => {{ + // NOTE: Previous must have been a digit so this cannot be valid. + false }}; + + (@internal $self:ident) => { + is_lc!(@internal $self, $self.byte.index) + }; } -/// Determine if the digit separator is trailing. +/// Determine if a single digit separator is trailing. +/// +/// # Examples +/// +/// - `123_`- valid +/// - `123__`- invalid +/// - `123_.`- valid +/// - `123__.`- invalid +/// - `123_1`- invalid +/// - `123__1`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid /// -/// Preconditions: Assumes `slc[index]` is a digit separator. -/// The compiler optimizes this pretty well: it's almost as efficient as -/// optimized assembly without bounds checking. +/// Having a subsequent sign character is fine since it might +/// be part of a partial parser. +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_t { - ($self:ident) => {{ - // Consume any digit separators after the current one. 
- let mut index = $self.byte.index; - while index < $self.byte.slc.len() - && $self.byte.slc.get(index + 1).map_or(false, |&x| $self.is_digit_separator(x)) - { - index += 1; - } + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + // - `index + 1` is not a digit separator + // - `index - 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) && + slc.get(prev).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; - index == $self.byte.slc.len() - || !$self.byte.slc.get(index + 1).map_or(false, |&x| $self.is_digit(x)) + (@first $self:ident) => { + is_t!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + // - `index + 1` is not a digit separator + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) }}; + + (@internal $self:ident) => { + is_t!(@internal $self, $self.byte.index) + }; +} + +/// Determine if one or more digit separators are trailing. +/// +/// # Examples +/// +/// - `123_`- valid +/// - `123__`- valid +/// - `123_.`- valid +/// - `123__.`- valid +/// - `123_1`- invalid +/// - `123__1`- invalid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! 
is_tc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_tc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => { + // NOTE: This is already optimized for the first case. + is_tc!(@first $self, $index) + }; + + (@internal $self:ident) => { + is_tc!(@internal $self, $self.byte.index) + }; } /// Determine if the digit separator is leading or internal. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- invalid +/// - `+123_`- invalid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: invalid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_il { - ($self:ident) => { - is_l!($self) || !is_t!($self) + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is a digit + // - `index + 1` is not a digit separator + // - `index - 1` is not a digit separator + // + // # Logic + // + // If the previous character is a digit, then the + // next character must be a digit. 
If the previous + // character is not a digit, then the subsequent character can + // be anything besides a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + + if slc.get(prev).map_or(false, |&x| $self.is_digit(x)) { + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + } else { + slc.get(prev).map_or(true, |&x| !$self.is_digit_separator(x)) + } + }}; + + (@first $self:ident) => { + is_il!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is a digit + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_il!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are leading or internal. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- valid +/// - `+123_`- invalid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: invalid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_ilc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is a digit after consuming digit separators + // + // # Logic + // + // We also need to consider the case where it's empty, + // that is, the previous one wasn't a digit if we don't + // have a digit. 
+ + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(false, |&x| $self.is_digit(x)) || + slc.get(prev).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_ilc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is a digit after consuming digit separators + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| $self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_ilc!(@internal $self, $self.byte.index) }; } /// Determine if the digit separator is internal or trailing. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- invalid +/// - `_+123`- valid +/// - `_123`- invalid +/// - `+_123`- invalid +/// - `+1_23`- valid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! is_it { - ($self:ident) => { - is_t!($self) || !is_l!($self) + (@first$self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is a digit + // - `index - 1` is not a digit separator + // - `index + 1` is not a digit separator + // + // # Logic + // + // If the previous character is not a digit, there cannot + // be a digit for a following character. If the previous + // character is a digit, then the following one can be + // anything besides a digit separator.
+ + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + if slc.get(prev).map_or(false, |&x| $self.is_digit(x)) { + // Have a digit, any character besides a digit separator is valid + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + } else { + // Not a digit, so we cannot have a digit or a digit separator + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) + } + }}; + + (@first$self:ident) => { + is_it!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit separator + // Since we've previously had a digit, this is guaranteed to + // be internal or trailing. + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; + + (@internal $self:ident) => { + is_it!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are internal or trailing. +/// +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- invalid +/// - `_+123`- invalid +/// - `_123`- invalid +/// - `+_123`- invalid +/// - `+1_23`- valid +/// - `+1__23`- valid +/// - `+123_`- valid +/// - `+123__`- valid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_itc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index - 1` is not a digit after consuming digit separators + // + // # Logic + // + // We also need to consider the case where it's empty, + // that is, the previous one wasn't a digit if we don't + // have a digit. 
+ + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(prev).map_or(false, |&x| !$self.is_digit(x)) || + slc.get(next).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@first $self:ident) => { + is_itc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => { + // NOTE: Previous must have been a digit so this must be valid. + true + }; + + (@internal $self:ident) => { + is_itc!(@internal $self, $self.byte.index) }; } /// Determine if the digit separator is leading or trailing. /// -/// Preconditions: Assumes `slc[index]` is a digit separator. +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- invalid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! 
is_lt { - ($self:ident) => { - is_l!($self) || is_t!($self) + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - not (`index - 1` is a digit and `index + 1` is a digit) + // - `index - 1` is not a digit separator + // - `index + 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + let prev_value = slc.get(prev); + let next_value = slc.get(next); + + let is_prev_sep = prev_value.map_or(false, |&x| $self.is_digit_separator(x)); + let is_prev_dig = prev_value.map_or(false, |&x| $self.is_digit(x)); + let is_next_sep = next_value.map_or(false, |&x| $self.is_digit_separator(x)); + let is_next_dig = next_value.map_or(false, |&x| $self.is_digit(x)); + + !is_prev_sep && !is_next_sep && !(is_prev_dig && is_next_dig) + }}; + + (@first $self:ident) => { + is_lt!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + // - `index + 1` is not a digit separator + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x) && !$self.is_digit_separator(x)) + }}; + + (@internal $self:ident) => { + is_lt!(@internal $self, $self.byte.index) }; } -/// Determine if the digit separator is internal, leading, or trailing. +/// Determine if consecutive digit separators are leading or trailing. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- invalid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- valid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! 
is_ltc { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that (after consuming separators): + // - not (`index - 1` is a digit and `index + 1` is a digit) + + let prev = indexing!(@prevc $self, $index); + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + !(slc.get(prev).map_or(false, |&x| $self.is_digit(x)) && slc.get(next).map_or(false, |&x| $self.is_digit(x))) + }}; + + (@first $self:ident) => { + is_ltc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit + + let next = indexing!(@nextc $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit(x)) + }}; + + (@internal $self:ident) => { + is_ltc!(@internal $self, $self.byte.index) + }; +} + +/// Determine if a single digit separator is internal, leading, or trailing. +/// +/// # Examples +/// +/// - `__123`- invalid +/// - `+__123`- invalid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- invalid +/// - `+123_`- valid +/// - `+123__`- invalid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. macro_rules! 
is_ilt { - ($self:ident) => { + (@first $self:ident, $index:expr) => {{ + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit separator + // - `index - 1` is not a digit separator + + let prev = indexing!(@prev $self, $index); + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + !slc.get(next).map_or(false, |&x| $self.is_digit_separator(x)) && + !slc.get(prev).map_or(false, |&x| $self.is_digit_separator(x)) + }}; + + (@first $self:ident) => { + is_ilt!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => {{ + // NOTE: We must have validated `prev`, so this just checks `next`. + // NOTE: The conditions here then are that: + // - `index + 1` is not a digit separator + + let next = indexing!(@next $self, $index); + let slc = $self.byte.slc; + slc.get(next).map_or(true, |&x| !$self.is_digit_separator(x)) + }}; + + (@internal $self:ident) => { + is_ilt!(@internal $self, $self.byte.index) + }; +} + +/// Determine if consecutive digit separators are internal, leading, or +/// trailing. +/// +/// This is always true. +/// +/// # Examples +/// +/// - `__123`- valid +/// - `+__123`- valid +/// - `._123`- valid +/// - `_+123`- valid +/// - `_123`- valid +/// - `+_123`- valid +/// - `+1_23`- valid +/// - `+1__23`- valid +/// - `+123_`- valid +/// - `+123__`- valid +/// - _: valid +/// - _+: valid +/// - 1_+: valid +/// +/// # Preconditions +/// +/// Assumes `slc[index]` is a digit separator. +macro_rules! is_iltc { + (@first $self:ident, $index:expr) => { true }; + + (@first $self:ident) => { + is_iltc!(@first $self, $self.byte.index) + }; + + (@internal $self:ident, $index:expr) => { + true + }; + + (@internal $self:ident) => { + is_iltc!(@internal $self, $self.byte.index) + }; } +// PEEK +// ---- + /// Consumes 1 or more digit separators. /// Peeks the next token that's not a digit separator. macro_rules! 
peek_1 { ($self:ident, $is_skip:ident) => {{ // This will consume a single, non-consecutive digit separators. - let mut index = $self.cursor(); + let index = $self.cursor(); let buffer = $self.get_buffer(); let value = buffer.get(index)?; let is_digit_separator = $self.is_digit_separator(*value); - if is_digit_separator && $is_skip!($self) { - // Have a skippable digit separator: keep incrementing until we find - // a non-digit separator character. Don't need any complex checks - // here, since we've already done them above. - index += 1; - while index < buffer.len() - && buffer.get(index).map_or(false, |&x| $self.is_digit_separator(x)) - { - index += 1; + // NOTE: We can do some pretty major optimizations for internal values, + // since we can check the location and don't need to check previous values. + if is_digit_separator { + // NOTE: This cannot iteratively search for the next value, + // or else the consecutive digit separator has no effect (#96). + let is_skip = if $self.current_count() == 0 { + $is_skip!(@first $self) + } else { + $is_skip!(@internal $self) + }; + if is_skip { + // SAFETY: Safe since `index < buffer.len()`, so `index + 1 <= buffer.len()`` + unsafe { $self.set_cursor(index + 1) }; + buffer.get(index + 1) + } else { + Some(value) } - // SAFETY: Safe since `index < buffer.len()`. - unsafe { $self.set_cursor(index) }; - buffer.get(index) } else { // Have 1 of 2 conditions: // 1. A non-digit separator character. @@ -177,19 +866,30 @@ macro_rules! peek_n { let buffer = $self.get_buffer(); let value = buffer.get(index)?; let is_digit_separator = $self.is_digit_separator(*value); - if is_digit_separator && $is_skip!($self) { - // Have a skippable digit separator: keep incrementing until we find - // a non-digit separator character. Don't need any complex checks - // here, since we've already done them above. 
- index += 1; - while index < buffer.len() - && buffer.get(index).map_or(false, |&x| $self.is_digit_separator(x)) - { + // NOTE: We can do some pretty major optimizations for internal values, + // since we can check the location and don't need to check previous values. + if is_digit_separator { + let is_skip = if $self.current_count() == 0 { + $is_skip!(@first $self) + } else { + $is_skip!(@internal $self) + }; + if is_skip { + // Have a skippable digit separator: keep incrementing until we find + // a non-digit separator character. Don't need any complex checks + // here, since we've already done them above. index += 1; + while index < buffer.len() + && buffer.get(index).map_or(false, |&x| $self.is_digit_separator(x)) + { + index += 1; + } + // SAFETY: Safe since `index <= buffer.len()`. + unsafe { $self.set_cursor(index) }; + buffer.get(index) + } else { + Some(value) } - // SAFETY: Safe since `index < buffer.len()`. - unsafe { $self.set_cursor(index) }; - buffer.get(index) } else { // Have 1 of 2 conditions: // 1. A non-digit separator character. @@ -261,21 +961,21 @@ macro_rules! peek_ilt { /// Consumes 1 or more leading digit separators and peeks the next value. macro_rules! peek_lc { ($self:ident) => { - peek_n!($self, is_l) + peek_n!($self, is_lc) }; } /// Consumes 1 or more internal digit separators and peeks the next value. macro_rules! peek_ic { ($self:ident) => { - peek_n!($self, is_i) + peek_n!($self, is_ic) }; } /// Consumes 1 or more trailing digit separators and peeks the next value. macro_rules! peek_tc { ($self:ident) => { - peek_n!($self, is_t) + peek_n!($self, is_tc) }; } @@ -283,7 +983,7 @@ macro_rules! peek_tc { /// value. macro_rules! peek_ilc { ($self:ident) => { - peek_n!($self, is_il) + peek_n!($self, is_ilc) }; } @@ -291,7 +991,7 @@ macro_rules! peek_ilc { /// value. macro_rules! peek_itc { ($self:ident) => { - peek_n!($self, is_it) + peek_n!($self, is_itc) }; } @@ -299,21 +999,15 @@ macro_rules! peek_itc { /// value. macro_rules! 
peek_ltc { ($self:ident) => { - peek_n!($self, is_lt) + peek_n!($self, is_ltc) }; } /// Consumes 1 or more digit separators and peeks the next value. macro_rules! peek_iltc { - ($self:ident) => {{ - loop { - let value = $self.byte.slc.get($self.byte.index)?; - if !$self.is_digit_separator(*value) { - return Some(value); - } - $self.byte.index += 1; - } - }}; + ($self:ident) => { + peek_n!($self, is_iltc) + }; } // AS DIGITS @@ -354,9 +1048,15 @@ pub struct Bytes<'a, const FORMAT: u128> { slc: &'a [u8], /// Current index of the iterator in the slice. index: usize, - /// The current count of values returned by the iterator. + /// The current count of integer digits returned by the iterator. /// This is only used if the iterator is not contiguous. - count: usize, + integer_count: usize, + /// The current count of fraction digits returned by the iterator. + /// This is only used if the iterator is not contiguous. + fraction_count: usize, + /// The current count of exponent digits returned by the iterator. + /// This is only used if the iterator is not contiguous. + exponent_count: usize, } impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { @@ -366,7 +1066,9 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { Self { slc, index: 0, - count: 0, + integer_count: 0, + fraction_count: 0, + exponent_count: 0, } } @@ -386,7 +1088,9 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { Self { slc, index, - count: 0, + integer_count: 0, + fraction_count: 0, + exponent_count: 0, } } @@ -447,11 +1151,6 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { ); } self.index += count; - if !is_contiguous { - // Only increment the count if it's not contiguous, otherwise, - // this is an unnecessary performance penalty. - self.count += count; - } } /// Internal implementation that handles if it's contiguous. @@ -501,7 +1200,12 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { self.index = index; } - /// Get the current number of values returned by the iterator. 
+ /// Get the current number of digits returned by the iterator. + /// + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be only the number of + /// digits returned. #[inline(always)] fn current_count(&self) -> usize { // If the buffer is contiguous, then we don't need to track the @@ -509,7 +1213,7 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { if Self::IS_CONTIGUOUS { self.index } else { - self.count + self.integer_count + self.fraction_count + self.exponent_count } } @@ -540,6 +1244,14 @@ macro_rules! skip_iterator { }; } +macro_rules! is_sign { + () => { + pub const fn is_sign(&self, value: u8) -> bool { + matches!(value, b'+' | b'-') + } + }; +} + macro_rules! is_digit_separator { ($format:ident) => { /// Determine if the character is a digit separator. @@ -561,9 +1273,11 @@ macro_rules! is_digit_separator { macro_rules! skip_iterator_impl { ($iterator:ident, $radix_cb:ident) => { impl<'a: 'b, 'b, const FORMAT: u128> $iterator<'a, 'b, FORMAT> { + is_sign!(); is_digit_separator!(FORMAT); /// Create a new digits iterator from the bytes underlying item. + #[inline(always)] pub fn new(byte: &'b mut Bytes<'a, FORMAT>) -> Self { Self { byte, @@ -612,10 +1326,13 @@ macro_rules! skip_iterator_iterator_impl { let value = self.peek()?; // Increment the index so we know not to re-fetch it. self.byte.index += 1; - if !Self::IS_CONTIGUOUS { - // Only increment the count if it's not contiguous, otherwise, - // this is an unnecessary performance penalty. - self.byte.count += 1; + // NOTE: Only increment the count if it's not contiguous, otherwise, + // this is an unnecessary performance penalty. We also need + // to check if it's a digit, which adds on additional cost but + // there's not much else we can do. Hopefully the previous inlining + // checks will minimize the performance hit.
+ if !Self::IS_CONTIGUOUS && self.is_digit(*value) { + self.increment_count(); } Some(value) } @@ -625,7 +1342,7 @@ macro_rules! skip_iterator_iterator_impl { /// Create base methods for the Iter block of a skip iterator. macro_rules! skip_iterator_iter_base { - ($format:ident, $mask:ident) => { + ($format:ident, $mask:ident, $count:ident) => { // It's contiguous if we don't skip over any values. // IE, the digit separator flags for the iterator over // the digits doesn't skip any values. @@ -648,9 +1365,19 @@ macro_rules! skip_iterator_iter_base { unsafe { self.byte.set_cursor(index) }; } + /// Get the current number of digits returned by the iterator. + /// + /// For contiguous iterators, this can include the sign character, decimal + /// point, and the exponent sign (that is, it is always the cursor). For + /// non-contiguous iterators, this must always be only the number of + /// digits returned. #[inline(always)] fn current_count(&self) -> usize { - self.byte.current_count() + if Self::IS_CONTIGUOUS { + self.byte.current_count() + } else { + self.byte.$count + } } #[inline(always)] @@ -679,15 +1406,35 @@ macro_rules! skip_iterator_digits_iter_base { /// Create impl `ByteIter` block for skip iterator. macro_rules! skip_iterator_bytesiter_impl { - ($iterator:ident, $mask:ident, $i:ident, $l:ident, $t:ident, $c:ident) => { + ($iterator:ident, $mask:ident, $count:ident, $i:ident, $l:ident, $t:ident, $c:ident) => { unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for $iterator<'a, 'b, FORMAT> { - skip_iterator_iter_base!(FORMAT, $mask); + skip_iterator_iter_base!(FORMAT, $mask, $count); } impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for $iterator<'a, 'b, FORMAT> { skip_iterator_digits_iter_base!(); + /// Increment the number of digits that have been returned by the iterator. + /// + /// For contiguous iterators, this is a no-op. For non-contiguous iterators, + /// this increments the count by 1.
+ #[inline(always)] + fn increment_count(&mut self) { + self.byte.$count += 1; + } + /// Peek the next value of the iterator, without consuming it. + /// + /// Note that this can modify the internal state, by skipping digits + /// for iterators that find the first non-zero value, etc. We optimize + /// this for the case where we have contiguous iterators, since + /// non-contiguous iterators already have a major performance penalty. + /// + /// Checking if the character is a digit in the `next()` implementation + /// after skipping characters can: + /// 1. Likely be optimized out due to the use of macros and inlining. + /// 2. Is a small amount of overhead compared to the branching on + /// characters, #[inline(always)] fn peek(&mut self) -> Option<::Item> { let format = NumberFormat::<{ FORMAT }> {}; @@ -746,6 +1493,7 @@ skip_iterator_iterator_impl!(IntegerDigitsIterator); skip_iterator_bytesiter_impl!( IntegerDigitsIterator, INTEGER_DIGIT_SEPARATOR_FLAG_MASK, + integer_count, INTEGER_INTERNAL_DIGIT_SEPARATOR, INTEGER_LEADING_DIGIT_SEPARATOR, INTEGER_TRAILING_DIGIT_SEPARATOR, @@ -764,6 +1512,7 @@ skip_iterator_iterator_impl!(FractionDigitsIterator); skip_iterator_bytesiter_impl!( FractionDigitsIterator, FRACTION_DIGIT_SEPARATOR_FLAG_MASK, + fraction_count, FRACTION_INTERNAL_DIGIT_SEPARATOR, FRACTION_LEADING_DIGIT_SEPARATOR, FRACTION_TRAILING_DIGIT_SEPARATOR, @@ -782,6 +1531,7 @@ skip_iterator_iterator_impl!(ExponentDigitsIterator); skip_iterator_bytesiter_impl!( ExponentDigitsIterator, EXPONENT_DIGIT_SEPARATOR_FLAG_MASK, + exponent_count, EXPONENT_INTERNAL_DIGIT_SEPARATOR, EXPONENT_LEADING_DIGIT_SEPARATOR, EXPONENT_TRAILING_DIGIT_SEPARATOR, @@ -798,16 +1548,22 @@ skip_iterator!( skip_iterator_iterator_impl!(SpecialDigitsIterator); impl<'a: 'b, 'b, const FORMAT: u128> SpecialDigitsIterator<'a, 'b, FORMAT> { + is_sign!(); is_digit_separator!(FORMAT); } unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for SpecialDigitsIterator<'a, 'b, FORMAT> { - 
skip_iterator_iter_base!(FORMAT, SPECIAL_DIGIT_SEPARATOR); + skip_iterator_iter_base!(FORMAT, SPECIAL_DIGIT_SEPARATOR, integer_count); } impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for SpecialDigitsIterator<'a, 'b, FORMAT> { skip_iterator_digits_iter_base!(); + // Always a no-op. + #[inline(always)] + fn increment_count(&mut self) { + } + /// Peek the next value of the iterator, without consuming it. #[inline(always)] fn peek(&mut self) -> Option<::Item> { diff --git a/lexical-util/tests/iterator_tests.rs b/lexical-util/tests/iterator_tests.rs index 1882ea6b..46fe0df4 100644 --- a/lexical-util/tests/iterator_tests.rs +++ b/lexical-util/tests/iterator_tests.rs @@ -89,10 +89,10 @@ fn skip_iterator_test() { assert_eq!(iter.current_count(), 0); unsafe { iter.step_unchecked() }; assert_eq!(iter.cursor(), 1); - assert_eq!(iter.current_count(), 1); + assert_eq!(iter.current_count(), 0); iter.next(); assert_eq!(iter.cursor(), 2); - assert_eq!(iter.current_count(), 2); + assert_eq!(iter.current_count(), 1); let mut byte = digits.bytes::<{ FORMAT }>(); let mut iter = byte.integer_iter(); diff --git a/lexical-util/tests/skip_tests.rs b/lexical-util/tests/skip_tests.rs index 7fa6354c..f3e88bf2 100644 --- a/lexical-util/tests/skip_tests.rs +++ b/lexical-util/tests/skip_tests.rs @@ -31,27 +31,27 @@ fn test_skip_iter_i() { skip_iter_eq::<{ FORMAT }>(b"_.45", b"_.45"); skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4_"); skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4_."); skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"_455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b"_.455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", 
b"__.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5__"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45_.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5__.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"_45_"); skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"_45_.56"); skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"_45_"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4_5__"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"_45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4_5__.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -68,9 +68,9 @@ fn test_skip_iter_l() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"4_5"); skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4_"); @@ -78,21 +78,21 @@ fn test_skip_iter_l() { skip_iter_eq::<{ FORMAT }>(b"4_.", b"4_."); skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"45_5"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45__5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".45_5"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45__5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"4_5_"); skip_iter_eq::<{ FORMAT }>(b"4__5__", 
b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"4_5_.5"); skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45__"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45__.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"4_5_"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4__5__"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"4_5_.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4__5__.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -111,29 +111,29 @@ fn test_skip_iter_t() { skip_iter_eq::<{ FORMAT }>(b"_45", b"_45"); skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"4_5"); skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"_45_5"); skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".45_5"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45__5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"4_5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"4_5.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ 
FORMAT }>(b"_45_", b"_45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"_45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"_4_5"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"_4_5.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -151,31 +151,31 @@ fn test_skip_iter_il() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4_"); skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4_."); skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5__"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45_.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5__.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); 
skip_iter_eq::<{ FORMAT }>(b"_45_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45__"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45__.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"45_"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4_5__"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"45_.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4_5__.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -195,29 +195,29 @@ fn test_skip_iter_it() { skip_iter_eq::<{ FORMAT }>(b"_45", b"_45"); skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"_455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", 
b"_45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"_45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"_45"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4_5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"_45.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4_5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -235,31 +235,31 @@ fn test_skip_iter_lt() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"4_5"); skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"45_5"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45__5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".45_5"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45__5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"4_5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"4_5.5"); - skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", 
b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"4_5"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4__5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"4_5.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4__5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test] @@ -278,31 +278,31 @@ fn test_skip_iter_ilt() { skip_iter_eq::<{ FORMAT }>(b"1e", b"1e"); skip_iter_eq::<{ FORMAT }>(b"1", b"1"); skip_iter_eq::<{ FORMAT }>(b"_45", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45", b"_45"); + skip_iter_eq::<{ FORMAT }>(b"__45", b"__45"); skip_iter_eq::<{ FORMAT }>(b"_.45", b".45"); - skip_iter_eq::<{ FORMAT }>(b"__.45", b"_.45"); + skip_iter_eq::<{ FORMAT }>(b"__.45", b"__.45"); skip_iter_eq::<{ FORMAT }>(b"4_5", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5", b"4_5"); + skip_iter_eq::<{ FORMAT }>(b"4__5", b"4__5"); skip_iter_eq::<{ FORMAT }>(b"4_", b"4"); - skip_iter_eq::<{ FORMAT }>(b"4__", b"4_"); + skip_iter_eq::<{ FORMAT }>(b"4__", b"4__"); skip_iter_eq::<{ FORMAT }>(b"4_.", b"4."); - skip_iter_eq::<{ FORMAT }>(b"4__.", b"4_."); + skip_iter_eq::<{ FORMAT }>(b"4__.", b"4__."); skip_iter_eq::<{ FORMAT }>(b"_45_5", b"455"); - skip_iter_eq::<{ FORMAT }>(b"__45__5", b"_45_5"); + skip_iter_eq::<{ FORMAT }>(b"__45__5", b"__45__5"); skip_iter_eq::<{ FORMAT }>(b"_.45_5", b".455"); - skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"_.45_5"); + skip_iter_eq::<{ FORMAT }>(b"__.45__5", b"__.45__5"); skip_iter_eq::<{ FORMAT }>(b"4_5_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4_5_"); + skip_iter_eq::<{ FORMAT }>(b"4__5__", b"4__5__"); skip_iter_eq::<{ FORMAT }>(b"4_5_.5", b"45.5"); - 
skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4_5_.5"); + skip_iter_eq::<{ FORMAT }>(b"4__5__.5", b"4__5__.5"); skip_iter_eq::<{ FORMAT }>(b"_45_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__45__", b"_45_"); + skip_iter_eq::<{ FORMAT }>(b"__45__", b"__45__"); skip_iter_eq::<{ FORMAT }>(b"_45_.56", b"45.56"); - skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"_45_.56"); + skip_iter_eq::<{ FORMAT }>(b"__45__.56", b"__45__.56"); skip_iter_eq::<{ FORMAT }>(b"_4_5_", b"45"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"_4_5_"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__", b"__4__5__"); skip_iter_eq::<{ FORMAT }>(b"_4_5_.56", b"45.56"); - skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"_4_5_.56"); + skip_iter_eq::<{ FORMAT }>(b"__4__5__.56", b"__4__5__.56"); } #[test]