From efb7a1b9c3b1257ec9974df25964db56c0da7f5f Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sun, 15 Sep 2024 13:59:58 -0500 Subject: [PATCH] Add in some better conventions, refactor more to safe fns. --- lexical-parse-float/src/parse.rs | 14 ++++++++------ lexical-util/src/iterator.rs | 33 ++++++++++++++++++++++---------- lexical-util/src/noskip.rs | 18 +---------------- lexical-util/src/skip.rs | 18 ++--------------- 4 files changed, 34 insertions(+), 49 deletions(-) diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index b8fe3c15..56fc77d7 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -618,7 +618,8 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( let mut implicit_exponent: i64; let int_end = n_digits as i64; let mut fraction_digits = None; - if byte.first_is(decimal_point) { + // TODO: Change this to something different from read_if_value but same idea + if byte.first_is_cased(decimal_point) { // SAFETY: byte cannot be empty due to first_is unsafe { byte.step_unchecked() }; let before = byte.clone(); @@ -658,9 +659,9 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( // Handle scientific notation. let mut explicit_exponent = 0_i64; let is_exponent = if cfg!(feature = "format") && format.case_sensitive_exponent() { - byte.first_is(exponent_character) + byte.first_is_cased(exponent_character) } else { - byte.case_insensitive_first_is(exponent_character) + byte.first_is_uncased(exponent_character) }; if is_exponent { // Check float format syntax checks. @@ -708,9 +709,9 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( #[cfg(feature = "format")] if base_suffix != 0 { let is_suffix: bool = if format.case_sensitive_base_suffix() { - byte.first_is(base_suffix) + byte.first_is_cased(base_suffix) } else { - byte.case_insensitive_first_is(base_suffix) + byte.first_is_uncased(base_suffix) }; if is_suffix { // SAFETY: safe since `byte.len() >= 1`. @@ -747,13 +748,14 @@ pub fn parse_partial_number<'a, const FORMAT: u128>( n_digits -= step; let mut zeros = start.clone(); let mut zeros_integer = zeros.integer_iter(); + // TODO: Change to read_if_value while zeros_integer.peek_is_cased(b'0') { n_digits = n_digits.saturating_sub(1); // TODO: Change to read_if // SAFETY: safe since zeros cannot be empty due to peek_is unsafe { zeros_integer.step_unchecked() }; } - if zeros.first_is(decimal_point) { + if zeros.first_is_cased(decimal_point) { // SAFETY: safe since zeros cannot be empty due to first_is unsafe { zeros.step_unchecked() }; } diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index d3bd8f86..6bbd7ed2 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -113,17 +113,13 @@ pub unsafe trait Iter<'a> { /// Check if the next element is a given value. #[inline(always)] - fn first_is(&self, value: u8) -> bool { - if let Some(&c) = self.first() { - c == value - } else { - false - } + fn first_is_cased(&self, value: u8) -> bool { + Some(&value) == self.first() } /// Check if the next element is a given value without case sensitivity. #[inline(always)] - fn case_insensitive_first_is(&self, value: u8) -> bool { + fn first_is_uncased(&self, value: u8) -> bool { if let Some(&c) = self.first() { c.to_ascii_lowercase() == value.to_ascii_lowercase() } else { @@ -218,8 +214,7 @@ pub unsafe trait Iter<'a> { /// the methods for `read_32`, `read_64`, etc. check the bounds /// of the underlying contiguous buffer and is only called on /// contiguous buffers. -pub unsafe trait DigitsIter<'a>: Iterator + Iter<'a> { - // TODO: Move some of these to `Iter` as required. +pub trait DigitsIter<'a>: Iterator + Iter<'a> { // TODO: Fix the documentation /// Get if the iterator cannot return any more elements. @@ -299,7 +294,21 @@ pub unsafe trait DigitsIter<'a>: Iterator + Iter<'a> { } } - // TODO(ahuszagh) Add `peek_is` to have cased or uncased + /// Check if the next element is a given value with optional case sensitivity. + #[inline(always)] + fn peek_is(&mut self, value: u8, is_cased: bool) -> bool { + if let Some(&c) = self.peek() { + if is_cased { + c == value + } else { + c.to_ascii_lowercase() == value.to_ascii_lowercase() + } + } else { + false + } + } + + // TODO: Add `peek_is` to have cased or uncased /// Peek the next value and consume it if the read value matches the /// expected one. @@ -321,6 +330,10 @@ pub unsafe trait DigitsIter<'a>: Iterator + Iter<'a> { } } + // TODO: Add read_is_value_cased + // TODO: Add read_is_value_uncased + // TODO: Add read_is_value + /// Skip zeros from the start of the iterator #[inline(always)] fn skip_zeros(&mut self) -> usize { diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs index c670d5e0..39307e7f 100644 --- a/lexical-util/src/noskip.rs +++ b/lexical-util/src/noskip.rs @@ -74,22 +74,6 @@ impl<'a, const __: u128> Bytes<'a, __> { self.index >= self.slc.len() } - // TODO: Remove the peek_is, these shouldn't be on bytes - - /// Check if the next element is a given value. - #[inline(always)] - pub fn peek_is_cased(&mut self, value: u8) -> bool { - // TODO: These 2 need to be changed - self.first_is(value) - } - - /// Check if the next element is a given value without case sensitivity. - #[inline(always)] - pub fn peek_is_uncased(&mut self, value: u8) -> bool { - // TODO: These 2 need to be changed - self.case_insensitive_first_is(value) - } - /// Get iterator over integer digits. #[inline(always)] pub fn integer_iter<'b>(&'b mut self) -> DigitsIterator<'a, 'b, __> { @@ -330,7 +314,7 @@ unsafe impl<'a: 'b, 'b, const __: u128> Iter<'a> for DigitsIterator<'a, 'b, __> } } -unsafe impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, FORMAT> { +impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, FORMAT> { #[inline(always)] fn is_consumed(&mut self) -> bool { Self::is_done(self) diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index 41ebf181..38a89222 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -410,20 +410,6 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { } } - /// Check if the next element is a given value without case sensitivity. - #[inline(always)] - pub fn peek_is_uncased(&mut self, value: u8) -> bool { - // TODO: Fix this??? Should this be first??? - - // Don't assert not a digit separator, since this can occur when - // a different component does not allow digit separators there. - if let Some(&c) = self.first() { - c.to_ascii_lowercase() == value.to_ascii_lowercase() - } else { - false - } - } - /// Get iterator over integer digits. #[inline(always)] pub fn integer_iter<'b>(&'b mut self) -> IntegerDigitsIterator<'a, 'b, FORMAT> { @@ -769,7 +755,7 @@ macro_rules! skip_iterator_bytesiter_impl { skip_iterator_iter_base!(FORMAT, $mask); } - unsafe impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for $iterator<'a, 'b, FORMAT> { + impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for $iterator<'a, 'b, FORMAT> { skip_iterator_digits_iter_base!(); /// Peek the next value of the iterator, without consuming it. @@ -886,7 +872,7 @@ unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for SpecialDigitsIterator<' skip_iterator_iter_base!(FORMAT, SPECIAL_DIGIT_SEPARATOR); } -unsafe impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> +impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for SpecialDigitsIterator<'a, 'b, FORMAT> { skip_iterator_digits_iter_base!();