From 5ee4d3eb79c25651fdfff395998f0e86d6b95581 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Sun, 17 Nov 2024 10:05:12 -0500 Subject: [PATCH 1/2] Expose lazy object and array iterators, closes and #406 Signed-off-by: Heinz N. Gies --- src/lib.rs | 36 +++++++++++++++++++----------------- src/value/lazy.rs | 6 ++++-- src/value/lazy/array.rs | 17 ++++++++++------- src/value/lazy/object.rs | 4 ++++ 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fc38d5ac..880b5ad3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -703,7 +703,7 @@ impl<'de> Deserializer<'de> { input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType> { - Self::_find_structural_bits::(input, structural_indexes) + unsafe { Self::_find_structural_bits::(input, structural_indexes) } } #[cfg(all(target_feature = "simd128", not(feature = "portable")))] @@ -851,7 +851,7 @@ impl<'de> Deserializer<'de> { /// where it's know the tape isn't finished. #[cfg_attr(not(feature = "no-inline"), inline)] pub unsafe fn next_(&mut self) -> Node<'de> { - let r = *self.tape.get_kinda_unchecked(self.idx); + let r = *unsafe { self.tape.get_kinda_unchecked(self.idx) }; self.idx += 1; r } @@ -868,7 +868,7 @@ impl<'de> Deserializer<'de> { structural_indexes.clear(); structural_indexes.reserve(len / 8); - let mut utf8_validator = S::Utf8Validator::new(); + let mut utf8_validator = unsafe { S::Utf8Validator::new() }; // we have padded the input out to 64 byte multiple with the remainder being // zeros @@ -904,10 +904,10 @@ impl<'de> Deserializer<'de> { __builtin_prefetch(buf + idx + 128); #endif */ - let chunk = input.get_kinda_unchecked(idx..idx + 64); - utf8_validator.update_from_chunks(chunk); + let chunk = unsafe { input.get_kinda_unchecked(idx..idx + 64) }; + unsafe { utf8_validator.update_from_chunks(chunk) }; - let input = S::new(chunk); + let input = unsafe { S::new(chunk) }; // detect odd sequences of backslashes let odd_ends: u64 = 
input.find_odd_backslash_sequences(&mut prev_iter_ends_odd_backslash); @@ -924,10 +924,10 @@ impl<'de> Deserializer<'de> { // take the previous iterations structural bits, not our current iteration, // and flatten - S::flatten_bits(structural_indexes, idx as u32, structurals); + unsafe { S::flatten_bits(structural_indexes, idx as u32, structurals) }; let mut whitespace: u64 = 0; - input.find_whitespace_and_structurals(&mut whitespace, &mut structurals); + unsafe { input.find_whitespace_and_structurals(&mut whitespace, &mut structurals) }; // fixup structurals to reflect quotes and add pseudo-structural characters structurals = S::finalize_structurals( @@ -945,12 +945,14 @@ impl<'de> Deserializer<'de> { // risk invalidating the UTF-8 checks. if idx < len { let mut tmpbuf: [u8; SIMDINPUT_LENGTH] = [0x20; SIMDINPUT_LENGTH]; - tmpbuf - .as_mut_ptr() - .copy_from(input.as_ptr().add(idx), len - idx); - utf8_validator.update_from_chunks(&tmpbuf); + unsafe { + tmpbuf + .as_mut_ptr() + .copy_from(input.as_ptr().add(idx), len - idx) + }; + unsafe { utf8_validator.update_from_chunks(&tmpbuf) }; - let input = S::new(&tmpbuf); + let input = unsafe { S::new(&tmpbuf) }; // detect odd sequences of backslashes let odd_ends: u64 = @@ -968,10 +970,10 @@ impl<'de> Deserializer<'de> { // take the previous iterations structural bits, not our current iteration, // and flatten - S::flatten_bits(structural_indexes, idx as u32, structurals); + unsafe { S::flatten_bits(structural_indexes, idx as u32, structurals) }; let mut whitespace: u64 = 0; - input.find_whitespace_and_structurals(&mut whitespace, &mut structurals); + unsafe { input.find_whitespace_and_structurals(&mut whitespace, &mut structurals) }; // fixup structurals to reflect quotes and add pseudo-structural characters structurals = S::finalize_structurals( @@ -988,7 +990,7 @@ impl<'de> Deserializer<'de> { return Err(ErrorType::Syntax); } // finally, flatten out the remaining structurals from the last iteration - 
S::flatten_bits(structural_indexes, idx as u32, structurals); + unsafe { S::flatten_bits(structural_indexes, idx as u32, structurals) }; // a valid JSON file cannot have zero structural indexes - we should have // found something (note that we compare to 1 as we always add the root!) @@ -1000,7 +1002,7 @@ impl<'de> Deserializer<'de> { return Err(ErrorType::Syntax); } - if utf8_validator.finalize(None).is_err() { + if unsafe { utf8_validator.finalize(None).is_err() } { Err(ErrorType::InvalidUtf8) } else { Ok(()) diff --git a/src/value/lazy.rs b/src/value/lazy.rs index 579bc1ca..3c99d8cd 100644 --- a/src/value/lazy.rs +++ b/src/value/lazy.rs @@ -26,10 +26,12 @@ use crate::{borrowed, tape}; use std::borrow::Cow; use std::fmt; -mod array; +/// Lazy implementation of the array trait and associated functionality +pub mod array; mod cmp; mod from; -mod object; +/// Lazy implementation of the object trait and associated functionality +pub mod object; mod trait_impls; pub use array::Array; diff --git a/src/value/lazy/array.rs b/src/value/lazy/array.rs index ab82c20a..a806724f 100644 --- a/src/value/lazy/array.rs +++ b/src/value/lazy/array.rs @@ -12,18 +12,21 @@ pub enum Array<'borrow, 'tape, 'input> { Value(&'borrow borrowed::Array<'input>), } -pub enum ArrayIter<'borrow, 'tape, 'input> { +/// Iterator over the values in an array +pub enum Iter<'borrow, 'tape, 'input> { + /// Tape variant Tape(tape::array::Iter<'tape, 'input>), + /// Value variant Value(std::slice::Iter<'borrow, borrowed::Value<'input>>), } -impl<'borrow, 'tape, 'input> Iterator for ArrayIter<'borrow, 'tape, 'input> { +impl<'borrow, 'tape, 'input> Iterator for Iter<'borrow, 'tape, 'input> { type Item = Value<'borrow, 'tape, 'input>; fn next(&mut self) -> Option { match self { - ArrayIter::Tape(t) => t.next().map(Value::Tape), - ArrayIter::Value(v) => v.next().map(Cow::Borrowed).map(Value::Value), + Iter::Tape(t) => t.next().map(Value::Tape), + Iter::Value(v) => v.next().map(Cow::Borrowed).map(Value::Value), 
} } } @@ -43,10 +46,10 @@ impl<'borrow, 'tape, 'input> Array<'borrow, 'tape, 'input> { /// Iterates over the values paris #[allow(clippy::pedantic)] // we want into_iter_without_iter but that lint doesn't exist in older clippy #[must_use] - pub fn iter<'i>(&'i self) -> ArrayIter<'i, 'tape, 'input> { + pub fn iter<'i>(&'i self) -> Iter<'i, 'tape, 'input> { match self { - Array::Tape(t) => ArrayIter::Tape(t.iter()), - Array::Value(v) => ArrayIter::Value(v.iter()), + Array::Tape(t) => Iter::Tape(t.iter()), + Array::Value(v) => Iter::Value(v.iter()), } } diff --git a/src/value/lazy/object.rs b/src/value/lazy/object.rs index 7720440a..39734e2a 100644 --- a/src/value/lazy/object.rs +++ b/src/value/lazy/object.rs @@ -14,18 +14,22 @@ pub enum Object<'borrow, 'tape, 'input> { /// Value variant Value(&'borrow borrowed::Object<'input>), } +/// Iterator over key value pairs in an object pub enum Iter<'borrow, 'tape, 'input> { /// Tape variant Tape(tape::object::Iter<'tape, 'input>), /// Value variant Value(halfbrown::Iter<'borrow, crate::cow::Cow<'input, str>, borrowed::Value<'input>>), } + +/// Iterator over the keys of an object pub enum Keys<'borrow, 'tape, 'input> { /// Tape variant Tape(tape::object::Keys<'tape, 'input>), /// Value variant Value(halfbrown::Keys<'borrow, crate::cow::Cow<'input, str>, borrowed::Value<'input>>), } +/// Iterator over the values of an object pub enum Values<'borrow, 'tape, 'input> { /// Tape variant Tape(tape::object::Values<'tape, 'input>), From b6e08d413e6ad2d172eac19dd7dbd7c877973f9d Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Sun, 17 Nov 2024 10:12:09 -0500 Subject: [PATCH 2/2] obey clippy Signed-off-by: Heinz N. 
Gies --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 880b5ad3..592572ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -948,7 +948,7 @@ impl<'de> Deserializer<'de> { unsafe { tmpbuf .as_mut_ptr() - .copy_from(input.as_ptr().add(idx), len - idx) + .copy_from(input.as_ptr().add(idx), len - idx); }; unsafe { utf8_validator.update_from_chunks(&tmpbuf) };