Skip to content

Commit

Permalink
Expose lazy object and array iterators, closes and #406
Browse files Browse the repository at this point in the history
Signed-off-by: Heinz N. Gies <heinz@licenser.net>
  • Loading branch information
Licenser committed Nov 17, 2024
1 parent 70e840d commit 5ee4d3e
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 26 deletions.
36 changes: 19 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ impl<'de> Deserializer<'de> {
input: &[u8],
structural_indexes: &mut Vec<u32>,
) -> std::result::Result<(), ErrorType> {
Self::_find_structural_bits::<impls::neon::SimdInput>(input, structural_indexes)
unsafe { Self::_find_structural_bits::<impls::neon::SimdInput>(input, structural_indexes) }
}

#[cfg(all(target_feature = "simd128", not(feature = "portable")))]
Expand Down Expand Up @@ -851,7 +851,7 @@ impl<'de> Deserializer<'de> {
/// where it's known the tape isn't finished.
#[cfg_attr(not(feature = "no-inline"), inline)]
pub unsafe fn next_(&mut self) -> Node<'de> {
let r = *self.tape.get_kinda_unchecked(self.idx);
let r = *unsafe { self.tape.get_kinda_unchecked(self.idx) };
self.idx += 1;
r
}
Expand All @@ -868,7 +868,7 @@ impl<'de> Deserializer<'de> {
structural_indexes.clear();
structural_indexes.reserve(len / 8);

let mut utf8_validator = S::Utf8Validator::new();
let mut utf8_validator = unsafe { S::Utf8Validator::new() };

// we have padded the input out to 64 byte multiple with the remainder being
// zeros
Expand Down Expand Up @@ -904,10 +904,10 @@ impl<'de> Deserializer<'de> {
__builtin_prefetch(buf + idx + 128);
#endif
*/
let chunk = input.get_kinda_unchecked(idx..idx + 64);
utf8_validator.update_from_chunks(chunk);
let chunk = unsafe { input.get_kinda_unchecked(idx..idx + 64) };
unsafe { utf8_validator.update_from_chunks(chunk) };

let input = S::new(chunk);
let input = unsafe { S::new(chunk) };
// detect odd sequences of backslashes
let odd_ends: u64 =
input.find_odd_backslash_sequences(&mut prev_iter_ends_odd_backslash);
Expand All @@ -924,10 +924,10 @@ impl<'de> Deserializer<'de> {

// take the previous iterations structural bits, not our current iteration,
// and flatten
S::flatten_bits(structural_indexes, idx as u32, structurals);
unsafe { S::flatten_bits(structural_indexes, idx as u32, structurals) };

let mut whitespace: u64 = 0;
input.find_whitespace_and_structurals(&mut whitespace, &mut structurals);
unsafe { input.find_whitespace_and_structurals(&mut whitespace, &mut structurals) };

// fixup structurals to reflect quotes and add pseudo-structural characters
structurals = S::finalize_structurals(
Expand All @@ -945,12 +945,14 @@ impl<'de> Deserializer<'de> {
// risk invalidating the UTF-8 checks.
if idx < len {
let mut tmpbuf: [u8; SIMDINPUT_LENGTH] = [0x20; SIMDINPUT_LENGTH];
tmpbuf
.as_mut_ptr()
.copy_from(input.as_ptr().add(idx), len - idx);
utf8_validator.update_from_chunks(&tmpbuf);
unsafe {
tmpbuf
.as_mut_ptr()
.copy_from(input.as_ptr().add(idx), len - idx)
};
unsafe { utf8_validator.update_from_chunks(&tmpbuf) };

let input = S::new(&tmpbuf);
let input = unsafe { S::new(&tmpbuf) };

// detect odd sequences of backslashes
let odd_ends: u64 =
Expand All @@ -968,10 +970,10 @@ impl<'de> Deserializer<'de> {

// take the previous iterations structural bits, not our current iteration,
// and flatten
S::flatten_bits(structural_indexes, idx as u32, structurals);
unsafe { S::flatten_bits(structural_indexes, idx as u32, structurals) };

let mut whitespace: u64 = 0;
input.find_whitespace_and_structurals(&mut whitespace, &mut structurals);
unsafe { input.find_whitespace_and_structurals(&mut whitespace, &mut structurals) };

// fixup structurals to reflect quotes and add pseudo-structural characters
structurals = S::finalize_structurals(
Expand All @@ -988,7 +990,7 @@ impl<'de> Deserializer<'de> {
return Err(ErrorType::Syntax);
}
// finally, flatten out the remaining structurals from the last iteration
S::flatten_bits(structural_indexes, idx as u32, structurals);
unsafe { S::flatten_bits(structural_indexes, idx as u32, structurals) };

// a valid JSON file cannot have zero structural indexes - we should have
// found something (note that we compare to 1 as we always add the root!)
Expand All @@ -1000,7 +1002,7 @@ impl<'de> Deserializer<'de> {
return Err(ErrorType::Syntax);
}

if utf8_validator.finalize(None).is_err() {
if unsafe { utf8_validator.finalize(None).is_err() } {
Err(ErrorType::InvalidUtf8)
} else {
Ok(())
Expand Down
6 changes: 4 additions & 2 deletions src/value/lazy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ use crate::{borrowed, tape};
use std::borrow::Cow;
use std::fmt;

mod array;
/// Lazy implementation of the array trait and associated functionality
pub mod array;
mod cmp;
mod from;
mod object;
/// Lazy implementation of the object trait and associated functionality
pub mod object;
mod trait_impls;

pub use array::Array;
Expand Down
17 changes: 10 additions & 7 deletions src/value/lazy/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,21 @@ pub enum Array<'borrow, 'tape, 'input> {
Value(&'borrow borrowed::Array<'input>),
}

pub enum ArrayIter<'borrow, 'tape, 'input> {
/// Iterator over the values in an array
pub enum Iter<'borrow, 'tape, 'input> {
/// Tape variant
Tape(tape::array::Iter<'tape, 'input>),
/// Value variant
Value(std::slice::Iter<'borrow, borrowed::Value<'input>>),
}

impl<'borrow, 'tape, 'input> Iterator for ArrayIter<'borrow, 'tape, 'input> {
impl<'borrow, 'tape, 'input> Iterator for Iter<'borrow, 'tape, 'input> {
type Item = Value<'borrow, 'tape, 'input>;

fn next(&mut self) -> Option<Self::Item> {
match self {
ArrayIter::Tape(t) => t.next().map(Value::Tape),
ArrayIter::Value(v) => v.next().map(Cow::Borrowed).map(Value::Value),
Iter::Tape(t) => t.next().map(Value::Tape),
Iter::Value(v) => v.next().map(Cow::Borrowed).map(Value::Value),

Check warning on line 29 in src/value/lazy/array.rs

View check run for this annotation

Codecov / codecov/patch

src/value/lazy/array.rs#L28-L29

Added lines #L28 - L29 were not covered by tests
}
}
}
Expand All @@ -43,10 +46,10 @@ impl<'borrow, 'tape, 'input> Array<'borrow, 'tape, 'input> {
/// Iterates over the values of the array
#[allow(clippy::pedantic)] // we want into_iter_without_iter but that lint doesn't exist in older clippy
#[must_use]
pub fn iter<'i>(&'i self) -> ArrayIter<'i, 'tape, 'input> {
pub fn iter<'i>(&'i self) -> Iter<'i, 'tape, 'input> {

Check warning on line 49 in src/value/lazy/array.rs

View check run for this annotation

Codecov / codecov/patch

src/value/lazy/array.rs#L49

Added line #L49 was not covered by tests
match self {
Array::Tape(t) => ArrayIter::Tape(t.iter()),
Array::Value(v) => ArrayIter::Value(v.iter()),
Array::Tape(t) => Iter::Tape(t.iter()),
Array::Value(v) => Iter::Value(v.iter()),

Check warning on line 52 in src/value/lazy/array.rs

View check run for this annotation

Codecov / codecov/patch

src/value/lazy/array.rs#L51-L52

Added lines #L51 - L52 were not covered by tests
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/value/lazy/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,22 @@ pub enum Object<'borrow, 'tape, 'input> {
/// Value variant
Value(&'borrow borrowed::Object<'input>),
}
/// Iterator over the key/value pairs in an object.
///
/// Like the lazy `Object` it is created from, the iterator has one variant
/// per backing representation and wraps that representation's own iterator.
pub enum Iter<'borrow, 'tape, 'input> {
/// Tape variant: wraps the tape-backed object iterator
Tape(tape::object::Iter<'tape, 'input>),
/// Value variant: wraps a `halfbrown` map iterator over string keys and borrowed values
Value(halfbrown::Iter<'borrow, crate::cow::Cow<'input, str>, borrowed::Value<'input>>),
}

/// Iterator over the keys of an object.
///
/// It has one variant per backing representation and wraps that
/// representation's own key iterator.
pub enum Keys<'borrow, 'tape, 'input> {
/// Tape variant: wraps the tape-backed key iterator
Tape(tape::object::Keys<'tape, 'input>),
/// Value variant: wraps a `halfbrown` key iterator over string keys
Value(halfbrown::Keys<'borrow, crate::cow::Cow<'input, str>, borrowed::Value<'input>>),
}
/// Iterator over the values of an object
pub enum Values<'borrow, 'tape, 'input> {
/// Tape variant
Tape(tape::object::Values<'tape, 'input>),
Expand Down

0 comments on commit 5ee4d3e

Please sign in to comment.