diff --git a/comparison/src/lib.rs b/comparison/src/lib.rs
index e5a71542b..b6944f264 100644
--- a/comparison/src/lib.rs
+++ b/comparison/src/lib.rs
@@ -1,6 +1,6 @@
 #![deny(rust_2018_idioms)]
 
-use proptest::prelude::*;
+use proptest::{prelude::*, collection::vec as propvec};
 use std::hash::Hasher;
 #[cfg(test)]
 use twox_hash::{XxHash, XxHash32};
@@ -12,6 +12,39 @@ pub fn hash_once(mut hasher: impl Hasher, data: &[u8]) -> u64 {
     hasher.finish()
 }
 
+#[cfg(test)]
+fn hash_by_chunks(mut hasher: impl Hasher, mut data: &[u8], chunk_sizes: &[usize]) -> u64 {
+    for &chunk_size in chunk_sizes {
+        let (this_chunk, remaining) = data.split_at(chunk_size);
+        hasher.write(this_chunk);
+        data = remaining;
+    }
+
+    hasher.finish()
+}
+
+prop_compose! {
+    fn data_and_offset
+        ()
+        (data in any::<Vec<u8>>())
+        (index in 0..=data.len(), data in Just(data))
+        -> (Vec<u8>, usize)
+    {
+        (data, index)
+    }
+}
+
+prop_compose! {
+    fn data_and_chunk_sizes
+        ()
+        (sizes in propvec(1..=256usize, 0..=100))
+        (data in propvec(any::<u8>(), sizes.iter().sum::<usize>()), sizes in Just(sizes))
+        -> (Vec<u8>, Vec<usize>)
+    {
+        (data, sizes)
+    }
+}
+
 proptest! {
     #![proptest_config(ProptestConfig::with_cases(100_000))]
 
@@ -23,6 +56,15 @@
         our_result == their_result
     }
 
+    #[test]
+    fn same_results_as_c_with_offset_for_64_bit(seed: u64, (data, offset) in data_and_offset()) {
+        let data = &data[offset..];
+        let our_result = hash_once(XxHash::with_seed(seed), data);
+        let their_result = c_xxhash::hash64(data, seed);
+
+        our_result == their_result
+    }
+
     #[test]
     fn same_results_as_c_for_32_bit(seed: u32, data: Vec<u8>) {
         let our_result = hash_once(XxHash32::with_seed(seed), &data);
@@ -30,4 +72,33 @@ proptest! {
         our_result == their_result as u64
     }
+
+    #[test]
+    fn same_results_as_c_with_offset_for_32_bit(seed: u32, (data, offset) in data_and_offset()) {
+        let data = &data[offset..];
+        let our_result = hash_once(XxHash32::with_seed(seed), data);
+        let their_result = c_xxhash::hash32(data, seed);
+
+        our_result == their_result as u64
+    }
+}
+
+proptest! {
+    #![proptest_config(ProptestConfig::with_cases(1_000))]
+
+    #[test]
+    fn same_results_with_many_chunks_as_one_for_64_bit(seed: u64, (data, chunk_sizes) in data_and_chunk_sizes()) {
+        let chunked_result = hash_by_chunks(XxHash::with_seed(seed), &data, &chunk_sizes);
+        let monolithic_result = hash_once(XxHash::with_seed(seed), &data);
+
+        chunked_result == monolithic_result
+    }
+
+    #[test]
+    fn same_results_with_many_chunks_as_one_for_32_bit(seed: u32, (data, chunk_sizes) in data_and_chunk_sizes()) {
+        let chunked_result = hash_by_chunks(XxHash32::with_seed(seed), &data, &chunk_sizes);
+        let monolithic_result = hash_once(XxHash32::with_seed(seed), &data);
+
+        chunked_result == monolithic_result
+    }
 }
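
The two `data_and_chunk_sizes` tests are the interesting new coverage: the strategy first draws up to 100 chunk sizes between 1 and 256, then draws exactly `sizes.iter().sum()` bytes of data, so `hash_by_chunks` can split the input cleanly. A standalone sketch of the invariant they pin down (illustrative values; it only assumes the crate's public API, which the tests above already use):

use std::hash::Hasher;
use twox_hash::XxHash32;

fn main() {
    let data = [1u8, 2, 3, 4, 5, 6];
    let chunk_sizes = [1usize, 2, 3]; // sums to data.len()

    // Feed the same bytes in pieces...
    let mut chunked = XxHash32::with_seed(0);
    let mut rest = &data[..];
    for &size in &chunk_sizes {
        let (chunk, remaining) = rest.split_at(size);
        chunked.write(chunk);
        rest = remaining;
    }

    // ...and all at once; both must agree.
    let mut monolithic = XxHash32::with_seed(0);
    monolithic.write(&data);

    assert_eq!(chunked.finish(), monolithic.finish());
}
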
diff --git a/src/lib.rs b/src/lib.rs
index 5f637781d..e60b61577 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -41,7 +41,6 @@ use serde::{Serialize, Deserialize};
 #[cfg(feature = "digest")]
 extern crate digest;
 
-mod number_streams;
 mod thirty_two;
 
 #[cfg(feature = "digest")]
@@ -51,8 +50,39 @@ pub use thirty_two::XxHash as XxHash32;
 #[cfg(feature = "std")]
 pub use thirty_two::RandomXxHashBuilder as RandomXxHashBuilder32;
 
-use core::hash::Hasher;
-use number_streams::NumberStreams;
+use core::{cmp, hash::Hasher};
+
+trait TransmutingByteSlices {
+    fn as_u64_arrays(&self) -> (&[u8], &[[u64; 4]], &[u8]);
+    fn as_u64s(&self) -> (&[u8], &[u64], &[u8]);
+    fn as_u32_arrays(&self) -> (&[u8], &[[u32; 4]], &[u8]);
+    fn as_u32s(&self) -> (&[u8], &[u32], &[u8]);
+}
+
+// # Safety
+//
+// - Interpreting a properly-aligned set of bytes as a `u64` should be
+//   valid.
+// - `align_to` guarantees to only transmute aligned data.
+// - An array is a tightly-packed set of bytes (as shown by `impl
+//   TryFrom<&[u8]> for &[u8; N]`)
+impl TransmutingByteSlices for [u8] {
+    fn as_u64_arrays(&self) -> (&[u8], &[[u64; 4]], &[u8]) {
+        unsafe { self.align_to::<[u64; 4]>() }
+    }
+
+    fn as_u64s(&self) -> (&[u8], &[u64], &[u8]) {
+        unsafe { self.align_to::<u64>() }
+    }
+
+    fn as_u32_arrays(&self) -> (&[u8], &[[u32; 4]], &[u8]) {
+        unsafe { self.align_to::<[u32; 4]>() }
+    }
+
+    fn as_u32s(&self) -> (&[u8], &[u32], &[u8]) {
+        unsafe { self.align_to::<u32>() }
+    }
+}
 
 const CHUNK_SIZE: usize = 32;
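
Everything below leans on `align_to`, which splits a `&[u8]` into an unaligned head, a maximal aligned middle reinterpreted as the target type, and an unaligned tail. A minimal sketch of that splitting using only the standard library (the exact split depends on where the allocation happens to land in memory):

fn main() {
    let bytes: Vec<u8> = (0..48).collect();
    // Safety: any initialized bytes form valid u64s, and align_to
    // only reinterprets the properly-aligned middle of the slice.
    let (head, middle, tail) = unsafe { bytes.as_slice().align_to::<u64>() };
    assert_eq!(head.len() + middle.len() * 8 + tail.len(), bytes.len());
    println!("{} head bytes, {} u64s, {} tail bytes", head.len(), middle.len(), tail.len());
}
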
@@ -78,8 +108,8 @@ pub struct XxHash {
     total_len: u64,
     seed: u64,
     core: XxCore,
-    buffer: [u8; CHUNK_SIZE],
-    buffer_usage: usize,
+    #[cfg_attr(feature="serialize", serde(flatten))]
+    buffer: Buffer,
 }
 
 impl XxCore {
@@ -93,8 +123,9 @@
     }
 
     #[inline(always)]
-    fn ingest_chunks<I>(&mut self, values: I)
-        where I: Iterator<Item = u64>
+    fn ingest_chunks<'a, I>(&mut self, values: I)
+    where
+        I: IntoIterator<Item = &'a [u64; 4]>,
     {
         #[inline(always)]
         fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 {
@@ -113,13 +144,11 @@ impl XxCore {
         let mut v3 = self.v3;
         let mut v4 = self.v4;
 
-        let mut values = values.peekable();
-
-        while values.peek().is_some() {
-            v1 = ingest_one_number(v1, values.next().unwrap());
-            v2 = ingest_one_number(v2, values.next().unwrap());
-            v3 = ingest_one_number(v3, values.next().unwrap());
-            v4 = ingest_one_number(v4, values.next().unwrap());
+        for &[n1, n2, n3, n4] in values {
+            v1 = ingest_one_number(v1, n1);
+            v2 = ingest_one_number(v2, n2);
+            v3 = ingest_one_number(v3, n3);
+            v4 = ingest_one_number(v4, n4);
         }
 
         self.v1 = v1;
@@ -170,6 +199,38 @@ impl core::fmt::Debug for XxCore {
     }
 }
 
+#[cfg_attr(feature="serialize", derive(Serialize, Deserialize))]
+#[derive(Debug, Copy, Clone, Default, PartialEq)]
+struct Buffer {
+    #[cfg_attr(feature="serialize", serde(rename = "buffer"))]
+    data: [u8; CHUNK_SIZE],
+    #[cfg_attr(feature="serialize", serde(rename = "buffer_usage"))]
+    len: usize,
+}
+
+impl Buffer {
+    fn data(&self) -> &[u8] {
+        &self.data[..self.len]
+    }
+
+    /// Consumes as much of the parameter as it can, returning the unused part.
+    fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
+        let to_use = cmp::min(self.available(), data.len());
+        let (data, remaining) = data.split_at(to_use);
+        self.data[self.len..][..to_use].copy_from_slice(data);
+        self.len += to_use;
+        remaining
+    }
+
+    fn available(&self) -> usize {
+        CHUNK_SIZE - self.len
+    }
+
+    fn is_full(&self) -> bool {
+        self.len == CHUNK_SIZE
+    }
+}
+
 impl XxHash {
     /// Constructs the hash with an initial seed
     pub fn with_seed(seed: u64) -> XxHash {
@@ -177,8 +238,29 @@
             total_len: 0,
             seed: seed,
             core: XxCore::with_seed(seed),
-            buffer: [0; CHUNK_SIZE],
-            buffer_usage: 0,
+            buffer: Buffer::default(),
+        }
+    }
+
+    fn buffer_bytes(&mut self, mut data: &[u8]) {
+        while !data.is_empty() {
+            data = self.buffer.consume(data);
+            if self.buffer.is_full() {
+                let (unaligned_head, aligned, unaligned_tail) =
+                    self.buffer.data[..].as_u64_arrays();
+                debug_assert!(
+                    unaligned_head.is_empty(),
+                    "buffer was not aligned for 64-bit numbers"
+                );
+                debug_assert_eq!(
+                    aligned.len(),
+                    1,
+                    "buffer did not have enough 64-bit numbers"
+                );
+                debug_assert!(unaligned_tail.is_empty(), "buffer has trailing data");
+                self.core.ingest_chunks(aligned);
+                self.buffer.len = 0;
+            }
         }
     }
 }
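
`Buffer::consume` copies as much input as fits into the fixed 32-byte scratch space and hands back the rest; `buffer_bytes` loops on it, draining a full buffer into the core as one `[u64; 4]` chunk. A standalone re-statement of that contract (this mirrors the private type above rather than reusing it):

const CHUNK_SIZE: usize = 32;

// Fill `scratch` from `data` as far as possible; return the unused tail.
fn consume<'a>(scratch: &mut [u8; CHUNK_SIZE], len: &mut usize, data: &'a [u8]) -> &'a [u8] {
    let to_use = (CHUNK_SIZE - *len).min(data.len());
    let (taken, remaining) = data.split_at(to_use);
    scratch[*len..][..to_use].copy_from_slice(taken);
    *len += to_use;
    remaining
}

fn main() {
    let mut scratch = [0u8; CHUNK_SIZE];
    let mut len = 0;
    // 40 bytes in: 32 fill the buffer, 8 come back as the remainder.
    let rest = consume(&mut scratch, &mut len, &[7u8; 40]);
    assert_eq!(len, CHUNK_SIZE);
    assert_eq!(rest.len(), 8);
}
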
@@ -191,51 +273,19 @@ impl Default for XxHash {
 
 impl Hasher for XxHash {
     fn write(&mut self, bytes: &[u8]) {
-        let mut bytes = bytes;
+        let (unaligned_head, aligned, unaligned_tail) = bytes.as_u64_arrays();
 
-        self.total_len += bytes.len() as u64;
+        self.buffer_bytes(unaligned_head);
 
-        // Even with new data, we still don't have a full buffer. Wait
-        // until we have a full buffer.
-        if self.buffer_usage + bytes.len() < self.buffer.len() {
-            unsafe {
-                let tail = self.buffer.as_mut_ptr().offset(self.buffer_usage as isize);
-                core::ptr::copy_nonoverlapping(bytes.as_ptr(), tail, bytes.len());
-            }
-            self.buffer_usage += bytes.len();
-            return;
-        }
+        // Surprisingly, if we still have bytes in the buffer here, we
+        // don't do anything with them yet! This matches the C
+        // implementation.
 
-        // Some data left from previous update. Fill the buffer and
-        // consume it first.
-        if self.buffer_usage > 0 {
-            let bytes_to_use = self.buffer.len() - self.buffer_usage;
-            let (to_use, leftover) = bytes.split_at(bytes_to_use);
+        self.core.ingest_chunks(aligned);
 
-            unsafe {
-                let tail = self.buffer.as_mut_ptr().offset(self.buffer_usage as isize);
-                core::ptr::copy_nonoverlapping(to_use.as_ptr(), tail, bytes_to_use);
-            }
-
-            let (iter, _) = self.buffer.u64_stream();
-
-            self.core.ingest_chunks(iter);
-
-            bytes = leftover;
-            self.buffer_usage = 0;
-        }
+        self.buffer_bytes(unaligned_tail);
 
-        // Consume the input data in large chunks
-        let (iter, bytes) = bytes.u64_stream_with_stride(4);
-        self.core.ingest_chunks(iter);
-
-        // Save any leftover data for the next call
-        if bytes.len() > 0 {
-            unsafe {
-                core::ptr::copy_nonoverlapping(bytes.as_ptr(), self.buffer.as_mut_ptr(), bytes.len());
-            }
-            self.buffer_usage = bytes.len();
-        }
+        self.total_len += bytes.len() as u64;
     }
 
     fn finish(&self) -> u64 {
@@ -250,11 +300,15 @@
 
         hash = hash.wrapping_add(self.total_len);
 
-        let buffered = &self.buffer[..self.buffer_usage];
-        let (buffered_u64s, buffered) = buffered.u64_stream();
+        let buffered = self.buffer.data();
+        let (before, buffered_u64s, buffered) = buffered.as_u64s();
+        debug_assert!(
+            before.is_empty(),
+            "buffer was not aligned for 64-bit numbers"
+        );
 
-        for mut k1 in buffered_u64s {
-            k1 = k1.wrapping_mul(PRIME_2);
+        for buffered_u64 in buffered_u64s {
+            let mut k1 = buffered_u64.wrapping_mul(PRIME_2);
             k1 = k1.rotate_left(31);
             k1 = k1.wrapping_mul(PRIME_1);
             hash ^= k1;
@@ -263,18 +317,22 @@
             hash = hash.wrapping_add(PRIME_4);
         }
 
-        let (buffered_u32s, buffered) = buffered.u32_stream();
+        let (before, buffered_u32s, buffered_u8s) = buffered.as_u32s();
+        debug_assert!(
+            before.is_empty(),
+            "buffer was not aligned for 32-bit numbers"
+        );
 
-        for k1 in buffered_u32s {
-            let k1 = (k1 as u64).wrapping_mul(PRIME_1);
+        for &buffered_u32 in buffered_u32s {
+            let k1 = (buffered_u32 as u64).wrapping_mul(PRIME_1);
             hash ^= k1;
             hash = hash.rotate_left(23);
             hash = hash.wrapping_mul(PRIME_2);
             hash = hash.wrapping_add(PRIME_3);
         }
 
-        for buffered_u8 in buffered {
-            let k1 = (*buffered_u8 as u64).wrapping_mul(PRIME_5);
+        for &buffered_u8 in buffered_u8s {
+            let k1 = (buffered_u8 as u64).wrapping_mul(PRIME_5);
             hash ^= k1;
             hash = hash.rotate_left(11);
             hash = hash.wrapping_mul(PRIME_1);
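
With that in place, `write` becomes three phases: buffer the unaligned head, stream the aligned `[u64; 4]` chunks straight into the core, and buffer the unaligned tail. Nothing changes for callers; a hedged sketch of a write pattern that can exercise all three phases (which bytes land in which phase depends on the input slice's runtime alignment):

use std::hash::Hasher;
use twox_hash::XxHash;

fn main() {
    let mut hasher = XxHash::with_seed(0);
    hasher.write(&[0u8; 3]);  // shorter than 32 bytes: fully buffered
    hasher.write(&[0u8; 64]); // head/tail are buffered; any aligned
                              // [u64; 4] chunks go straight to the core
    hasher.write(&[0u8; 1]);  // waits in the buffer until finish()
    println!("{:016x}", hasher.finish());
}
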
diff --git a/src/number_streams.rs b/src/number_streams.rs
deleted file mode 100644
index 0240ae4c1..000000000
--- a/src/number_streams.rs
+++ /dev/null
@@ -1,216 +0,0 @@
-use core::marker::PhantomData;
-use core::ptr;
-
-// We should use {u32,u64}::BYTES when they are stable;
-const U32_BYTES: usize = 4;
-const U64_BYTES: usize = 8;
-
-macro_rules! number_stream(
-    ($name:ident, $number_type:ty, $bytes_in_type:expr) => (
-#[derive(Debug,Copy,Clone)]
-pub struct $name<'a> {
-    start: *const $number_type,
-    end: *const $number_type,
-    marker: PhantomData<&'a ()>
-}
-
-impl<'a> $name<'a> {
-    fn new(bytes: &'a [u8]) -> ($name<'a>, &'a [u8]) {
-        $name::with_stride(bytes, 1)
-    }
-
-    // Stride is a grouping of sequential numbers
-    //
-    // 0 1 2 3 4 5 6 7
-    // -------------    We have 7 bytes
-    // --- --- --- X    Each number takes 2 bytes
-    // --- --- XXX X    With a stride of 2
-    //
-    // Only four bytes will be consumed, the rest returned
-    fn with_stride(bytes: &'a [u8], stride: usize) -> ($name<'a>, &'a [u8]) {
-        let n_numbers_in_bytes = bytes.len() / $bytes_in_type;
-        let n_strides_in_numbers = n_numbers_in_bytes / stride;
-        let total_numbers = n_strides_in_numbers * stride;
-
-        let (mine, theirs) = bytes.split_at(total_numbers * $bytes_in_type);
-
-        let start = mine.as_ptr() as *const $number_type;
-        let end = unsafe { start.offset(total_numbers as isize) };
-
-        let me = $name {
-            start: start,
-            end: end,
-            marker: PhantomData,
-        };
-
-        (me, theirs)
-    }
-}
-
-impl<'a> Iterator for $name<'a> {
-    type Item = $number_type;
-
-    fn next(&mut self) -> Option<$number_type> {
-        if self.start >= self.end { return None }
-
-        let v: $number_type = unsafe { ptr::read(self.start) };
-
-        self.start = unsafe { self.start.offset(1) };
-        Some(v)
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        let cnt = self.count();
-        (cnt, Some(cnt))
-    }
-
-    fn count(self) -> usize {
-        let total_bytes = self.end as usize - self.start as usize;
-        total_bytes / $bytes_in_type
-    }
-}
-));
-
-number_stream!(U32FromBytes, u32, U32_BYTES);
-number_stream!(U64FromBytes, u64, U64_BYTES);
-
-/// Converts a slice of bytes into an iterator of numbers.
-///
-/// The data is always treated as little endian. Only complete ranges
-/// of bytes are parsed as the number, any left-over bytes are returned.
-// FIXME: Probably doesn't work on big endian machines.
-pub trait NumberStreams {
-    /// Reads u32s from the bytes
-    fn u32_stream(&self) -> (U32FromBytes, &[u8]);
-    /// Reads u32s from the bytes, there will always be a multiple of `stride` numbers
-    fn u32_stream_with_stride(&self, stride: usize) -> (U32FromBytes, &[u8]);
-    /// Reads u64s from the bytes
-    fn u64_stream(&self) -> (U64FromBytes, &[u8]);
-    /// Reads u64s from the bytes, there will always be a multiple of `stride` numbers
-    fn u64_stream_with_stride(&self, stride: usize) -> (U64FromBytes, &[u8]);
-}
-
-impl<'a> NumberStreams for [u8] {
-    fn u32_stream(&self) -> (U32FromBytes, &[u8]) {
-        U32FromBytes::new(self)
-    }
-    fn u32_stream_with_stride(&self, stride: usize) -> (U32FromBytes, &[u8]) {
-        U32FromBytes::with_stride(self, stride)
-    }
-    fn u64_stream(&self) -> (U64FromBytes, &[u8]) {
-        U64FromBytes::new(self)
-    }
-    fn u64_stream_with_stride(&self, stride: usize) -> (U64FromBytes, &[u8]) {
-        U64FromBytes::with_stride(self, stride)
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::prelude::v1::*;
-    use ::std::slice;
-    use super::{U32_BYTES,U64_BYTES};
-    use super::NumberStreams;
-
-    fn u32_slice_as_u8(values: &[u32]) -> &[u8] {
-        unsafe {
-            slice::from_raw_parts(
-                values.as_ptr() as *const u8,
-                values.len() * U32_BYTES,
-            )
-        }
-    }
-
-    #[test]
-    fn can_read_u32_from_bytes() {
-        let orig_values: &[u32] = &[0,1];
-        let as_u8 = u32_slice_as_u8(orig_values);
-
-        let (iter, rest) = as_u8.u32_stream();
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[..]);
-        assert!(rest.is_empty());
-    }
-
-    #[test]
-    fn can_read_u32_from_bytes_with_leftovers() {
-        let orig_values: &[u32] = &[0,1];
-        let mut as_u8: Vec<_> = u32_slice_as_u8(orig_values).into();
-        as_u8.push(42);
-
-        let (iter, rest) = as_u8.u32_stream();
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[..]);
-        assert_eq!(rest, [42]);
-    }
-
-    #[test]
-    fn can_read_u32_from_bytes_with_a_stride() {
-        let orig_values: &[u32] = &[0,1,2];
-        let as_u8 = u32_slice_as_u8(orig_values);
-
-        let (iter, rest) = as_u8.u32_stream_with_stride(2);
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[..2]);
-
-        let (iter, rest) = rest.u32_stream();
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[2..]);
-        assert!(rest.is_empty());
-    }
-
-    fn u64_slice_as_u8(values: &[u64]) -> &[u8] {
-        unsafe {
-            slice::from_raw_parts(
-                values.as_ptr() as *const u8,
-                values.len() * U64_BYTES,
-            )
-        }
-    }
-
-    #[test]
-    fn can_read_u64_from_bytes() {
-        let orig_values: &[u64] = &[0,1];
-        let as_u8 = u64_slice_as_u8(orig_values);
-
-        let (iter, rest) = as_u8.u64_stream();
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[..]);
-        assert!(rest.is_empty());
-    }
-
-    #[test]
-    fn can_read_u64_from_bytes_with_leftovers() {
-        let orig_values: &[u64] = &[0,1];
-        let mut as_u8: Vec<_> = u64_slice_as_u8(orig_values).into();
-        as_u8.push(42);
-
-        let (iter, rest) = as_u8.u64_stream();
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[..]);
-        assert_eq!(rest, [42]);
-    }
-
-    #[test]
-    fn can_read_u64_from_bytes_with_a_stride() {
-        let orig_values: &[u64] = &[0,1,2];
-        let as_u8 = u64_slice_as_u8(orig_values);
-
-        let (iter, rest) = as_u8.u64_stream_with_stride(2);
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[..2]);
-
-        let (iter, rest) = rest.u64_stream();
-        let values: Vec<_> = iter.collect();
-
-        assert_eq!(&values[..], &orig_values[2..]);
-        assert!(rest.is_empty());
-    }
-}
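
It is worth spelling out why this module could be deleted outright rather than ported: `with_stride` casts `bytes.as_ptr()` to `*const u64` and reads through it with `ptr::read`, which is undefined behavior when the slice is not suitably aligned, while `align_to` only ever reinterprets the correctly-aligned middle of a slice. A sketch of the replacement for the one stride the hashers actually used (stride 4, i.e. `[u64; 4]` chunks):

fn main() {
    let bytes = vec![0u8; 100];
    // What u64_stream_with_stride(4) used to yield is now the aligned
    // middle; the head and tail are handled by the byte buffer instead.
    let (head, chunks, tail) = unsafe { bytes.as_slice().align_to::<[u64; 4]>() };
    assert_eq!(head.len() + chunks.len() * 32 + tail.len(), bytes.len());
    println!("{} chunks of four u64s", chunks.len());
}
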
diff --git a/src/thirty_two.rs b/src/thirty_two.rs
index 950cf9c13..af3b5fdb3 100644
--- a/src/thirty_two.rs
+++ b/src/thirty_two.rs
@@ -1,7 +1,5 @@
-use core;
-
-use core::hash::Hasher;
-use number_streams::NumberStreams;
+use TransmutingByteSlices;
+use core::{self, cmp, hash::Hasher};
 
 #[cfg(feature="serialize")]
 use serde::{Serialize, Deserialize};
@@ -35,8 +33,8 @@ pub struct XxHash {
     total_len: u32,
     seed: u32,
     core: XxCore,
-    buffer: [u8; CHUNK_SIZE],
-    buffer_usage: usize,
+    #[cfg_attr(feature="serialize", serde(flatten))]
+    buffer: Buffer,
 }
 
 impl XxCore {
@@ -50,8 +48,9 @@
     }
 
     #[inline(always)]
-    fn ingest_chunks<I>(&mut self, values: I)
-        where I: Iterator<Item = u32>
+    fn ingest_chunks<'a, I>(&mut self, values: I)
+    where
+        I: IntoIterator<Item = &'a [u32; 4]>,
    {
         #[inline(always)]
         fn ingest_one_number(mut current_value: u32, mut value: u32) -> u32 {
@@ -70,13 +69,11 @@
         let mut v3 = self.v3;
         let mut v4 = self.v4;
 
-        let mut values = values.peekable();
-
-        while values.peek().is_some() {
-            v1 = ingest_one_number(v1, values.next().unwrap());
-            v2 = ingest_one_number(v2, values.next().unwrap());
-            v3 = ingest_one_number(v3, values.next().unwrap());
-            v4 = ingest_one_number(v4, values.next().unwrap());
+        for &[n1, n2, n3, n4] in values {
+            v1 = ingest_one_number(v1, n1);
+            v2 = ingest_one_number(v2, n2);
+            v3 = ingest_one_number(v3, n3);
+            v4 = ingest_one_number(v4, n4);
         }
 
         self.v1 = v1;
@@ -112,6 +109,38 @@ impl core::fmt::Debug for XxCore {
     }
 }
 
+#[cfg_attr(feature="serialize", derive(Serialize, Deserialize))]
+#[derive(Debug, Copy, Clone, Default, PartialEq)]
+struct Buffer {
+    #[cfg_attr(feature="serialize", serde(rename = "buffer"))]
+    data: [u8; CHUNK_SIZE],
+    #[cfg_attr(feature="serialize", serde(rename = "buffer_usage"))]
+    len: usize,
+}
+
+impl Buffer {
+    fn data(&self) -> &[u8] {
+        &self.data[..self.len]
+    }
+
+    /// Consumes as much of the parameter as it can, returning the unused part.
+    fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
+        let to_use = cmp::min(self.available(), data.len());
+        let (data, remaining) = data.split_at(to_use);
+        self.data[self.len..][..to_use].copy_from_slice(data);
+        self.len += to_use;
+        remaining
+    }
+
+    fn available(&self) -> usize {
+        CHUNK_SIZE - self.len
+    }
+
+    fn is_full(&self) -> bool {
+        self.len == CHUNK_SIZE
+    }
+}
+
 impl XxHash {
     /// Constructs the hash with an initial seed
     pub fn with_seed(seed: u32) -> XxHash {
@@ -119,8 +148,28 @@
             total_len: 0,
             seed: seed,
             core: XxCore::with_seed(seed),
-            buffer: [0; CHUNK_SIZE],
-            buffer_usage: 0,
+            buffer: Buffer::default(),
+        }
+    }
+
+    fn buffer_bytes(&mut self, mut data: &[u8]) {
+        while !data.is_empty() {
+            data = self.buffer.consume(data);
+            if self.buffer.is_full() {
+                let (unaligned_head, aligned, unaligned_tail) = self.buffer.data.as_u32_arrays();
+                debug_assert!(
+                    unaligned_head.is_empty(),
+                    "buffer was not aligned for 32-bit numbers"
+                );
+                debug_assert_eq!(
+                    aligned.len(),
+                    1,
+                    "buffer did not have enough 32-bit numbers"
+                );
+                debug_assert!(unaligned_tail.is_empty(), "buffer has trailing data");
+                self.core.ingest_chunks(aligned);
+                self.buffer.len = 0;
+            }
         }
     }
 }
@@ -133,51 +182,19 @@ impl Default for XxHash {
 
 impl Hasher for XxHash {
     fn write(&mut self, bytes: &[u8]) {
-        let mut bytes = bytes;
-
-        self.total_len += bytes.len() as u32;
+        let (unaligned_head, aligned, unaligned_tail) = bytes.as_u32_arrays();
 
-        // Even with new data, we still don't have a full buffer. Wait
-        // until we have a full buffer.
-        if self.buffer_usage + bytes.len() < self.buffer.len() {
-            unsafe {
-                let tail = self.buffer.as_mut_ptr().offset(self.buffer_usage as isize);
-                core::ptr::copy_nonoverlapping(bytes.as_ptr(), tail, bytes.len());
-            }
-            self.buffer_usage += bytes.len();
-            return;
-        }
+        self.buffer_bytes(unaligned_head);
 
-        // Some data left from previous update. Fill the buffer and
-        // consume it first.
-        if self.buffer_usage > 0 {
-            let bytes_to_use = self.buffer.len() - self.buffer_usage;
-            let (to_use, leftover) = bytes.split_at(bytes_to_use);
+        // Surprisingly, if we still have bytes in the buffer here, we
+        // don't do anything with them yet! This matches the C
+        // implementation.
 
-            unsafe {
-                let tail = self.buffer.as_mut_ptr().offset(self.buffer_usage as isize);
-                core::ptr::copy_nonoverlapping(to_use.as_ptr(), tail, bytes_to_use);
-            }
+        self.core.ingest_chunks(aligned);
 
-            let (iter, _) = self.buffer.u32_stream();
+        self.buffer_bytes(unaligned_tail);
 
-            self.core.ingest_chunks(iter);
-
-            bytes = leftover;
-            self.buffer_usage = 0;
-        }
-
-        // Consume the input data in large chunks
-        let (iter, bytes) = bytes.u32_stream_with_stride(4);
-        self.core.ingest_chunks(iter);
-
-        // Save any leftover data for the next call
-        if bytes.len() > 0 {
-            unsafe {
-                core::ptr::copy_nonoverlapping(bytes.as_ptr(), self.buffer.as_mut_ptr(), bytes.len());
-            }
-            self.buffer_usage = bytes.len();
-        }
+        self.total_len += bytes.len() as u32;
     }
 
     fn finish(&self) -> u64 { // NODIFF
@@ -192,17 +209,21 @@
 
         hash = hash.wrapping_add(self.total_len);
 
-        let buffered = &self.buffer[..self.buffer_usage];
-        let (buffered_u32s, buffered) = buffered.u32_stream();
+        let buffered = self.buffer.data();
+        let (before, buffered_u32s, buffered_u8s) = buffered.as_u32s();
+        debug_assert!(
+            before.is_empty(),
+            "buffer was not aligned for 32-bit numbers"
+        );
 
-        for k1 in buffered_u32s {
-            let k1 = k1.wrapping_mul(PRIME_3);
+        for buffered_u32 in buffered_u32s {
+            let k1 = buffered_u32.wrapping_mul(PRIME_3);
             hash = hash.wrapping_add(k1);
             hash = hash.rotate_left(17);
             hash = hash.wrapping_mul(PRIME_4);
         }
 
-        for buffered_u8 in buffered {
+        for buffered_u8 in buffered_u8s {
             let k1 = (*buffered_u8 as u32).wrapping_mul(PRIME_5);
             hash = hash.wrapping_add(k1);
             hash = hash.rotate_left(11);
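
A final detail that is easy to miss in both hashers: the new `Buffer` is embedded with `serde(flatten)` and its fields renamed back to `buffer` and `buffer_usage`, so the serialized form of a hasher stays identical to the old two-field layout. A sketch of the pattern in isolation (hypothetical `Outer`/`Inner` names, serde with the derive feature assumed):

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Inner {
    #[serde(rename = "buffer")]
    data: [u8; 4],
    #[serde(rename = "buffer_usage")]
    len: usize,
}

#[derive(Serialize, Deserialize)]
struct Outer {
    total_len: u64,
    #[serde(flatten)]
    inner: Inner,
}

// Serializes as {"total_len":..,"buffer":[..],"buffer_usage":..},
// the same shape the two plain fields produced before the refactoring.
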