From e39ad3b169c620524a31a8d652203065e2bddfad Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 7 Aug 2023 15:43:37 +0200 Subject: [PATCH] Move `arrow-rs` interop to `arrow` module --- Cargo.toml | 3 +- src/array/fixed_size_primitive.rs | 88 ------------------------- src/arrow/array/fixed_size_primitive.rs | 76 +++++++++++++++++++++ src/arrow/array/mod.rs | 1 + src/arrow/bitmap.rs | 30 +++++++++ src/arrow/buffer.rs | 57 ++++++++++++++++ src/arrow/length.rs | 14 ++++ src/arrow/mod.rs | 11 ++++ src/bitmap/mod.rs | 39 +---------- src/buffer.rs | 60 ----------------- src/fixed_size.rs | 31 +++++---- src/length.rs | 12 ---- src/lib.rs | 3 + 13 files changed, 216 insertions(+), 209 deletions(-) create mode 100644 src/arrow/array/fixed_size_primitive.rs create mode 100644 src/arrow/array/mod.rs create mode 100644 src/arrow/bitmap.rs create mode 100644 src/arrow/buffer.rs create mode 100644 src/arrow/length.rs create mode 100644 src/arrow/mod.rs diff --git a/Cargo.toml b/Cargo.toml index d2af3cd1..0ac30eff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,8 @@ keywords.workspace = true categories.workspace = true [features] -default = ["arrow-array", "arrow-buffer", "derive", "unsafe"] +default = ["arrow", "derive", "unsafe"] +arrow = ["arrow-array", "arrow-buffer"] arrow-array = ["dep:arrow-array", "arrow-buffer"] arrow-buffer = ["dep:arrow-buffer"] derive = ["dep:narrow-derive"] diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index 2638b0ab..33329acd 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -136,77 +136,6 @@ impl BitmapRefMut for FixedSizePrimitiveArray< impl ValidityBitmap for FixedSizePrimitiveArray {} -#[cfg(feature = "arrow-array")] -mod arrow { - use super::FixedSizePrimitiveArray; - use crate::{ - bitmap::Bitmap, - buffer::{ArrowBuffer, BufferType}, - FixedSize, Length, - }; - use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; - use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer}; - - impl, Buffer: BufferType> - From> for PrimitiveArray - where - ::Buffer: Length + Into<::Buffer>, - { - fn from(value: FixedSizePrimitiveArray) -> Self { - let len = value.len(); - Self::new(ScalarBuffer::new(value.0.into().finish(), 0, len), None) - } - } - - impl, Buffer: BufferType> - From> for PrimitiveArray - where - ::Buffer: Length + Into<::Buffer>, - Bitmap: Into, - { - fn from(value: FixedSizePrimitiveArray) -> Self { - let len = value.len(); - Self::new( - ScalarBuffer::new(value.0.data.into().finish(), 0, len), - Some(NullBuffer::new(value.0.validity.into())), - ) - } - } - - #[cfg(test)] - mod test { - - #[test] - #[cfg(feature = "arrow-array")] - fn arrow_array() { - use crate::{array::Int8Array, bitmap::ValidityBitmap, buffer::ArrowBuffer}; - use arrow_array::{types::Int8Type, Array, PrimitiveArray}; - - let input = [1, 2, 3, 4]; - let array = input.into_iter().collect::>(); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); - - let input = [1, 2, 3, 4]; - let array = input.into_iter().collect::>(); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); - - let input = [Some(1), None, Some(3), Some(4)]; - let array = input.into_iter().collect::>(); - assert_eq!(array.null_count(), 1); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); - assert_eq!(array.null_count(), 1); - } - - #[test] - fn convert() {} - } -} - -pub use arrow::*; - #[cfg(test)] mod tests { use super::*; @@ -276,21 +205,4 @@ mod tests { mem::size_of::() + mem::size_of::() ); } - - #[test] - #[cfg(feature = "arrow-buffer")] - fn arrow_buffer() { - use crate::buffer::ArrowBuffer; - - let input = [1, 2, 3, 4]; - let mut array = input.into_iter().collect::>(); - assert_eq!(array.len(), 4); - // Use arrow_buffer - array.0.append_n(5, 5); - assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]); - - let input = [Some(1), None, Some(3), Some(4)]; - let array = input.into_iter().collect::>(); - assert_eq!(array.len(), 4); - } } diff --git a/src/arrow/array/fixed_size_primitive.rs b/src/arrow/array/fixed_size_primitive.rs new file mode 100644 index 00000000..b8e909b0 --- /dev/null +++ b/src/arrow/array/fixed_size_primitive.rs @@ -0,0 +1,76 @@ +use crate::{ + array::FixedSizePrimitiveArray, arrow::buffer::ArrowBuffer, bitmap::Bitmap, buffer::BufferType, + FixedSize, Length, +}; +use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; +use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer}; + +impl, Buffer: BufferType> + From> for PrimitiveArray +where + ::Buffer: Length + Into<::Buffer>, +{ + fn from(value: FixedSizePrimitiveArray) -> Self { + let len = value.len(); + Self::new(ScalarBuffer::new(value.0.into().finish(), 0, len), None) + } +} + +impl, Buffer: BufferType> + From> for PrimitiveArray +where + ::Buffer: Length + Into<::Buffer>, + Bitmap: Into, +{ + fn from(value: FixedSizePrimitiveArray) -> Self { + let len = value.len(); + Self::new( + ScalarBuffer::new(value.0.data.into().finish(), 0, len), + Some(NullBuffer::new(value.0.validity.into())), + ) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::array::Int8Array; + + #[test] + #[cfg(feature = "arrow-array")] + fn arrow_array() { + use crate::{array::Int8Array, bitmap::ValidityBitmap}; + use arrow_array::{types::Int8Type, Array, PrimitiveArray}; + + let input = [1, 2, 3, 4]; + let array = input.into_iter().collect::>(); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + + let input = [1, 2, 3, 4]; + let array = input.into_iter().collect::>(); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + + let input = [Some(1), None, Some(3), Some(4)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.null_count(), 1); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + assert_eq!(array.null_count(), 1); + } + + #[test] + fn arrow_buffer() { + let input = [1, 2, 3, 4]; + let mut array = input.into_iter().collect::>(); + assert_eq!(array.len(), 4); + // Use arrow_buffer + array.0.append_n(5, 5); + assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]); + + let input = [Some(1), None, Some(3), Some(4)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), 4); + } +} diff --git a/src/arrow/array/mod.rs b/src/arrow/array/mod.rs new file mode 100644 index 00000000..8c4f437f --- /dev/null +++ b/src/arrow/array/mod.rs @@ -0,0 +1 @@ +pub mod fixed_size_primitive; diff --git a/src/arrow/bitmap.rs b/src/arrow/bitmap.rs new file mode 100644 index 00000000..7f7fe51d --- /dev/null +++ b/src/arrow/bitmap.rs @@ -0,0 +1,30 @@ +use super::buffer::ArrowBuffer; +use crate::{bitmap::Bitmap, buffer::BufferType}; +use arrow_buffer::BooleanBuffer; + +impl From> for BooleanBuffer +where + ::Buffer: Into<::Buffer>, +{ + fn from(value: Bitmap) -> Self { + BooleanBuffer::new(value.buffer.into().finish(), value.offset, value.bits) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Length; + + #[test] + fn arrow_buffer() { + let input = vec![true, false, true]; + let bitmap = input.into_iter().collect::>(); + assert_eq!(bitmap.len(), 3); + + let input = vec![true, false, true]; + let bitmap = input.into_iter().collect::>(); + assert_eq!(bitmap.len(), 3); + assert_eq!(bitmap.into_iter().collect::>(), [true, false, true]); + } +} diff --git a/src/arrow/buffer.rs b/src/arrow/buffer.rs new file mode 100644 index 00000000..9eb4296e --- /dev/null +++ b/src/arrow/buffer.rs @@ -0,0 +1,57 @@ +use crate::buffer::{Buffer, BufferMut, BufferType}; +use crate::FixedSize; +use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; + +/// A [BufferType] implementation for [arrow_buffer::BufferBuilder]. +pub struct ArrowBuffer; + +impl BufferType for ArrowBuffer { + type Buffer = BufferBuilder; +} + +impl Buffer for BufferBuilder { + fn as_slice(&self) -> &[T] { + BufferBuilder::as_slice(self) + } +} + +impl BufferMut for BufferBuilder { + fn as_mut_slice(&mut self) -> &mut [T] { + BufferBuilder::as_slice_mut(self) + } +} + +/// A [BufferType] implementation for [arrow_buffer::ScalarBuffer]. +pub struct ArrowScalarBuffer; + +impl BufferType for ArrowScalarBuffer { + type Buffer = ScalarBuffer; +} + +impl Buffer for ScalarBuffer { + fn as_slice(&self) -> &[T] { + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn arrow() { + let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]); + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); + + let mut buffer = arrow_buffer::BufferBuilder::from_iter([1u64, 2, 3, 4]); + assert_eq!( + <_ as BufferMut>::as_mut_slice(&mut buffer), + &[1, 2, 3, 4] + ); + <_ as BufferMut>::as_mut_slice(&mut buffer)[3] = 42; + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 42]); + + let buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); + } +} diff --git a/src/arrow/length.rs b/src/arrow/length.rs new file mode 100644 index 00000000..bed0ca59 --- /dev/null +++ b/src/arrow/length.rs @@ -0,0 +1,14 @@ +use crate::Length; +use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; + +impl Length for BufferBuilder { + fn len(&self) -> usize { + BufferBuilder::len(self) + } +} + +impl Length for ScalarBuffer { + fn len(&self) -> usize { + self.as_ref().len() + } +} diff --git a/src/arrow/mod.rs b/src/arrow/mod.rs new file mode 100644 index 00000000..89d442ea --- /dev/null +++ b/src/arrow/mod.rs @@ -0,0 +1,11 @@ +#[cfg(feature = "arrow-array")] +pub mod array; + +#[cfg(feature = "arrow-buffer")] +pub mod bitmap; + +#[cfg(feature = "arrow-buffer")] +pub mod buffer; + +#[cfg(feature = "arrow-buffer")] +pub mod length; diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index ae44cdf3..36aa5e11 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -42,14 +42,14 @@ pub trait BitmapRefMut: BitmapRef { // todo(mb): implement ops pub struct Bitmap { /// The bits are stored in this buffer of bytes. - buffer: ::Buffer, + pub(crate) buffer: ::Buffer, /// The number of bits stored in the bitmap. - bits: usize, + pub(crate) bits: usize, /// An offset (in number of bits) in the buffer. This enables zero-copy /// slicing of the bitmap on non-byte boundaries. - offset: usize, + pub(crate) offset: usize, } impl BitmapRef for Bitmap { @@ -299,24 +299,6 @@ impl Length for Bitmap { impl ValidityBitmap for Bitmap {} -#[cfg(feature = "arrow-buffer")] -mod arrow { - use super::Bitmap; - use crate::buffer::{ArrowBuffer, BufferType}; - use arrow_buffer::BooleanBuffer; - - impl From> for BooleanBuffer - where - ::Buffer: Into<::Buffer>, - { - fn from(value: Bitmap) -> Self { - BooleanBuffer::new(value.buffer.into().finish(), 0, value.bits) - } - } -} - -pub use arrow::*; - #[cfg(test)] mod tests { use crate::buffer::{ArrayBuffer, BoxBuffer, BufferRefMut, SliceBuffer}; @@ -489,19 +471,4 @@ mod tests { mem::size_of::>() + 2 * mem::size_of::() ); } - - #[test] - #[cfg(feature = "arrow-buffer")] - fn arrow_buffer() { - use crate::buffer::ArrowBuffer; - - let input = vec![true, false, true]; - let bitmap = input.into_iter().collect::>(); - assert_eq!(bitmap.len(), 3); - - let input = vec![true, false, true]; - let bitmap = input.into_iter().collect::>(); - assert_eq!(bitmap.len(), 3); - assert_eq!(bitmap.into_iter().collect::>(), [true, false, true]); - } } diff --git a/src/buffer.rs b/src/buffer.rs index fbca21b9..b01f603b 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -358,48 +358,6 @@ impl BufferMut for Rc<[T]> { } } -#[cfg(feature = "arrow-buffer")] -mod arrow { - use super::{Buffer, BufferMut, BufferType}; - use crate::FixedSize; - use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; - - /// A [BufferType] implementation for [arrow_buffer::BufferBuilder]. - pub struct ArrowBuffer; - - impl BufferType for ArrowBuffer { - type Buffer = BufferBuilder; - } - - impl Buffer for BufferBuilder { - fn as_slice(&self) -> &[T] { - BufferBuilder::as_slice(self) - } - } - - impl BufferMut for BufferBuilder { - fn as_mut_slice(&mut self) -> &mut [T] { - BufferBuilder::as_slice_mut(self) - } - } - - /// A [BufferType] implementation for [arrow_buffer::ScalarBuffer]. - pub struct ArrowScalarBuffer; - - impl BufferType for ArrowScalarBuffer { - type Buffer = ScalarBuffer; - } - - impl Buffer for ScalarBuffer { - fn as_slice(&self) -> &[T] { - self - } - } -} - -#[cfg(feature = "arrow-buffer")] -pub use arrow::*; - #[cfg(test)] mod tests { use super::*; @@ -464,22 +422,4 @@ mod tests { &[0, 2, 3, 4, 5, 6] ); } - - #[test] - #[cfg(feature = "arrow-buffer")] - fn arrow() { - let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]); - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); - - let mut buffer = arrow_buffer::BufferBuilder::from_iter([1u64, 2, 3, 4]); - assert_eq!( - <_ as BufferMut>::as_mut_slice(&mut buffer), - &[1, 2, 3, 4] - ); - <_ as BufferMut>::as_mut_slice(&mut buffer)[3] = 42; - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 42]); - - let buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); - } } diff --git a/src/fixed_size.rs b/src/fixed_size.rs index 015f123e..129b216c 100644 --- a/src/fixed_size.rs +++ b/src/fixed_size.rs @@ -3,6 +3,7 @@ use crate::array::ArrayType; use std::{fmt::Debug, mem}; +#[cfg(not(feature = "arrow-buffer"))] /// Subtrait for fixed-size types. /// /// This exists to be used as trait bound where one or more of the supertraits @@ -10,20 +11,34 @@ use std::{fmt::Debug, mem}; /// fixed-size types. /// /// This trait is sealed to prevent downstream implementations. -#[cfg(not(feature = "arrow-buffer"))] pub trait FixedSize: ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static { /// The fixed-size of this type in bytes. const SIZE: usize = mem::size_of::(); } #[cfg(feature = "arrow-buffer")] +/// Subtrait for fixed-size types. +/// +/// This exists to be used as trait bound where one or more of the supertraits +/// of this trait are required, and to restrict certain implementations to +/// fixed-size types. +/// +/// This trait is sealed to prevent downstream implementations. pub trait FixedSize: - arrow_buffer::ArrowNativeType + ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static + ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static + arrow_buffer::ArrowNativeType { /// The fixed-size of this type in bytes. const SIZE: usize = mem::size_of::(); } +mod sealed { + /// Used to seal [super::FixedSize]. + pub trait Sealed {} + + // Prevent downstream implementation of [super::FixedSize]. + impl Sealed for T where T: super::FixedSize {} +} + impl FixedSize for i8 {} impl FixedSize for i16 {} impl FixedSize for i32 {} @@ -43,19 +58,11 @@ impl FixedSize for f64 {} impl FixedSize for () {} -impl FixedSize for [T; N] {} - -mod sealed { - /// Used to seal [super::FixedSize]. - pub trait Sealed {} - - // Prevent downstream implementation of [super::FixedSize]. - impl Sealed for T where T: super::FixedSize {} -} +impl FixedSize for [T; N] {} #[cfg(test)] mod tests { - use crate::FixedSize; + use super::FixedSize; #[test] fn size() { diff --git a/src/length.rs b/src/length.rs index 6b6f904b..f685fd7d 100644 --- a/src/length.rs +++ b/src/length.rs @@ -90,15 +90,3 @@ impl Length for Option { } } } - -#[cfg(feature = "arrow-buffer")] -mod arrow { - use crate::Length; - use arrow_buffer::{ArrowNativeType, BufferBuilder}; - - impl Length for BufferBuilder { - fn len(&self) -> usize { - BufferBuilder::len(self) - } - } -} diff --git a/src/lib.rs b/src/lib.rs index ab19d8a5..c9caa723 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,9 @@ pub(crate) mod validity; pub mod array; +#[cfg(any(feature = "arrow-array", feature = "arrow-buffer"))] +pub mod arrow; + // Re-export `narrow_derive` macros when the `derive` feature is enabled. #[cfg(feature = "derive")] pub use narrow_derive::ArrayType;