Skip to content

Commit

Permalink
Move arrow-rs interop to arrow module
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrobbel committed Aug 7, 2023
1 parent c5c09f0 commit e39ad3b
Show file tree
Hide file tree
Showing 13 changed files with 216 additions and 209 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ keywords.workspace = true
categories.workspace = true

[features]
default = ["arrow-array", "arrow-buffer", "derive", "unsafe"]
default = ["arrow", "derive", "unsafe"]
arrow = ["arrow-array", "arrow-buffer"]
arrow-array = ["dep:arrow-array", "arrow-buffer"]
arrow-buffer = ["dep:arrow-buffer"]
derive = ["dep:narrow-derive"]
Expand Down
88 changes: 0 additions & 88 deletions src/array/fixed_size_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,77 +136,6 @@ impl<T: FixedSize, Buffer: BufferType> BitmapRefMut for FixedSizePrimitiveArray<

impl<T: FixedSize, Buffer: BufferType> ValidityBitmap for FixedSizePrimitiveArray<T, true, Buffer> {}

#[cfg(feature = "arrow-array")]
mod arrow {
use super::FixedSizePrimitiveArray;
use crate::{
bitmap::Bitmap,
buffer::{ArrowBuffer, BufferType},
FixedSize, Length,
};
use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray};
use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer};

impl<T: FixedSize, U: ArrowPrimitiveType<Native = T>, Buffer: BufferType>
From<FixedSizePrimitiveArray<T, false, Buffer>> for PrimitiveArray<U>
where
<Buffer as BufferType>::Buffer<T>: Length + Into<<ArrowBuffer as BufferType>::Buffer<T>>,
{
fn from(value: FixedSizePrimitiveArray<T, false, Buffer>) -> Self {
let len = value.len();
Self::new(ScalarBuffer::new(value.0.into().finish(), 0, len), None)
}
}

impl<T: FixedSize, U: ArrowPrimitiveType<Native = T>, Buffer: BufferType>
From<FixedSizePrimitiveArray<T, true, Buffer>> for PrimitiveArray<U>
where
<Buffer as BufferType>::Buffer<T>: Length + Into<<ArrowBuffer as BufferType>::Buffer<T>>,
Bitmap<Buffer>: Into<BooleanBuffer>,
{
fn from(value: FixedSizePrimitiveArray<T, true, Buffer>) -> Self {
let len = value.len();
Self::new(
ScalarBuffer::new(value.0.data.into().finish(), 0, len),
Some(NullBuffer::new(value.0.validity.into())),
)
}
}

#[cfg(test)]
mod test {

#[test]
#[cfg(feature = "arrow-array")]
fn arrow_array() {
use crate::{array::Int8Array, bitmap::ValidityBitmap, buffer::ArrowBuffer};
use arrow_array::{types::Int8Type, Array, PrimitiveArray};

let input = [1, 2, 3, 4];
let array = input.into_iter().collect::<Int8Array<false, ArrowBuffer>>();
let array = PrimitiveArray::<Int8Type>::from(array);
assert_eq!(array.len(), 4);

let input = [1, 2, 3, 4];
let array = input.into_iter().collect::<Int8Array<false>>();
let array = PrimitiveArray::<Int8Type>::from(array);
assert_eq!(array.len(), 4);

let input = [Some(1), None, Some(3), Some(4)];
let array = input.into_iter().collect::<Int8Array<true, ArrowBuffer>>();
assert_eq!(array.null_count(), 1);
let array = PrimitiveArray::<Int8Type>::from(array);
assert_eq!(array.len(), 4);
assert_eq!(array.null_count(), 1);
}

#[test]
fn convert() {}
}
}

pub use arrow::*;

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -276,21 +205,4 @@ mod tests {
mem::size_of::<Int8Array>() + mem::size_of::<Bitmap>()
);
}

#[test]
#[cfg(feature = "arrow-buffer")]
fn arrow_buffer() {
use crate::buffer::ArrowBuffer;

let input = [1, 2, 3, 4];
let mut array = input.into_iter().collect::<Int8Array<false, ArrowBuffer>>();
assert_eq!(array.len(), 4);
// Use arrow_buffer
array.0.append_n(5, 5);
assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]);

let input = [Some(1), None, Some(3), Some(4)];
let array = input.into_iter().collect::<Int8Array<true, ArrowBuffer>>();
assert_eq!(array.len(), 4);
}
}
76 changes: 76 additions & 0 deletions src/arrow/array/fixed_size_primitive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use crate::{
array::FixedSizePrimitiveArray, arrow::buffer::ArrowBuffer, bitmap::Bitmap, buffer::BufferType,
FixedSize, Length,
};
use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray};
use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer};

/// Converts a non-nullable [`FixedSizePrimitiveArray`] into an arrow-rs
/// [`PrimitiveArray`] that carries no null buffer.
///
/// The values buffer is converted into the [`ArrowBuffer`] buffer type and
/// then finished into the buffer that backs the resulting array.
impl<T, U, Buffer> From<FixedSizePrimitiveArray<T, false, Buffer>> for PrimitiveArray<U>
where
    T: FixedSize,
    U: ArrowPrimitiveType<Native = T>,
    Buffer: BufferType,
    <Buffer as BufferType>::Buffer<T>: Length + Into<<ArrowBuffer as BufferType>::Buffer<T>>,
{
    fn from(value: FixedSizePrimitiveArray<T, false, Buffer>) -> Self {
        // Read the length first: the inner buffer is moved out of `value` below.
        let length = value.len();
        let values = value.0.into().finish();
        Self::new(ScalarBuffer::new(values, 0, length), None)
    }
}

/// Converts a nullable [`FixedSizePrimitiveArray`] into an arrow-rs
/// [`PrimitiveArray`], turning the validity bitmap into the array's
/// [`NullBuffer`].
impl<T, U, Buffer> From<FixedSizePrimitiveArray<T, true, Buffer>> for PrimitiveArray<U>
where
    T: FixedSize,
    U: ArrowPrimitiveType<Native = T>,
    Buffer: BufferType,
    <Buffer as BufferType>::Buffer<T>: Length + Into<<ArrowBuffer as BufferType>::Buffer<T>>,
    Bitmap<Buffer>: Into<BooleanBuffer>,
{
    fn from(value: FixedSizePrimitiveArray<T, true, Buffer>) -> Self {
        // Read the length first: both fields of the inner value are moved out
        // of `value` below.
        let length = value.len();
        let values = ScalarBuffer::new(value.0.data.into().finish(), 0, length);
        let nulls = NullBuffer::new(value.0.validity.into());
        Self::new(values, Some(nulls))
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::array::Int8Array;

    /// Arrays convert into arrow-rs primitive arrays, keeping their values
    /// and the positions of their nulls.
    ///
    /// No feature gate is needed on this test: the enclosing module is only
    /// compiled when the `arrow-array` feature is enabled.
    #[test]
    fn arrow_array() {
        use crate::bitmap::ValidityBitmap;
        use arrow_array::{types::Int8Type, Array};

        // Non-nullable, already backed by an arrow buffer.
        let input = [1, 2, 3, 4];
        let array = input.into_iter().collect::<Int8Array<false, ArrowBuffer>>();
        let array = PrimitiveArray::<Int8Type>::from(array);
        assert_eq!(array.len(), 4);
        assert_eq!(
            array.iter().collect::<Vec<_>>(),
            [Some(1), Some(2), Some(3), Some(4)]
        );

        // Non-nullable, backed by the default buffer type.
        let input = [1, 2, 3, 4];
        let array = input.into_iter().collect::<Int8Array<false>>();
        let array = PrimitiveArray::<Int8Type>::from(array);
        assert_eq!(array.len(), 4);

        // Nullable: the null count and the null position must survive.
        let input = [Some(1), None, Some(3), Some(4)];
        let array = input.into_iter().collect::<Int8Array<true, ArrowBuffer>>();
        assert_eq!(array.null_count(), 1);
        let array = PrimitiveArray::<Int8Type>::from(array);
        assert_eq!(array.len(), 4);
        assert_eq!(array.null_count(), 1);
        assert_eq!(
            array.iter().collect::<Vec<_>>(),
            [Some(1), None, Some(3), Some(4)]
        );
    }

    /// Arrays backed by [`ArrowBuffer`] expose the arrow buffer builder API
    /// on their inner buffer.
    #[test]
    fn arrow_buffer() {
        let input = [1, 2, 3, 4];
        let mut array = input.into_iter().collect::<Int8Array<false, ArrowBuffer>>();
        assert_eq!(array.len(), 4);
        // Use arrow_buffer
        array.0.append_n(5, 5);
        assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]);

        let input = [Some(1), None, Some(3), Some(4)];
        let array = input.into_iter().collect::<Int8Array<true, ArrowBuffer>>();
        assert_eq!(array.len(), 4);
    }
}
1 change: 1 addition & 0 deletions src/arrow/array/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/// Conversions of `FixedSizePrimitiveArray` into `arrow_array::PrimitiveArray`.
pub mod fixed_size_primitive;
30 changes: 30 additions & 0 deletions src/arrow/bitmap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use super::buffer::ArrowBuffer;
use crate::{bitmap::Bitmap, buffer::BufferType};
use arrow_buffer::BooleanBuffer;

/// Converts a [`Bitmap`] into an arrow-rs [`BooleanBuffer`], carrying over the
/// bitmap's bit offset and its number of bits.
impl<Buffer> From<Bitmap<Buffer>> for BooleanBuffer
where
    Buffer: BufferType,
    <Buffer as BufferType>::Buffer<u8>: Into<<ArrowBuffer as BufferType>::Buffer<u8>>,
{
    fn from(value: Bitmap<Buffer>) -> Self {
        // Move the byte buffer into an arrow buffer builder and finish it;
        // `offset` and `bits` are plain integers and stay readable afterwards.
        let bytes = value.buffer.into().finish();
        Self::new(bytes, value.offset, value.bits)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::Length;

    /// A bitmap can be collected into arrow-backed storage and read back.
    #[test]
    fn arrow_buffer() {
        let input = vec![true, false, true];
        let bitmap = input.into_iter().collect::<Bitmap<ArrowBuffer>>();
        assert_eq!(bitmap.len(), 3);
        assert_eq!(bitmap.into_iter().collect::<Vec<_>>(), [true, false, true]);
    }

    /// The `From<Bitmap> for BooleanBuffer` conversion keeps the number of
    /// bits and the value of every bit.
    #[test]
    fn into_boolean_buffer() {
        let input = vec![true, false, true];
        let bitmap = input.into_iter().collect::<Bitmap<ArrowBuffer>>();
        let boolean_buffer = BooleanBuffer::from(bitmap);
        assert_eq!(boolean_buffer.len(), 3);
        assert_eq!(boolean_buffer.count_set_bits(), 2);
        assert_eq!(
            boolean_buffer.iter().collect::<Vec<_>>(),
            [true, false, true]
        );
    }
}
57 changes: 57 additions & 0 deletions src/arrow/buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use crate::buffer::{Buffer, BufferMut, BufferType};
use crate::FixedSize;
use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer};

/// A [`BufferType`] implementation for [`arrow_buffer::BufferBuilder`].
///
/// This unit struct holds no data; it only selects the buffer type through
/// its [`BufferType`] implementation.
pub struct ArrowBuffer;

impl BufferType for ArrowBuffer {
// Items of type `T` are stored in a `BufferBuilder<T>`.
type Buffer<T: FixedSize> = BufferBuilder<T>;
}

/// Exposes the items of a [`BufferBuilder`] as a slice through the crate's
/// [`Buffer`] trait.
impl<T: FixedSize + ArrowNativeType> Buffer<T> for BufferBuilder<T> {
fn as_slice(&self) -> &[T] {
// Path call rather than `self.as_slice()`, so the forwarding target is
// spelled out. NOTE(review): presumably this resolves to the inherent
// `BufferBuilder::as_slice` of arrow-buffer and not to this trait method —
// confirm.
BufferBuilder::as_slice(self)
}
}

/// Exposes the items of a [`BufferBuilder`] as a mutable slice through the
/// crate's [`BufferMut`] trait.
impl<T: FixedSize + ArrowNativeType> BufferMut<T> for BufferBuilder<T> {
fn as_mut_slice(&mut self) -> &mut [T] {
// The method being forwarded to is named `as_slice_mut`, while this trait's
// method is named `as_mut_slice`.
BufferBuilder::as_slice_mut(self)
}
}

/// A [`BufferType`] implementation for [`arrow_buffer::ScalarBuffer`].
///
/// This unit struct holds no data; it only selects the buffer type through
/// its [`BufferType`] implementation.
pub struct ArrowScalarBuffer;

impl BufferType for ArrowScalarBuffer {
// Items of type `T` are stored in a `ScalarBuffer<T>`.
type Buffer<T: FixedSize> = ScalarBuffer<T>;
}

/// Exposes the items of a [`ScalarBuffer`] as a slice through the crate's
/// [`Buffer`] trait.
impl<T> Buffer<T> for ScalarBuffer<T>
where
    T: FixedSize + ArrowNativeType,
{
    fn as_slice(&self) -> &[T] {
        // Explicit form of the deref coercion from `&ScalarBuffer<T>` to `&[T]`.
        &**self
    }
}

#[cfg(test)]
mod tests {
use super::*;

/// The arrow buffer types can be read, and where supported written, through
/// the crate's `Buffer` and `BufferMut` traits.
#[test]
fn arrow() {
// Read access to a `BufferBuilder`. The trait parameter in the qualified
// call names the item type (`u32`) of the untyped integer literals.
let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]);
assert_eq!(<_ as Buffer<u32>>::as_slice(&buffer), &[1, 2, 3, 4]);

// Write access to a `BufferBuilder`: a write through the mutable slice is
// visible through the shared slice afterwards.
let mut buffer = arrow_buffer::BufferBuilder::from_iter([1u64, 2, 3, 4]);
assert_eq!(
<_ as BufferMut<u64>>::as_mut_slice(&mut buffer),
&[1, 2, 3, 4]
);
<_ as BufferMut<u64>>::as_mut_slice(&mut buffer)[3] = 42;
assert_eq!(<_ as Buffer<u64>>::as_slice(&buffer), &[1, 2, 3, 42]);

// Read access to a `ScalarBuffer`.
let buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]);
assert_eq!(<_ as Buffer<u32>>::as_slice(&buffer), &[1, 2, 3, 4]);
}
}
14 changes: 14 additions & 0 deletions src/arrow/length.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use crate::Length;
use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer};

/// Reports the length of a [`BufferBuilder`] through the crate's [`Length`]
/// trait.
impl<T: ArrowNativeType> Length for BufferBuilder<T> {
fn len(&self) -> usize {
// Path call rather than `self.len()`, so the forwarding target is spelled
// out. NOTE(review): presumably this resolves to the inherent
// `BufferBuilder::len` of arrow-buffer and not to this trait method —
// confirm.
BufferBuilder::len(self)
}
}

/// Reports the number of items in a [`ScalarBuffer`] through the crate's
/// [`Length`] trait.
impl<T> Length for ScalarBuffer<T>
where
    T: ArrowNativeType,
{
    fn len(&self) -> usize {
        // Take the length of the slice view of the buffer, not of `self`.
        let items: &[T] = self.as_ref();
        items.len()
    }
}
11 changes: 11 additions & 0 deletions src/arrow/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/// Conversions of this crate's arrays into `arrow_array` arrays.
#[cfg(feature = "arrow-array")]
pub mod array;

/// Conversion of `Bitmap` into `arrow_buffer::BooleanBuffer`.
#[cfg(feature = "arrow-buffer")]
pub mod bitmap;

/// `BufferType` implementations backed by `arrow_buffer` buffer types.
#[cfg(feature = "arrow-buffer")]
pub mod buffer;

/// `Length` implementations for `arrow_buffer` buffer types.
#[cfg(feature = "arrow-buffer")]
pub mod length;
39 changes: 3 additions & 36 deletions src/bitmap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ pub trait BitmapRefMut: BitmapRef {
// todo(mb): implement ops
pub struct Bitmap<Buffer: BufferType = VecBuffer> {
/// The bits are stored in this buffer of bytes.
buffer: <Buffer as BufferType>::Buffer<u8>,
pub(crate) buffer: <Buffer as BufferType>::Buffer<u8>,

/// The number of bits stored in the bitmap.
bits: usize,
pub(crate) bits: usize,

/// An offset (in number of bits) in the buffer. This enables zero-copy
/// slicing of the bitmap on non-byte boundaries.
offset: usize,
pub(crate) offset: usize,
}

impl<Buffer: BufferType> BitmapRef for Bitmap<Buffer> {
Expand Down Expand Up @@ -299,24 +299,6 @@ impl<Buffer: BufferType> Length for Bitmap<Buffer> {

impl<Buffer: BufferType> ValidityBitmap for Bitmap<Buffer> {}

#[cfg(feature = "arrow-buffer")]
mod arrow {
use super::Bitmap;
use crate::buffer::{ArrowBuffer, BufferType};
use arrow_buffer::BooleanBuffer;

impl<Buffer: BufferType> From<Bitmap<Buffer>> for BooleanBuffer
where
<Buffer as BufferType>::Buffer<u8>: Into<<ArrowBuffer as BufferType>::Buffer<u8>>,
{
fn from(value: Bitmap<Buffer>) -> Self {
BooleanBuffer::new(value.buffer.into().finish(), 0, value.bits)
}
}
}

pub use arrow::*;

#[cfg(test)]
mod tests {
use crate::buffer::{ArrayBuffer, BoxBuffer, BufferRefMut, SliceBuffer};
Expand Down Expand Up @@ -489,19 +471,4 @@ mod tests {
mem::size_of::<Box<[u8]>>() + 2 * mem::size_of::<usize>()
);
}

#[test]
#[cfg(feature = "arrow-buffer")]
fn arrow_buffer() {
use crate::buffer::ArrowBuffer;

let input = vec![true, false, true];
let bitmap = input.into_iter().collect::<Bitmap<ArrowBuffer>>();
assert_eq!(bitmap.len(), 3);

let input = vec![true, false, true];
let bitmap = input.into_iter().collect::<Bitmap<ArrowBuffer>>();
assert_eq!(bitmap.len(), 3);
assert_eq!(bitmap.into_iter().collect::<Vec<_>>(), [true, false, true]);
}
}
Loading

0 comments on commit e39ad3b

Please sign in to comment.