Skip to content

Commit

Permalink
Add string array wrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrobbel committed Jul 1, 2021
1 parent 4f6e177 commit baecd70
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 35 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ A Rust implementation of [Apache Arrow](https://arrow.apache.org).
- [ ] Array
- [x] Fixed-size primitive
- [x] Boolean
- [ ] Variable-size binary
- [ ] Variable-size list
- [x] Variable-size binary
- [x] String array
- [x] Variable-size list
- [ ] Fixed-size list
- [ ] Struct
- [ ] Union
Expand Down Expand Up @@ -46,18 +47,21 @@ use narrow::{Array, BooleanArray, Float32Array, StructArray, Uint8Array};

#[derive(Array, Copy, Clone, Debug, PartialEq)]
pub struct Person {
name: String,
age: u8,
happy: bool,
distance: Option<f32>,
}

let persons = vec![
Person {
name: "A".to_string(),
age: 20,
happy: true,
distance: Some(1.5),
},
Person {
name: "B".to_string(),
age: 22,
happy: true,
distance: None,
Expand Down
3 changes: 2 additions & 1 deletion narrow-derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use syn::{
// todo(mb): support generics
// todo(mb): trait bounds in where clause when generic is type argument of other type e.g. Option<T>
// https://github.com/serde-rs/serde/blob/master/serde_derive/src/bound.rs
// todo(mb): convert iterators into original data structures e.g. Vec<String> from list array iterator (requires GATs)

/// Derive macro for the Array trait.
#[proc_macro_derive(Array, attributes(narrow))]
Expand Down Expand Up @@ -144,7 +145,7 @@ pub fn derive_array(input: TokenStream) -> TokenStream {
fn next(&mut self) -> Option<Self::Item> {
Some(#ident {
#(
#fields: self.#fields.next()?,
#fields: self.#fields.next()?.into(),
)*
})
}
Expand Down
3 changes: 3 additions & 0 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ pub use boolean::*;
mod variable_size_binary;
pub use variable_size_binary::*;

mod string;
pub use string::*;

mod variable_size_list;
pub use variable_size_list::*;

Expand Down
190 changes: 190 additions & 0 deletions src/array/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
use crate::{
Array, ArrayType, Offset, OffsetValue, VariableSizeBinaryArray, VariableSizeBinaryArrayIter,
};
use std::{
iter::{FromIterator, Map},
ops::{Deref, Index},
};

/// Array with variable-sized string (UTF-8) data.
///
/// This is a newtype around [`VariableSizeBinaryArray`] that exposes the
/// stored bytes as `str` values.
///
/// `T` is the offset value type (see [`Utf8Array`] and [`LargeUtf8Array`]).
/// `N` is `false` for the array type used for `String`/`&str` and `true` for
/// the array type used for `Option<String>`/`Option<&str>`.
#[derive(Debug)]
pub struct StringArray<T, const N: bool>(VariableSizeBinaryArray<T, N>)
where
T: OffsetValue;

/// Array with UTF-8 strings. Uses [`i32`] offsets.
///
/// Shorthand for [`StringArray`] with `T = i32`.
pub type Utf8Array<const N: bool> = StringArray<i32, N>;

/// Array with UTF-8 strings. Uses [`i64`] offsets.
///
/// Shorthand for [`StringArray`] with `T = i64`.
pub type LargeUtf8Array<const N: bool> = StringArray<i64, N>;

/// [`Array`] implementation that forwards to the wrapped binary array.
impl<T, const N: bool> Array for StringArray<T, N>
where
    T: OffsetValue,
{
    /// Validity type of a string array: `Offset<T, N>`.
    type Validity = Offset<T, N>;

    /// Returns the validity of the wrapped [`VariableSizeBinaryArray`].
    fn validity(&self) -> &Self::Validity {
        // The inner call already yields a reference; borrowing it again would
        // only create a `&&_` that has to be auto-dereferenced.
        self.0.validity()
    }
}

/// Owned strings are stored in a [`Utf8Array`] with `N = false`.
impl ArrayType for String {
type Array = Utf8Array<false>;
}

/// Optional owned strings are stored in a [`Utf8Array`] with `N = true`.
impl ArrayType for Option<String> {
type Array = Utf8Array<true>;
}

/// Borrowed string slices use the same array type as `String`.
impl ArrayType for &str {
type Array = Utf8Array<false>;
}

/// Optional borrowed string slices use the same array type as `Option<String>`.
impl ArrayType for Option<&str> {
type Array = Utf8Array<true>;
}

/// Gives read-only access to the wrapped [`VariableSizeBinaryArray`], so its
/// `&self` methods can be called directly on a [`StringArray`] (the tests in
/// this file call `len()` and `data()` this way).
impl<T, const N: bool> Deref for StringArray<T, N>
where
T: OffsetValue,
{
type Target = VariableSizeBinaryArray<T, N>;

// Returns a shared reference to the inner binary array.
fn deref(&self) -> &Self::Target {
&self.0
}
}

/// Builds a string array with `N = false` from an iterator of string-like items.
///
/// The items are forwarded unchanged to the `FromIterator` implementation of
/// the wrapped [`VariableSizeBinaryArray`].
// NOTE(review): the `Index` and `IntoIterator` impls in this file convert the
// stored bytes with `from_utf8_unchecked`, so the bytes must be valid UTF-8.
// Presumably the inner array stores the `AsRef<[u8]>` view of each item; a
// type whose `AsRef<[u8]>` and `AsRef<str>` views disagree could violate that
// invariant from safe code — confirm against `VariableSizeBinaryArray`.
impl<T, U> FromIterator<U> for StringArray<T, false>
where
T: OffsetValue,
U: AsRef<str> + AsRef<[u8]>,
{
fn from_iter<I>(iter: I) -> Self
where
I: IntoIterator<Item = U>,
{
// Collect into the inner binary array, then wrap it.
Self(iter.into_iter().collect())
}
}

/// Builds a string array with `N = true` from an iterator of optional
/// string-like items.
///
/// The items are forwarded unchanged to the `FromIterator` implementation of
/// the wrapped [`VariableSizeBinaryArray`].
// NOTE(review): the `Index` and `IntoIterator` impls in this file convert the
// stored bytes with `from_utf8_unchecked`, so the bytes must be valid UTF-8.
// Presumably the inner array stores the `AsRef<[u8]>` view of each `Some`
// item; a type whose `AsRef<[u8]>` and `AsRef<str>` views disagree could
// violate that invariant from safe code — confirm against
// `VariableSizeBinaryArray`.
impl<T, U> FromIterator<Option<U>> for StringArray<T, true>
where
T: OffsetValue,
U: AsRef<str> + AsRef<[u8]>,
{
fn from_iter<I>(iter: I) -> Self
where
I: IntoIterator<Item = Option<U>>,
{
// Collect into the inner binary array, then wrap it.
Self(iter.into_iter().collect())
}
}

/// Positional access to the strings of a [`StringArray`] with `N = false`.
impl<T> Index<usize> for StringArray<T, false>
where
    T: OffsetValue,
{
    type Output = str;

    /// Returns the string stored at position `index`.
    ///
    /// The lookup itself is delegated to the wrapped binary array; this impl
    /// only reinterprets the returned bytes as `str`.
    fn index(&self, index: usize) -> &Self::Output {
        // todo(mb): bounds
        let bytes = self.0.index(index);
        // SAFETY: relies on the invariant that string data stored in this
        // array is always valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}

/// Borrowing iteration over a [`StringArray`] with `N = false`, yielding
/// `&str` items.
impl<'a, T> IntoIterator for &'a StringArray<T, false>
where
    T: OffsetValue,
{
    type Item = &'a str;
    type IntoIter = Map<VariableSizeBinaryArrayIter<'a, T, false>, fn(&'a [u8]) -> &'a str>;

    /// Iterates the wrapped binary array and reinterprets every byte slice
    /// as a string slice.
    fn into_iter(self) -> Self::IntoIter {
        // A non-capturing closure, bound as the plain function pointer named
        // in `Self::IntoIter`.
        let decode: fn(&'a [u8]) -> &'a str = |bytes| {
            // SAFETY: a string array must contain valid UTF-8.
            unsafe { std::str::from_utf8_unchecked(bytes) }
        };
        self.0.into_iter().map(decode)
    }
}

/// Borrowing iteration over a [`StringArray`] with `N = true`, yielding
/// `Option<&str>` items.
impl<'a, T> IntoIterator for &'a StringArray<T, true>
where
    T: OffsetValue,
{
    type Item = Option<&'a str>;
    type IntoIter =
        Map<VariableSizeBinaryArrayIter<'a, T, true>, fn(Option<&'a [u8]>) -> Option<&'a str>>;

    /// Iterates the wrapped binary array; `Some` byte slices are
    /// reinterpreted as string slices and `None` is passed through.
    fn into_iter(self) -> Self::IntoIter {
        // A non-capturing closure, bound as the plain function pointer named
        // in `Self::IntoIter`.
        let decode: fn(Option<&'a [u8]>) -> Option<&'a str> = |slot| {
            slot.map(|bytes| {
                // SAFETY: a string array must contain valid UTF-8.
                unsafe { std::str::from_utf8_unchecked(bytes) }
            })
        };
        self.0.into_iter().map(decode)
    }
}

// Unit tests for building, indexing and iterating string arrays.
#[cfg(test)]
mod tests {
use super::*;

// Collects string slices into arrays and checks length, data size,
// indexing and that iteration yields the original values.
#[test]
fn from_iter() {
let x = "hello";
let y = "world";
let z = "!";
let vec = vec![x, y, z];
let array = vec.iter().collect::<Utf8Array<false>>();
assert_eq!(array.len(), 3);
// 5 + 5 + 1 bytes of string data.
assert_eq!(array.data().len(), 11);
assert_eq!(&array[0], x);
assert_eq!(&array[1], y);
assert_eq!(&array[2], z);
let out = array.into_iter().collect::<Vec<_>>();
assert_eq!(vec, out);
// The yielded `&str` items can also be concatenated into a `String`.
let hello_world = array.into_iter().collect::<String>();
assert_eq!(hello_world, "helloworld!");

// Same round trip with optional values and `i64` offsets; includes
// `None` entries and an empty string.
let x = "hello";
let y = "world!";
let vec = vec![Some(&x[..]), Some(&y), None, None, Some(&x), Some("")];
let array = vec.iter().copied().collect::<LargeUtf8Array<true>>();
assert_eq!(array.len(), 6);
let out = array.into_iter().collect::<Vec<_>>();
assert_eq!(vec, out);
}

// Steps through the iterators item by item and checks `size_hint` and
// the exact sequence of yielded values.
#[test]
fn into_iter() {
let x = "hello";
let y = "world";
let z = "!";
let vec = vec![x, y, z];
let array = vec.iter().collect::<Utf8Array<false>>();
let mut iter = array.into_iter();
assert_eq!(iter.size_hint(), (3, Some(3)));
assert_eq!(iter.next(), Some(&x[..]));
assert_eq!(iter.next(), Some(&y[..]));
assert_eq!(iter.next(), Some(&z[..]));
assert_eq!(iter.next(), None);

// Array built from optional values: `None` entries are yielded as
// `Some(None)`.
let x = "hello";
let y = "world";
let vec = vec![Some(&x[..]), Some(&y), None, None, Some(&x), Some("")];
let array = vec.into_iter().collect::<Utf8Array<true>>();
let mut iter = array.into_iter();
assert_eq!(iter.size_hint(), (6, Some(6)));
assert_eq!(iter.next(), Some(Some(&x[..])));
assert_eq!(iter.next(), Some(Some(&y[..])));
assert_eq!(iter.next(), Some(None));
assert_eq!(iter.next(), Some(None));
assert_eq!(iter.next(), Some(Some(&x[..])));
assert_eq!(iter.next(), Some(Some("")));
assert_eq!(iter.next(), None);
}
}
1 change: 1 addition & 0 deletions src/array/variable_size_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ where
}

/// Iterator over elements of an array with variable-sized binary data.
// todo(mb): impl nth and advance_by
pub struct VariableSizeBinaryArrayIter<'a, T, const N: bool>
where
T: OffsetValue,
Expand Down
49 changes: 17 additions & 32 deletions src/array/variable_size_list.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{Array, ArrayType, NestedArray, Offset, OffsetValue, Uint8Array};
use crate::{Array, ArrayType, NestedArray, Offset, OffsetValue};
use std::iter::{FromIterator, Skip, Take};

/// Array with variable-sized lists of other array types.
/// Array with variable-sized lists of other arrays.
///
/// Uses `U` offset types.
/// The const generic parameter `N` indicates nullability of the list items.
Expand All @@ -17,30 +17,11 @@ where
offset: Offset<U, N>,
}

impl<T> ArrayType for Vec<T>
where
T: ArrayType,
// for<'a> &'a <T as ArrayType>::Array: IntoIterator,
{
// todo(mb): cfg?
type Array = ListArray<<T as ArrayType>::Array, false>;
}

impl ArrayType for String {
type Array = ListArray<Uint8Array<false>, false>;
}

impl ArrayType for Option<String> {
type Array = ListArray<Uint8Array<false>, true>;
}

impl ArrayType for &str {
type Array = ListArray<Uint8Array<false>, false>;
}
/// Array with variable-sized lists of other array types. Uses [i32] offsets.
pub type ListArray<T, const N: bool> = VariableSizeListArray<T, i32, N>;

impl ArrayType for Option<&str> {
type Array = ListArray<Uint8Array<false>, true>;
}
/// Array with variable-sized lists of other array types. Uses [i64] offsets.
pub type LargeListArray<T, const N: bool> = VariableSizeListArray<T, i64, N>;

impl<T, U, const N: bool> Array for VariableSizeListArray<T, U, N>
where
Expand All @@ -54,24 +35,26 @@ where
}
}

impl<T> ArrayType for Vec<T>
where
T: ArrayType,
{
// todo(mb): cfg?
type Array = ListArray<<T as ArrayType>::Array, false>;
}

impl<T, U, const N: bool> NestedArray for VariableSizeListArray<T, U, N>
where
T: Array,
U: OffsetValue,
{
type Child = T;

fn child(&self) -> &T {
fn child(&self) -> &Self::Child {
&self.data
}
}

/// Array with variable-sized lists of other array types. Uses [i32] offsets.
pub type ListArray<T, const N: bool> = VariableSizeListArray<T, i32, N>;

/// Array with variable-sized lists of other array types. Uses [i64] offsets.
pub type LargeListArray<T, const N: bool> = VariableSizeListArray<T, i64, N>;

impl<T, U, V> FromIterator<V> for VariableSizeListArray<T, U, false>
where
T: Array + FromIterator<<V as IntoIterator>::Item>,
Expand Down Expand Up @@ -143,6 +126,8 @@ where
}
}

/// Iterator over elements of an array with variable-sized lists of other arrays.
// todo(mb): impl nth and advance_by
pub struct VariableSizeListArrayIter<'a, T, U, const N: bool>
where
U: OffsetValue,
Expand Down
1 change: 1 addition & 0 deletions src/bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ impl FromIterator<bool> for Bitmap {
}
}

/// Iterator over bits in a bitmap.
pub type BitmapIter<'a> = BitValIter<'a, Lsb0, usize>;

impl<'a> IntoIterator for &'a Bitmap {
Expand Down

0 comments on commit baecd70

Please sign in to comment.