diff --git a/README.md b/README.md index fbbfac7c..a3db1825 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,9 @@ A Rust implementation of [Apache Arrow](https://arrow.apache.org). - [ ] Array - [x] Fixed-size primitive - [x] Boolean - - [ ] Variable-size binary - - [ ] Variable-size list + - [x] Variable-size binary + - [x] String array + - [x] Variable-size list - [ ] Fixed-size list - [ ] Struct - [ ] Union @@ -46,6 +47,7 @@ use narrow::{Array, BooleanArray, Float32Array, StructArray, Uint8Array}; #[derive(Array, Copy, Clone, Debug, PartialEq)] pub struct Person { + name: String, age: u8, happy: bool, distance: Option, @@ -53,11 +55,13 @@ pub struct Person { let persons = vec![ Person { + name: "A".to_string(), age: 20, happy: true, distance: Some(1.5), }, Person { + name: "B".to_string(), age: 22, happy: true, distance: None, diff --git a/narrow-derive/src/lib.rs b/narrow-derive/src/lib.rs index 03558924..cceb2476 100644 --- a/narrow-derive/src/lib.rs +++ b/narrow-derive/src/lib.rs @@ -10,6 +10,7 @@ use syn::{ // todo(mb): support generics // todo(mb): trait bounds in where clause when generic is type argument of other type e.g. Option // https://github.com/serde-rs/serde/blob/master/serde_derive/src/bound.rs +// todo(mb): convert iterators into original data structures e.g. Vec from list array iterator (requires GATs) /// Derive macro for the Array trait. #[proc_macro_derive(Array, attributes(narrow))] @@ -144,7 +145,7 @@ pub fn derive_array(input: TokenStream) -> TokenStream { fn next(&mut self) -> Option { Some(#ident { #( - #fields: self.#fields.next()?, + #fields: self.#fields.next()?.into(), )* }) } diff --git a/src/array/mod.rs b/src/array/mod.rs index e33f4fdc..31b7924e 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -7,6 +7,9 @@ pub use boolean::*; mod variable_size_binary; pub use variable_size_binary::*; +mod string; +pub use string::*; + mod variable_size_list; pub use variable_size_list::*; diff --git a/src/array/string.rs b/src/array/string.rs new file mode 100644 index 00000000..24a36d8c --- /dev/null +++ b/src/array/string.rs @@ -0,0 +1,190 @@ +use crate::{ + Array, ArrayType, Offset, OffsetValue, VariableSizeBinaryArray, VariableSizeBinaryArrayIter, +}; +use std::{ + iter::{FromIterator, Map}, + ops::{Deref, Index}, +}; + +/// Array with variable sized string (UTF-8) data. +#[derive(Debug)] +pub struct StringArray(VariableSizeBinaryArray) +where + T: OffsetValue; + +/// Array with UTF-8 strings. Uses [i32] offsets. +pub type Utf8Array = StringArray; + +/// Array with UTF-8 strings. Uses [i64] offsets. +pub type LargeUtf8Array = StringArray; + +impl Array for StringArray +where + T: OffsetValue, +{ + type Validity = Offset; + + fn validity(&self) -> &Self::Validity { + &self.0.validity() + } +} + +impl ArrayType for String { + type Array = Utf8Array; +} + +impl ArrayType for Option { + type Array = Utf8Array; +} + +impl ArrayType for &str { + type Array = Utf8Array; +} + +impl ArrayType for Option<&str> { + type Array = Utf8Array; +} + +impl Deref for StringArray +where + T: OffsetValue, +{ + type Target = VariableSizeBinaryArray; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl FromIterator for StringArray +where + T: OffsetValue, + U: AsRef + AsRef<[u8]>, +{ + fn from_iter(iter: I) -> Self + where + I: IntoIterator, + { + Self(iter.into_iter().collect()) + } +} + +impl FromIterator> for StringArray +where + T: OffsetValue, + U: AsRef + AsRef<[u8]>, +{ + fn from_iter(iter: I) -> Self + where + I: IntoIterator>, + { + Self(iter.into_iter().collect()) + } +} + +impl Index for StringArray +where + T: OffsetValue, +{ + type Output = str; + + fn index(&self, index: usize) -> &Self::Output { + // todo(mb): bounds + // Safety + // - String data is always valid utf-8 + unsafe { std::str::from_utf8_unchecked(self.0.index(index)) } + } +} + +impl<'a, T> IntoIterator for &'a StringArray +where + T: OffsetValue, +{ + type Item = &'a str; + type IntoIter = Map, fn(&'a [u8]) -> &'a str>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter().map(|slice| + // Safety + // - String array must contain valid utf8. + unsafe { std::str::from_utf8_unchecked(slice) }) + } +} + +impl<'a, T> IntoIterator for &'a StringArray +where + T: OffsetValue, +{ + type Item = Option<&'a str>; + type IntoIter = + Map, fn(Option<&'a [u8]>) -> Option<&'a str>>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter().map(|opt| { + opt.map(|slice| + // Safety + // - String array must contain valid utf8. + unsafe { std::str::from_utf8_unchecked(slice) }) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn from_iter() { + let x = "hello"; + let y = "world"; + let z = "!"; + let vec = vec![x, y, z]; + let array = vec.iter().collect::>(); + assert_eq!(array.len(), 3); + assert_eq!(array.data().len(), 11); + assert_eq!(&array[0], x); + assert_eq!(&array[1], y); + assert_eq!(&array[2], z); + let out = array.into_iter().collect::>(); + assert_eq!(vec, out); + let hello_world = array.into_iter().collect::(); + assert_eq!(hello_world, "helloworld!"); + + let x = "hello"; + let y = "world!"; + let vec = vec![Some(&x[..]), Some(&y), None, None, Some(&x), Some("")]; + let array = vec.iter().copied().collect::>(); + assert_eq!(array.len(), 6); + let out = array.into_iter().collect::>(); + assert_eq!(vec, out); + } + + #[test] + fn into_iter() { + let x = "hello"; + let y = "world"; + let z = "!"; + let vec = vec![x, y, z]; + let array = vec.iter().collect::>(); + let mut iter = array.into_iter(); + assert_eq!(iter.size_hint(), (3, Some(3))); + assert_eq!(iter.next(), Some(&x[..])); + assert_eq!(iter.next(), Some(&y[..])); + assert_eq!(iter.next(), Some(&z[..])); + assert_eq!(iter.next(), None); + + let x = "hello"; + let y = "world"; + let vec = vec![Some(&x[..]), Some(&y), None, None, Some(&x), Some("")]; + let array = vec.into_iter().collect::>(); + let mut iter = array.into_iter(); + assert_eq!(iter.size_hint(), (6, Some(6))); + assert_eq!(iter.next(), Some(Some(&x[..]))); + assert_eq!(iter.next(), Some(Some(&y[..]))); + assert_eq!(iter.next(), Some(None)); + assert_eq!(iter.next(), Some(None)); + assert_eq!(iter.next(), Some(Some(&x[..]))); + assert_eq!(iter.next(), Some(Some(""))); + assert_eq!(iter.next(), None); + } +} diff --git a/src/array/variable_size_binary.rs b/src/array/variable_size_binary.rs index d964cf4d..22e6f743 100644 --- a/src/array/variable_size_binary.rs +++ b/src/array/variable_size_binary.rs @@ -110,6 +110,7 @@ where } /// Iterator over elements of an array with variable-sized binary data. +// todo(mb): impl nth and advance_by pub struct VariableSizeBinaryArrayIter<'a, T, const N: bool> where T: OffsetValue, diff --git a/src/array/variable_size_list.rs b/src/array/variable_size_list.rs index 3516a965..c0d43b94 100644 --- a/src/array/variable_size_list.rs +++ b/src/array/variable_size_list.rs @@ -1,7 +1,7 @@ -use crate::{Array, ArrayType, NestedArray, Offset, OffsetValue, Uint8Array}; +use crate::{Array, ArrayType, NestedArray, Offset, OffsetValue}; use std::iter::{FromIterator, Skip, Take}; -/// Array with variable-sized lists of other array types. +/// Array with variable-sized lists of other arrays. /// /// Uses `U` offset types. /// The const generic parameter `N` indicates nullability of the list items. @@ -17,30 +17,11 @@ where offset: Offset, } -impl ArrayType for Vec -where - T: ArrayType, - // for<'a> &'a ::Array: IntoIterator, -{ - // todo(mb): cfg? - type Array = ListArray<::Array, false>; -} - -impl ArrayType for String { - type Array = ListArray, false>; -} - -impl ArrayType for Option { - type Array = ListArray, true>; -} - -impl ArrayType for &str { - type Array = ListArray, false>; -} +/// Array with variable-sized lists of other array types. Uses [i32] offsets. +pub type ListArray = VariableSizeListArray; -impl ArrayType for Option<&str> { - type Array = ListArray, true>; -} +/// Array with variable-sized lists of other array types. Uses [i64] offsets. +pub type LargeListArray = VariableSizeListArray; impl Array for VariableSizeListArray where @@ -54,6 +35,14 @@ where } } +impl ArrayType for Vec +where + T: ArrayType, +{ + // todo(mb): cfg? + type Array = ListArray<::Array, false>; +} + impl NestedArray for VariableSizeListArray where T: Array, @@ -61,17 +50,11 @@ where { type Child = T; - fn child(&self) -> &T { + fn child(&self) -> &Self::Child { &self.data } } -/// Array with variable-sized lists of other array types. Uses [i32] offsets. -pub type ListArray = VariableSizeListArray; - -/// Array with variable-sized lists of other array types. Uses [i64] offsets. -pub type LargeListArray = VariableSizeListArray; - impl FromIterator for VariableSizeListArray where T: Array + FromIterator<::Item>, @@ -143,6 +126,8 @@ where } } +/// Iterator over elements of an array with variable-sized lists of other arrays. +// todo(mb): impl nth and advance_by pub struct VariableSizeListArrayIter<'a, T, U, const N: bool> where U: OffsetValue, diff --git a/src/bitmap.rs b/src/bitmap.rs index a22c82c5..e7029a65 100644 --- a/src/bitmap.rs +++ b/src/bitmap.rs @@ -202,6 +202,7 @@ impl FromIterator for Bitmap { } } +/// Iterator over bits in a bitmap. pub type BitmapIter<'a> = BitValIter<'a, Lsb0, usize>; impl<'a> IntoIterator for &'a Bitmap {