Skip to content

Commit

Permalink
add from_unary to PrimitiveArray
Browse files Browse the repository at this point in the history
  • Loading branch information
etseidl committed Aug 19, 2024
1 parent 69b42d9 commit 5db457c
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 45 deletions.
40 changes: 1 addition & 39 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::primitive_array::PrimitiveArray;
use crate::array::print_long_array;
use crate::iterator::FixedSizeBinaryIter;
use crate::types::ArrowPrimitiveType;
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::{bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, ScalarBuffer};
use arrow_buffer::{bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
Expand Down Expand Up @@ -466,42 +464,6 @@ impl FixedSizeBinaryArray {
pub fn iter(&self) -> FixedSizeBinaryIter<'_> {
// Delegates to `FixedSizeBinaryIter::new`, handing it a borrow of this array.
FixedSizeBinaryIter::new(self)
}

/// Maps every fixed-width value of this array through `op` and gathers the
/// results into a new [`PrimitiveArray`].
///
/// Each value is passed to `op` as a byte slice of `value_length` bytes cut
/// directly out of the value buffer. The null buffer of `self` is cloned onto
/// the returned array unchanged.
///
/// This is the fastest way to perform an operation on a fixed-size binary array
/// when the benefits of a vectorized operation outweigh the cost of
/// branching nulls and non-nulls.
///
/// # Null Handling
///
/// `op` is invoked for every slot, including slots that are null. This often
/// lets the compiler generate faster vectorized code, but it means `op` must be
/// infallible (must not error or panic) for any input bytes, otherwise this
/// function may panic.
pub fn unary<F, O>(&self, op: F) -> PrimitiveArray<O>
where
    O: ArrowPrimitiveType,
    F: Fn(&[u8]) -> O::Native,
{
    let count = self.len();
    let width = self.value_length as usize;
    let bytes = self.value_data.as_slice();

    // Performance note: the sub-slices are computed by hand rather than via
    // `bytes.chunks()`, which was considerably slower.
    let converted: Vec<O::Native> = (0..count)
        .map(|slot| {
            let start = width * slot;
            op(&bytes[start..start + width])
        })
        .collect();

    let values = ScalarBuffer::new(Buffer::from_vec(converted), 0, count);
    PrimitiveArray::new(values, self.nulls().cloned())
}
}

impl From<ArrayData> for FixedSizeBinaryArray {
Expand Down
26 changes: 26 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,32 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
PrimitiveArray::new(values, Some(nulls))
}

/// Applies a unary infallible function to each value in an array, producing a
/// new primitive array.
///
/// `op` is called once per index `i` in `0..left.len()`, in ascending order,
/// with the item returned by `left.value_unchecked(i)`. The null buffer of the
/// result is the one reported by `left.logical_nulls()`.
///
/// # Null Handling
///
/// Applies the function for all values, including those on null slots. This
/// will often allow the compiler to generate faster vectorized code, but
/// requires that the operation must be infallible (not error/panic) for any
/// value of the corresponding type or this function may panic.
pub fn from_unary<U: ArrayAccessor, F>(left: U, mut op: F) -> Self
where
    F: FnMut(U::Item) -> T::Native,
{
    let nulls = left.logical_nulls();
    let len = left.len();

    // Build the output directly from the mapped range instead of filling a
    // vector with defaults and then overwriting every slot.
    let values: Vec<T::Native> = (0..len)
        .map(|i| {
            // SAFETY: `i` is drawn from `0..len`, so it is a valid index into `left`.
            let item = unsafe { left.value_unchecked(i) };
            // The user-supplied closure runs outside the `unsafe` block.
            op(item)
        })
        .collect();

    Self::new(ScalarBuffer::from(values), nulls)
}

/// Returns a `PrimitiveBuilder` for this array, suitable for mutating values
/// in place.
///
Expand Down
12 changes: 6 additions & 6 deletions parquet/src/arrow/array_reader/fixed_len_byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,12 @@ impl ArrayReader for FixedLenByteArrayReader {
let array: ArrayRef = match &self.data_type {
ArrowType::Decimal128(p, s) => {
let f = |b: &[u8]| i128::from_be_bytes(sign_extend_be(b));
Arc::new((binary.unary(&f) as Decimal128Array).with_precision_and_scale(*p, *s)?)
Arc::new(Decimal128Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?)
as ArrayRef
}
ArrowType::Decimal256(p, s) => {
let f = |b: &[u8]| i256::from_be_bytes(sign_extend_be(b));
Arc::new((binary.unary(&f) as Decimal256Array).with_precision_and_scale(*p, *s)?)
Arc::new(Decimal256Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?)
as ArrayRef
}
ArrowType::Interval(unit) => {
Expand All @@ -180,7 +180,7 @@ impl ArrayReader for FixedLenByteArrayReader {
match unit {
IntervalUnit::YearMonth => {
let f = |b: &[u8]| i32::from_le_bytes(b[0..4].try_into().unwrap());
Arc::new(binary.unary(&f) as IntervalYearMonthArray) as ArrayRef
Arc::new(IntervalYearMonthArray::from_unary(&binary, f)) as ArrayRef
}
IntervalUnit::DayTime => {
let f = |b: &[u8]| {
Expand All @@ -189,16 +189,16 @@ impl ArrayReader for FixedLenByteArrayReader {
i32::from_le_bytes(b[8..12].try_into().unwrap()),
)
};
Arc::new(binary.unary(&f) as IntervalDayTimeArray) as ArrayRef
Arc::new(IntervalDayTimeArray::from_unary(&binary, f)) as ArrayRef
}
IntervalUnit::MonthDayNano => {
return Err(nyi_err!("MonthDayNano intervals not supported"));
}
}
}
ArrowType::Float16 => {
let f = |b: &[u8]| f16::from_le_bytes(b.try_into().unwrap());
Arc::new(binary.unary(&f) as Float16Array) as ArrayRef
let f = |b: &[u8]| f16::from_le_bytes(b[..2].try_into().unwrap());
Arc::new(Float16Array::from_unary(&binary, f)) as ArrayRef
}
_ => Arc::new(binary) as ArrayRef,
};
Expand Down

0 comments on commit 5db457c

Please sign in to comment.