diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 2faa93d7b977..e393e2b15ae6 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -15,13 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::array::primitive_array::PrimitiveArray; use crate::array::print_long_array; use crate::iterator::FixedSizeBinaryIter; -use crate::types::ArrowPrimitiveType; use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar}; use arrow_buffer::buffer::NullBuffer; -use arrow_buffer::{bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, ScalarBuffer}; +use arrow_buffer::{bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; use std::any::Any; @@ -466,42 +464,6 @@ impl FixedSizeBinaryArray { pub fn iter(&self) -> FixedSizeBinaryIter<'_> { FixedSizeBinaryIter::new(self) } - - /// Applies a unary infallible function to a fixed-size binary array, producing a - /// new primitive array. - /// - /// This is the fastest way to perform an operation on a fixed-size binary array - /// when the benefits of a vectorized operation outweigh the cost of - /// branching nulls and non-nulls. - /// - /// # Null Handling - /// - /// Applies the function for all values, including those on null slots. This - /// will often allow the compiler to generate faster vectorized code, but - /// requires that the operation must be infallible (not error/panic) for any - /// value of the corresponding type or this function may panic. - pub fn unary(&self, op: F) -> PrimitiveArray - where - O: ArrowPrimitiveType, - F: Fn(&[u8]) -> O::Native, - { - let num_vals = self.len(); - let length = self.value_length as usize; - let src = self.value_data.as_slice(); - let mut dst = vec![O::Native::default(); num_vals]; - - // Performance note: not using src.chunks() as that was considerably slower than - // calculating slices of src directly. - for (i, dsti) in dst.iter_mut().enumerate().take(num_vals) { - let idx = length * i; - *dsti = op(&src[idx..idx + length]) - } - - PrimitiveArray::new( - ScalarBuffer::new(Buffer::from_vec(dst), 0, num_vals), - self.nulls().cloned(), - ) - } } impl From for FixedSizeBinaryArray { diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index db14845b08d9..354538f70736 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1016,6 +1016,32 @@ impl PrimitiveArray { PrimitiveArray::new(values, Some(nulls)) } + /// Applies a unary infallible function to each value in an array, producing a + /// new primitive array. + /// + /// # Null Handling + /// + /// Applies the function for all values, including those on null slots. This + /// will often allow the compiler to generate faster vectorized code, but + /// requires that the operation must be infallible (not error/panic) for any + /// value of the corresponding type or this function may panic. + pub fn from_unary(left: U, mut op: F) -> Self + where + F: FnMut(U::Item) -> T::Native, + { + let nulls = left.logical_nulls(); + let mut values: Vec = vec![T::Native::default(); left.len()]; + + for (i, val) in values.iter_mut().enumerate().take(left.len()) { + // SAFETY: i in range 0..len + unsafe { + *val = op(left.value_unchecked(i)); + } + } + let values = ScalarBuffer::from(values); + Self::new(values, nulls) + } + /// Returns a `PrimitiveBuilder` for this array, suitable for mutating values /// in place. /// diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs index e3e9192bdff9..7731ba08937c 100644 --- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs +++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs @@ -166,12 +166,12 @@ impl ArrayReader for FixedLenByteArrayReader { let array: ArrayRef = match &self.data_type { ArrowType::Decimal128(p, s) => { let f = |b: &[u8]| i128::from_be_bytes(sign_extend_be(b)); - Arc::new((binary.unary(&f) as Decimal128Array).with_precision_and_scale(*p, *s)?) + Arc::new(Decimal128Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?) as ArrayRef } ArrowType::Decimal256(p, s) => { let f = |b: &[u8]| i256::from_be_bytes(sign_extend_be(b)); - Arc::new((binary.unary(&f) as Decimal256Array).with_precision_and_scale(*p, *s)?) + Arc::new(Decimal256Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?) as ArrayRef } ArrowType::Interval(unit) => { @@ -180,7 +180,7 @@ impl ArrayReader for FixedLenByteArrayReader { match unit { IntervalUnit::YearMonth => { let f = |b: &[u8]| i32::from_le_bytes(b[0..4].try_into().unwrap()); - Arc::new(binary.unary(&f) as IntervalYearMonthArray) as ArrayRef + Arc::new(IntervalYearMonthArray::from_unary(&binary, f)) as ArrayRef } IntervalUnit::DayTime => { let f = |b: &[u8]| { @@ -189,7 +189,7 @@ impl ArrayReader for FixedLenByteArrayReader { i32::from_le_bytes(b[8..12].try_into().unwrap()), ) }; - Arc::new(binary.unary(&f) as IntervalDayTimeArray) as ArrayRef + Arc::new(IntervalDayTimeArray::from_unary(&binary, f)) as ArrayRef } IntervalUnit::MonthDayNano => { return Err(nyi_err!("MonthDayNano intervals not supported")); @@ -197,8 +197,8 @@ impl ArrayReader for FixedLenByteArrayReader { } } ArrowType::Float16 => { - let f = |b: &[u8]| f16::from_le_bytes(b.try_into().unwrap()); - Arc::new(binary.unary(&f) as Float16Array) as ArrayRef + let f = |b: &[u8]| f16::from_le_bytes(b[..2].try_into().unwrap()); + Arc::new(Float16Array::from_unary(&binary, f)) as ArrayRef } _ => Arc::new(binary) as ArrayRef, };