From 9631b101e60d278c9c23fd65d7084fc2f9033eea Mon Sep 17 00:00:00 2001 From: psvri Date: Thu, 18 Aug 2022 18:28:02 +0000 Subject: [PATCH 1/5] Refactoring primitive array builder --- arrow/examples/builders.rs | 2 +- arrow/src/array/array_dictionary.rs | 12 ++-- arrow/src/array/array_primitive.rs | 2 +- arrow/src/array/builder/buffer_builder.rs | 2 +- .../array/builder/fixed_size_list_builder.rs | 2 +- .../src/array/builder/generic_list_builder.rs | 6 +- arrow/src/array/builder/map_builder.rs | 2 +- arrow/src/array/builder/mod.rs | 4 +- arrow/src/array/builder/primitive_builder.rs | 10 ++- .../builder/primitive_dictionary_builder.rs | 12 ++-- .../builder/string_dictionary_builder.rs | 10 +-- arrow/src/array/builder/struct_builder.rs | 64 +++++++++---------- arrow/src/array/data.rs | 4 +- arrow/src/array/equal/list.rs | 4 +- arrow/src/array/equal/mod.rs | 6 +- arrow/src/array/transform/mod.rs | 30 +++++---- arrow/src/compute/kernels/arithmetic.rs | 56 ++++++++-------- arrow/src/compute/kernels/arity.rs | 8 +-- arrow/src/compute/kernels/cast.rs | 32 +++++----- arrow/src/compute/kernels/comparison.rs | 36 +++++------ arrow/src/compute/kernels/filter.rs | 4 +- arrow/src/compute/kernels/take.rs | 4 +- arrow/src/compute/kernels/temporal.rs | 22 +++---- arrow/src/json/reader.rs | 2 +- arrow/src/util/integration_util.rs | 22 +++---- 25 files changed, 184 insertions(+), 174 deletions(-) diff --git a/arrow/examples/builders.rs b/arrow/examples/builders.rs index 45d92f54dd1..bacd550bdfd 100644 --- a/arrow/examples/builders.rs +++ b/arrow/examples/builders.rs @@ -34,7 +34,7 @@ fn main() { // u64, i8, i16, i32, i64, f32, f64) // Create a new builder with a capacity of 100 - let mut primitive_array_builder = Int32Builder::new(100); + let mut primitive_array_builder = Int32Builder::with_capacity(100); // Append an individual primitive value primitive_array_builder.append_value(55); diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index 2acb51750d1..4977c029a04 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -329,7 +329,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator> for Dictionary fn from_iter>>(iter: I) -> Self { let it = iter.into_iter(); let (lower, _) = it.size_hint(); - let key_builder = PrimitiveBuilder::::new(lower); + let key_builder = PrimitiveBuilder::::with_capacity(lower); let value_builder = StringBuilder::new(256); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); it.for_each(|i| { @@ -367,7 +367,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray fn from_iter>(iter: I) -> Self { let it = iter.into_iter(); let (lower, _) = it.size_hint(); - let key_builder = PrimitiveBuilder::::new(lower); + let key_builder = PrimitiveBuilder::::with_capacity(lower); let value_builder = StringBuilder::new(256); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); it.for_each(|i| { @@ -581,8 +581,8 @@ mod tests { #[test] fn test_dictionary_array_fmt_debug() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(12345678).unwrap(); builder.append_null(); @@ -593,8 +593,8 @@ mod tests { format!("{:?}", array) ); - let key_builder = PrimitiveBuilder::::new(20); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(20); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); for _ in 0..20 { builder.append(1).unwrap(); diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs index a10104d980e..cc85d2a7c92 100644 --- a/arrow/src/array/array_primitive.rs +++ b/arrow/src/array/array_primitive.rs @@ -91,7 +91,7 @@ impl PrimitiveArray { // Returns a new primitive array builder pub fn builder(capacity: usize) -> PrimitiveBuilder { - PrimitiveBuilder::::new(capacity) + PrimitiveBuilder::::with_capacity(capacity) } /// Returns the primitive value at index `i`. diff --git a/arrow/src/array/builder/buffer_builder.rs b/arrow/src/array/builder/buffer_builder.rs index dd116f035af..a6a81dfd6c0 100644 --- a/arrow/src/array/builder/buffer_builder.rs +++ b/arrow/src/array/builder/buffer_builder.rs @@ -457,7 +457,7 @@ mod tests { #[test] fn test_append_values() { - let mut a = Int8Builder::new(0); + let mut a = Int8Builder::new(); a.append_value(1); a.append_null(); a.append_value(-2); diff --git a/arrow/src/array/builder/fixed_size_list_builder.rs b/arrow/src/array/builder/fixed_size_list_builder.rs index 343ce3657b3..da850d15624 100644 --- a/arrow/src/array/builder/fixed_size_list_builder.rs +++ b/arrow/src/array/builder/fixed_size_list_builder.rs @@ -156,7 +156,7 @@ mod tests { #[test] fn test_fixed_size_list_array_builder() { - let values_builder = Int32Builder::new(10); + let values_builder = Int32Builder::new(); let mut builder = FixedSizeListBuilder::new(values_builder, 3); // [[0, 1, 2], null, [3, null, 5], [6, 7, null]] diff --git a/arrow/src/array/builder/generic_list_builder.rs b/arrow/src/array/builder/generic_list_builder.rs index 686156df13b..1beda711417 100644 --- a/arrow/src/array/builder/generic_list_builder.rs +++ b/arrow/src/array/builder/generic_list_builder.rs @@ -161,7 +161,7 @@ mod tests { use crate::datatypes::DataType; fn _test_generic_list_array_builder() { - let values_builder = Int32Builder::new(10); + let values_builder = Int32Builder::with_capacity(10); let mut builder = GenericListBuilder::::new(values_builder); // [[0, 1, 2], [3, 4, 5], [6, 7]] @@ -206,7 +206,7 @@ mod tests { } fn _test_generic_list_array_builder_nulls() { - let values_builder = Int32Builder::new(10); + let values_builder = Int32Builder::with_capacity(10); let mut builder = GenericListBuilder::::new(values_builder); // [[0, 1, 2], null, [3, null, 5], [6, 7]] @@ -265,7 +265,7 @@ mod tests { #[test] fn test_list_list_array_builder() { - let primitive_builder = Int32Builder::new(10); + let primitive_builder = Int32Builder::with_capacity(10); let values_builder = ListBuilder::new(primitive_builder); let mut builder = ListBuilder::new(values_builder); diff --git a/arrow/src/array/builder/map_builder.rs b/arrow/src/array/builder/map_builder.rs index 7e68abd5f1a..dbc35d309ba 100644 --- a/arrow/src/array/builder/map_builder.rs +++ b/arrow/src/array/builder/map_builder.rs @@ -206,7 +206,7 @@ mod tests { #[test] fn test_map_array_builder() { let string_builder = StringBuilder::new(4); - let int_builder = Int32Builder::new(4); + let int_builder = Int32Builder::with_capacity(4); let mut builder = MapBuilder::new(None, string_builder, int_builder); diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs index 77dd907f6ee..ecacfc1ea93 100644 --- a/arrow/src/array/builder/mod.rs +++ b/arrow/src/array/builder/mod.rs @@ -73,8 +73,8 @@ pub use union_builder::UnionBuilder; /// # fn main() -> std::result::Result<(), ArrowError> { /// // Create /// let mut data_builders: Vec> = vec![ -/// Box::new(Float64Builder::new(1024)), -/// Box::new(Int64Builder::new(1024)), +/// Box::new(Float64Builder::new()), +/// Box::new(Int64Builder::new()), /// Box::new(StringBuilder::new(1024)), /// ]; /// diff --git a/arrow/src/array/builder/primitive_builder.rs b/arrow/src/array/builder/primitive_builder.rs index 3b9db1f01e6..fad0079fe9d 100644 --- a/arrow/src/array/builder/primitive_builder.rs +++ b/arrow/src/array/builder/primitive_builder.rs @@ -66,7 +66,13 @@ impl ArrayBuilder for PrimitiveBuilder { impl PrimitiveBuilder { /// Creates a new primitive array builder - pub fn new(capacity: usize) -> Self { + #[allow(clippy::new_without_default)] + pub fn new() -> Self { + Self::with_capacity(1024) + } + + /// Creates a new primitive array builder with capacity + pub fn with_capacity(capacity: usize) -> Self { Self { values_builder: BufferBuilder::::new(capacity), null_buffer_builder: NullBufferBuilder::new(capacity), @@ -355,7 +361,7 @@ mod tests { #[test] fn test_primitive_array_builder_finish() { - let mut builder = Int32Builder::new(5); + let mut builder = Int32Builder::new(); builder.append_slice(&[2, 4, 6, 8]); let mut arr = builder.finish(); assert_eq!(4, arr.len()); diff --git a/arrow/src/array/builder/primitive_dictionary_builder.rs b/arrow/src/array/builder/primitive_dictionary_builder.rs index 71656f985d1..71223c68828 100644 --- a/arrow/src/array/builder/primitive_dictionary_builder.rs +++ b/arrow/src/array/builder/primitive_dictionary_builder.rs @@ -60,8 +60,8 @@ impl Eq for Value {} /// }; /// use arrow::datatypes::{UInt8Type, UInt32Type}; /// -/// let key_builder = PrimitiveBuilder::::new(3); -/// let value_builder = PrimitiveBuilder::::new(2); +/// let key_builder = PrimitiveBuilder::::with_capacity(3); +/// let value_builder = PrimitiveBuilder::::with_capacity(2); /// let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); /// builder.append(12345678).unwrap(); /// builder.append_null(); @@ -211,8 +211,8 @@ mod tests { #[test] fn test_primitive_dictionary_builder() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(12345678).unwrap(); builder.append_null(); @@ -239,8 +239,8 @@ mod tests { #[test] #[should_panic(expected = "DictionaryKeyOverflowError")] fn test_primitive_dictionary_overflow() { - let key_builder = PrimitiveBuilder::::new(257); - let value_builder = PrimitiveBuilder::::new(257); + let key_builder = PrimitiveBuilder::::with_capacity(257); + let value_builder = PrimitiveBuilder::::with_capacity(257); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); // 256 unique keys. for i in 0..256 { diff --git a/arrow/src/array/builder/string_dictionary_builder.rs b/arrow/src/array/builder/string_dictionary_builder.rs index 15a36a64c14..d962d0638e2 100644 --- a/arrow/src/array/builder/string_dictionary_builder.rs +++ b/arrow/src/array/builder/string_dictionary_builder.rs @@ -42,7 +42,7 @@ use std::sync::Arc; /// // Create a dictionary array indexed by bytes whose values are Strings. /// // It can thus hold up to 256 distinct string values. /// -/// let key_builder = PrimitiveBuilder::::new(100); +/// let key_builder = PrimitiveBuilder::::with_capacity(100); /// let value_builder = StringBuilder::new(100); /// let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); /// @@ -111,7 +111,7 @@ where /// /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]); /// - /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::::new(3), &dictionary_values).unwrap(); + /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::::with_capacity(3), &dictionary_values).unwrap(); /// builder.append("def").unwrap(); /// builder.append_null(); /// builder.append("abc").unwrap(); @@ -290,7 +290,7 @@ mod tests { #[test] fn test_string_dictionary_builder() { - let key_builder = PrimitiveBuilder::::new(5); + let key_builder = PrimitiveBuilder::::with_capacity(5); let value_builder = StringBuilder::new(2); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); builder.append("abc").unwrap(); @@ -317,7 +317,7 @@ mod tests { fn test_string_dictionary_builder_with_existing_dictionary() { let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]); - let key_builder = PrimitiveBuilder::::new(6); + let key_builder = PrimitiveBuilder::::with_capacity(6); let mut builder = StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) .unwrap(); @@ -349,7 +349,7 @@ mod tests { let dictionary: Vec> = vec![None]; let dictionary = StringArray::from(dictionary); - let key_builder = PrimitiveBuilder::::new(4); + let key_builder = PrimitiveBuilder::::with_capacity(4); let mut builder = StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) .unwrap(); diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs index 01a792b5d19..6ea559cdd10 100644 --- a/arrow/src/array/builder/struct_builder.rs +++ b/arrow/src/array/builder/struct_builder.rs @@ -97,16 +97,16 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box unimplemented!(), DataType::Boolean => Box::new(BooleanBuilder::new(capacity)), - DataType::Int8 => Box::new(Int8Builder::new(capacity)), - DataType::Int16 => Box::new(Int16Builder::new(capacity)), - DataType::Int32 => Box::new(Int32Builder::new(capacity)), - DataType::Int64 => Box::new(Int64Builder::new(capacity)), - DataType::UInt8 => Box::new(UInt8Builder::new(capacity)), - DataType::UInt16 => Box::new(UInt16Builder::new(capacity)), - DataType::UInt32 => Box::new(UInt32Builder::new(capacity)), - DataType::UInt64 => Box::new(UInt64Builder::new(capacity)), - DataType::Float32 => Box::new(Float32Builder::new(capacity)), - DataType::Float64 => Box::new(Float64Builder::new(capacity)), + DataType::Int8 => Box::new(Int8Builder::with_capacity(capacity)), + DataType::Int16 => Box::new(Int16Builder::with_capacity(capacity)), + DataType::Int32 => Box::new(Int32Builder::with_capacity(capacity)), + DataType::Int64 => Box::new(Int64Builder::with_capacity(capacity)), + DataType::UInt8 => Box::new(UInt8Builder::with_capacity(capacity)), + DataType::UInt16 => Box::new(UInt16Builder::with_capacity(capacity)), + DataType::UInt32 => Box::new(UInt32Builder::with_capacity(capacity)), + DataType::UInt64 => Box::new(UInt64Builder::with_capacity(capacity)), + DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)), + DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)), DataType::Binary => Box::new(BinaryBuilder::new(capacity)), DataType::FixedSizeBinary(len) => { Box::new(FixedSizeBinaryBuilder::new(capacity, *len)) @@ -115,52 +115,52 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box Box::new(StringBuilder::new(capacity)), - DataType::Date32 => Box::new(Date32Builder::new(capacity)), - DataType::Date64 => Box::new(Date64Builder::new(capacity)), + DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)), + DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)), DataType::Time32(TimeUnit::Second) => { - Box::new(Time32SecondBuilder::new(capacity)) + Box::new(Time32SecondBuilder::with_capacity(capacity)) } DataType::Time32(TimeUnit::Millisecond) => { - Box::new(Time32MillisecondBuilder::new(capacity)) + Box::new(Time32MillisecondBuilder::with_capacity(capacity)) } DataType::Time64(TimeUnit::Microsecond) => { - Box::new(Time64MicrosecondBuilder::new(capacity)) + Box::new(Time64MicrosecondBuilder::with_capacity(capacity)) } DataType::Time64(TimeUnit::Nanosecond) => { - Box::new(Time64NanosecondBuilder::new(capacity)) + Box::new(Time64NanosecondBuilder::with_capacity(capacity)) } DataType::Timestamp(TimeUnit::Second, _) => { - Box::new(TimestampSecondBuilder::new(capacity)) + Box::new(TimestampSecondBuilder::with_capacity(capacity)) } DataType::Timestamp(TimeUnit::Millisecond, _) => { - Box::new(TimestampMillisecondBuilder::new(capacity)) + Box::new(TimestampMillisecondBuilder::with_capacity(capacity)) } DataType::Timestamp(TimeUnit::Microsecond, _) => { - Box::new(TimestampMicrosecondBuilder::new(capacity)) + Box::new(TimestampMicrosecondBuilder::with_capacity(capacity)) } DataType::Timestamp(TimeUnit::Nanosecond, _) => { - Box::new(TimestampNanosecondBuilder::new(capacity)) + Box::new(TimestampNanosecondBuilder::with_capacity(capacity)) } DataType::Interval(IntervalUnit::YearMonth) => { - Box::new(IntervalYearMonthBuilder::new(capacity)) + Box::new(IntervalYearMonthBuilder::with_capacity(capacity)) } DataType::Interval(IntervalUnit::DayTime) => { - Box::new(IntervalDayTimeBuilder::new(capacity)) + Box::new(IntervalDayTimeBuilder::with_capacity(capacity)) } DataType::Interval(IntervalUnit::MonthDayNano) => { - Box::new(IntervalMonthDayNanoBuilder::new(capacity)) + Box::new(IntervalMonthDayNanoBuilder::with_capacity(capacity)) } DataType::Duration(TimeUnit::Second) => { - Box::new(DurationSecondBuilder::new(capacity)) + Box::new(DurationSecondBuilder::with_capacity(capacity)) } DataType::Duration(TimeUnit::Millisecond) => { - Box::new(DurationMillisecondBuilder::new(capacity)) + Box::new(DurationMillisecondBuilder::with_capacity(capacity)) } DataType::Duration(TimeUnit::Microsecond) => { - Box::new(DurationMicrosecondBuilder::new(capacity)) + Box::new(DurationMicrosecondBuilder::with_capacity(capacity)) } DataType::Duration(TimeUnit::Nanosecond) => { - Box::new(DurationNanosecondBuilder::new(capacity)) + Box::new(DurationNanosecondBuilder::with_capacity(capacity)) } DataType::Struct(fields) => { Box::new(StructBuilder::from_fields(fields.clone(), capacity)) @@ -256,7 +256,7 @@ mod tests { #[test] fn test_struct_array_builder() { let string_builder = StringBuilder::new(4); - let int_builder = Int32Builder::new(4); + let int_builder = Int32Builder::new(); let mut fields = Vec::new(); let mut field_builders = Vec::new(); @@ -320,7 +320,7 @@ mod tests { #[test] fn test_struct_array_builder_finish() { - let int_builder = Int32Builder::new(10); + let int_builder = Int32Builder::new(); let bool_builder = BooleanBuilder::new(10); let mut fields = Vec::new(); @@ -411,7 +411,7 @@ mod tests { #[test] fn test_struct_array_builder_field_builder_type_mismatch() { - let int_builder = Int32Builder::new(10); + let int_builder = Int32Builder::with_capacity(10); let mut fields = Vec::new(); let mut field_builders = Vec::new(); @@ -425,7 +425,7 @@ mod tests { #[test] #[should_panic(expected = "StructBuilder and field_builders are of unequal lengths.")] fn test_struct_array_builder_unequal_field_builders_lengths() { - let mut int_builder = Int32Builder::new(10); + let mut int_builder = Int32Builder::with_capacity(10); let mut bool_builder = BooleanBuilder::new(10); int_builder.append_value(1); @@ -450,7 +450,7 @@ mod tests { expected = "Number of fields is not equal to the number of field_builders." )] fn test_struct_array_builder_unequal_field_field_builders() { - let int_builder = Int32Builder::new(10); + let int_builder = Int32Builder::with_capacity(10); let mut fields = Vec::new(); let mut field_builders = Vec::new(); diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 3993d51d9b8..67e1539cfab 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -2676,7 +2676,7 @@ mod tests { Field::new("b", DataType::Boolean, true), ], vec![ - Box::new(Int32Builder::new(5)), + Box::new(Int32Builder::with_capacity(5)), Box::new(BooleanBuilder::new(5)), ], ); @@ -2821,7 +2821,7 @@ mod tests { #[test] #[cfg(not(feature = "force_validate"))] fn test_decimal_full_validation() { - let values_builder = UInt8Builder::new(10); + let values_builder = UInt8Builder::with_capacity(10); let byte_width = 16; let mut fixed_size_builder = FixedSizeListBuilder::new(values_builder, byte_width); diff --git a/arrow/src/array/equal/list.rs b/arrow/src/array/equal/list.rs index d032b1f04b5..b3bca9a6922 100644 --- a/arrow/src/array/equal/list.rs +++ b/arrow/src/array/equal/list.rs @@ -160,7 +160,7 @@ mod tests { #[test] fn list_array_non_zero_nulls() { // Tests handling of list arrays with non-empty null ranges - let mut builder = ListBuilder::new(Int64Builder::new(10)); + let mut builder = ListBuilder::new(Int64Builder::with_capacity(10)); builder.values().append_value(1); builder.values().append_value(2); builder.values().append_value(3); @@ -168,7 +168,7 @@ mod tests { builder.append(false); let array1 = builder.finish(); - let mut builder = ListBuilder::new(Int64Builder::new(10)); + let mut builder = ListBuilder::new(Int64Builder::with_capacity(10)); builder.values().append_value(1); builder.values().append_value(2); builder.values().append_value(3); diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index 975fbd2b360..3e956227d83 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -610,7 +610,7 @@ mod tests { } fn create_list_array, T: AsRef<[Option]>>(data: T) -> ArrayData { - let mut builder = ListBuilder::new(Int32Builder::new(10)); + let mut builder = ListBuilder::new(Int32Builder::with_capacity(10)); for d in data.as_ref() { if let Some(v) = d { builder.values().append_slice(v.as_ref()); @@ -935,7 +935,7 @@ mod tests { fn create_fixed_size_list_array, T: AsRef<[Option]>>( data: T, ) -> ArrayData { - let mut builder = FixedSizeListBuilder::new(Int32Builder::new(10), 3); + let mut builder = FixedSizeListBuilder::new(Int32Builder::with_capacity(10), 3); for d in data.as_ref() { if let Some(v) = d { @@ -1246,7 +1246,7 @@ mod tests { fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { let values = StringArray::from(values.to_vec()); let mut builder = StringDictionaryBuilder::new_with_dictionary( - PrimitiveBuilder::::new(3), + PrimitiveBuilder::::with_capacity(3), &values, ) .unwrap(); diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs index dafbc05c67a..dada10316aa 100644 --- a/arrow/src/array/transform/mod.rs +++ b/arrow/src/array/transform/mod.rs @@ -807,7 +807,7 @@ mod tests { #[test] fn test_list_null_offset() { - let int_builder = Int64Builder::new(24); + let int_builder = Int64Builder::with_capacity(24); let mut builder = ListBuilder::::new(int_builder); builder.values().append_slice(&[1, 2, 3]); builder.append(true); @@ -824,7 +824,7 @@ mod tests { let result = mutable.freeze(); let array = ListArray::from(result); - let int_builder = Int64Builder::new(24); + let int_builder = Int64Builder::with_capacity(24); let mut builder = ListBuilder::::new(int_builder); builder.values().append_slice(&[1, 2, 3]); builder.append(true); @@ -964,7 +964,7 @@ mod tests { fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { let values = StringArray::from(values.to_vec()); let mut builder = StringDictionaryBuilder::new_with_dictionary( - PrimitiveBuilder::::new(3), + PrimitiveBuilder::::with_capacity(3), &values, ) .unwrap(); @@ -1175,7 +1175,8 @@ mod tests { #[test] fn test_list_append() { - let mut builder = ListBuilder::::new(Int64Builder::new(24)); + let mut builder = + ListBuilder::::new(Int64Builder::with_capacity(24)); builder.values().append_slice(&[1, 2, 3]); builder.append(true); builder.values().append_slice(&[4, 5]); @@ -1185,7 +1186,7 @@ mod tests { builder.append(true); let a = builder.finish(); - let a_builder = Int64Builder::new(24); + let a_builder = Int64Builder::with_capacity(24); let mut a_builder = ListBuilder::::new(a_builder); a_builder.values().append_slice(&[12, 13]); a_builder.append(true); @@ -1241,7 +1242,8 @@ mod tests { #[test] fn test_list_nulls_append() -> Result<()> { - let mut builder = ListBuilder::::new(Int64Builder::new(32)); + let mut builder = + ListBuilder::::new(Int64Builder::with_capacity(32)); builder.values().append_slice(&[1, 2, 3]); builder.append(true); builder.values().append_slice(&[4, 5]); @@ -1255,7 +1257,8 @@ mod tests { let a = builder.finish(); let a = a.data(); - let mut builder = ListBuilder::::new(Int64Builder::new(32)); + let mut builder = + ListBuilder::::new(Int64Builder::with_capacity(32)); builder.values().append_slice(&[12, 13]); builder.append(true); builder.append(false); @@ -1322,7 +1325,8 @@ mod tests { #[test] fn test_list_append_with_capacities() { - let mut builder = ListBuilder::::new(Int64Builder::new(24)); + let mut builder = + ListBuilder::::new(Int64Builder::with_capacity(24)); builder.values().append_slice(&[1, 2, 3]); builder.append(true); builder.values().append_slice(&[4, 5]); @@ -1332,7 +1336,7 @@ mod tests { builder.append(true); let a = builder.finish(); - let a_builder = Int64Builder::new(24); + let a_builder = Int64Builder::with_capacity(24); let mut a_builder = ListBuilder::::new(a_builder); a_builder.values().append_slice(&[12, 13]); a_builder.append(true); @@ -1356,8 +1360,8 @@ mod tests { fn test_map_nulls_append() -> Result<()> { let mut builder = MapBuilder::::new( None, - Int64Builder::new(32), - Int64Builder::new(32), + Int64Builder::with_capacity(32), + Int64Builder::with_capacity(32), ); builder.keys().append_slice(&[1, 2, 3]); builder.values().append_slice(&[1, 2, 3]); @@ -1378,8 +1382,8 @@ mod tests { let mut builder = MapBuilder::::new( None, - Int64Builder::new(32), - Int64Builder::new(32), + Int64Builder::with_capacity(32), + Int64Builder::with_capacity(32), ); builder.keys().append_slice(&[12, 13]); diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index c7ccc09c1d2..fff687e18b3 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -1229,8 +1229,8 @@ mod tests { #[test] fn test_primitive_array_add_dyn_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append(6).unwrap(); @@ -1239,8 +1239,8 @@ mod tests { builder.append(9).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(6).unwrap(); builder.append(7).unwrap(); @@ -1270,8 +1270,8 @@ mod tests { assert!(c.is_null(3)); assert_eq!(10, c.value(4)); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append_null(); @@ -1313,8 +1313,8 @@ mod tests { #[test] fn test_primitive_array_subtract_dyn_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(15).unwrap(); builder.append(8).unwrap(); @@ -1323,8 +1323,8 @@ mod tests { builder.append(20).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(6).unwrap(); builder.append(7).unwrap(); @@ -1354,8 +1354,8 @@ mod tests { assert!(c.is_null(3)); assert_eq!(8, c.value(4)); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append_null(); @@ -1397,8 +1397,8 @@ mod tests { #[test] fn test_primitive_array_multiply_dyn_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append(6).unwrap(); @@ -1407,8 +1407,8 @@ mod tests { builder.append(9).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(6).unwrap(); builder.append(7).unwrap(); @@ -1441,8 +1441,8 @@ mod tests { #[test] fn test_primitive_array_divide_dyn_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(15).unwrap(); builder.append(6).unwrap(); @@ -1451,8 +1451,8 @@ mod tests { builder.append(9).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append(3).unwrap(); @@ -1482,8 +1482,8 @@ mod tests { assert!(c.is_null(3)); assert_eq!(18, c.value(4)); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append_null(); @@ -1668,8 +1668,8 @@ mod tests { assert!(c.is_null(3)); assert_eq!(4, c.value(4)); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append_null(); @@ -1936,14 +1936,14 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] fn test_primitive_array_divide_dyn_by_zero_dict() { - let key_builder = PrimitiveBuilder::::new(1); - let value_builder = PrimitiveBuilder::::new(1); + let key_builder = PrimitiveBuilder::::with_capacity(1); + let value_builder = PrimitiveBuilder::::with_capacity(1); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(15).unwrap(); let a = builder.finish(); - let key_builder = PrimitiveBuilder::::new(1); - let value_builder = PrimitiveBuilder::::new(1); + let key_builder = PrimitiveBuilder::::with_capacity(1); + let value_builder = PrimitiveBuilder::::with_capacity(1); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(0).unwrap(); let b = builder.finish(); diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs index 3af9306d526..be9d56ebb19 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow/src/compute/kernels/arity.rs @@ -228,8 +228,8 @@ mod tests { #[test] fn test_unary_dict_and_unary_dyn() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(5).unwrap(); builder.append(6).unwrap(); @@ -239,8 +239,8 @@ mod tests { builder.append(9).unwrap(); let dictionary_array = builder.finish(); - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(6).unwrap(); builder.append(7).unwrap(); diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 4f59c00fcb5..76a521a38eb 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -357,7 +357,7 @@ where macro_rules! cast_decimal_to_integer { ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ident, $DATA_TYPE : expr) => {{ let array = $ARRAY.as_any().downcast_ref::().unwrap(); - let mut value_builder = $VALUE_BUILDER::new(array.len()); + let mut value_builder = $VALUE_BUILDER::with_capacity(array.len()); let div: i128 = 10_i128.pow(*$SCALE as u32); let min_bound = ($NATIVE_TYPE::MIN) as i128; let max_bound = ($NATIVE_TYPE::MAX) as i128; @@ -388,7 +388,7 @@ macro_rules! cast_decimal_to_float { ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ty) => {{ let array = $ARRAY.as_any().downcast_ref::().unwrap(); let div = 10_f64.powi(*$SCALE as i32); - let mut value_builder = $VALUE_BUILDER::new(array.len()); + let mut value_builder = $VALUE_BUILDER::with_capacity(array.len()); for i in 0..array.len() { if array.is_null(i) { value_builder.append_null(); @@ -1166,7 +1166,7 @@ pub fn cast_with_options( let from_size = time_unit_multiple(from_unit) * SECONDS_IN_DAY; // Int32Array::from_iter(tim.iter) - let mut b = Date32Builder::new(array.len()); + let mut b = Date32Builder::with_capacity(array.len()); for i in 0..array.len() { if time_array.is_null(i) { @@ -2327,8 +2327,8 @@ where .downcast_ref::>() .unwrap(); - let keys_builder = PrimitiveBuilder::::new(values.len()); - let values_builder = PrimitiveBuilder::::new(values.len()); + let keys_builder = PrimitiveBuilder::::with_capacity(values.len()); + let values_builder = PrimitiveBuilder::::with_capacity(values.len()); let mut b = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); // copy each element one at a time @@ -2354,7 +2354,7 @@ where let cast_values = cast_with_options(array, &DataType::Utf8, cast_options)?; let values = cast_values.as_any().downcast_ref::().unwrap(); - let keys_builder = PrimitiveBuilder::::new(values.len()); + let keys_builder = PrimitiveBuilder::::with_capacity(values.len()); let values_builder = StringBuilder::new(values.len()); let mut b = StringDictionaryBuilder::new(keys_builder, values_builder); @@ -4752,7 +4752,7 @@ mod tests { // FROM a dictionary with of Utf8 values use DataType::*; - let keys_builder = PrimitiveBuilder::::new(10); + let keys_builder = PrimitiveBuilder::::new(); let values_builder = StringBuilder::new(10); let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); builder.append("one").unwrap(); @@ -4813,8 +4813,8 @@ mod tests { // that are out of bounds for a particular other kind of // index. - let keys_builder = PrimitiveBuilder::::new(10); - let values_builder = PrimitiveBuilder::::new(10); + let keys_builder = PrimitiveBuilder::::new(); + let values_builder = PrimitiveBuilder::::new(); let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); // add 200 distinct values (which can be stored by a @@ -4844,7 +4844,7 @@ mod tests { // Same test as test_cast_dict_to_dict_bad_index_value but use // string values (and encode the expected behavior here); - let keys_builder = PrimitiveBuilder::::new(10); + let keys_builder = PrimitiveBuilder::::new(); let values_builder = StringBuilder::new(10); let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); @@ -4875,8 +4875,8 @@ mod tests { // FROM a dictionary with of INT32 values use DataType::*; - let keys_builder = PrimitiveBuilder::::new(10); - let values_builder = PrimitiveBuilder::::new(10); + let keys_builder = PrimitiveBuilder::::new(); + let values_builder = PrimitiveBuilder::::new(); let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); builder.append(1).unwrap(); builder.append_null(); @@ -4899,7 +4899,7 @@ mod tests { fn test_cast_primitive_array_to_dict() { use DataType::*; - let mut builder = PrimitiveBuilder::::new(10); + let mut builder = PrimitiveBuilder::::new(); builder.append_value(1); builder.append_null(); builder.append_value(3); @@ -5375,9 +5375,9 @@ mod tests { /// Creates a dictionary with primitive dictionary values, and keys of type K fn make_dictionary_primitive() -> ArrayRef { - let keys_builder = PrimitiveBuilder::::new(2); + let keys_builder = PrimitiveBuilder::::new(); // Pick Int32 arbitrarily for dictionary values - let values_builder = PrimitiveBuilder::::new(2); + let values_builder = PrimitiveBuilder::::new(); let mut b = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); b.append(1).unwrap(); b.append(2).unwrap(); @@ -5386,7 +5386,7 @@ mod tests { /// Creates a dictionary with utf8 values, and keys of type K fn make_dictionary_utf8() -> ArrayRef { - let keys_builder = PrimitiveBuilder::::new(2); + let keys_builder = PrimitiveBuilder::::new(); // Pick Int32 arbitrarily for dictionary values let values_builder = StringBuilder::new(2); let mut b = StringDictionaryBuilder::new(keys_builder, values_builder); diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 02e0b65a3e0..398344fd4cd 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -4202,8 +4202,8 @@ mod tests { #[test] fn test_eq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(123).unwrap(); builder.append_null(); @@ -4246,8 +4246,8 @@ mod tests { #[test] fn test_lt_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(123).unwrap(); builder.append_null(); @@ -4289,8 +4289,8 @@ mod tests { } #[test] fn test_lt_eq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::new(); + let value_builder = PrimitiveBuilder::::new(); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(123).unwrap(); builder.append_null(); @@ -4333,8 +4333,8 @@ mod tests { #[test] fn test_gt_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(123).unwrap(); builder.append_null(); @@ -4377,8 +4377,8 @@ mod tests { #[test] fn test_gt_eq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::new(); + let value_builder = PrimitiveBuilder::::new(); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(22).unwrap(); builder.append_null(); @@ -4421,8 +4421,8 @@ mod tests { #[test] fn test_neq_dyn_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::new(); + let value_builder = PrimitiveBuilder::::new(); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(22).unwrap(); builder.append_null(); @@ -4565,7 +4565,7 @@ mod tests { #[test] fn test_eq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); + let key_builder = PrimitiveBuilder::::new(); let value_builder = StringBuilder::new(100); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); builder.append("abc").unwrap(); @@ -4593,7 +4593,7 @@ mod tests { } #[test] fn test_lt_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); + let key_builder = PrimitiveBuilder::::new(); let value_builder = StringBuilder::new(100); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); builder.append("abc").unwrap(); @@ -4622,7 +4622,7 @@ mod tests { } #[test] fn test_lt_eq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); + let key_builder = PrimitiveBuilder::::new(); let value_builder = StringBuilder::new(100); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); builder.append("abc").unwrap(); @@ -4651,7 +4651,7 @@ mod tests { } #[test] fn test_gt_eq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); + let key_builder = PrimitiveBuilder::::new(); let value_builder = StringBuilder::new(100); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); builder.append("abc").unwrap(); @@ -4681,7 +4681,7 @@ mod tests { #[test] fn test_gt_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); + let key_builder = PrimitiveBuilder::::new(); let value_builder = StringBuilder::new(100); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); builder.append("abc").unwrap(); @@ -4710,7 +4710,7 @@ mod tests { } #[test] fn test_neq_dyn_utf8_scalar_with_dict() { - let key_builder = PrimitiveBuilder::::new(3); + let key_builder = PrimitiveBuilder::::new(); let value_builder = StringBuilder::new(100); let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); builder.append("abc").unwrap(); diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs index 95a1bfa4da4..90cfab327a6 100644 --- a/arrow/src/compute/kernels/filter.rs +++ b/arrow/src/compute/kernels/filter.rs @@ -1416,7 +1416,7 @@ mod tests { #[test] fn test_filter_map() { let mut builder = - MapBuilder::new(None, StringBuilder::new(16), Int64Builder::new(4)); + MapBuilder::new(None, StringBuilder::new(16), Int64Builder::with_capacity(4)); // [{"key1": 1}, {"key2": 2, "key3": 3}, null, {"key1": 1} builder.keys().append_value("key1"); builder.values().append_value(1); @@ -1438,7 +1438,7 @@ mod tests { let got = filter(&maparray, &indices).unwrap(); let mut builder = - MapBuilder::new(None, StringBuilder::new(8), Int64Builder::new(2)); + MapBuilder::new(None, StringBuilder::new(8), Int64Builder::with_capacity(2)); builder.keys().append_value("key1"); builder.values().append_value(1); builder.append(true).unwrap(); diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index fb8f7565188..6bddab6ec6f 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -1084,7 +1084,7 @@ mod tests { ], vec![ Box::new(BooleanBuilder::new(values.len())), - Box::new(Int32Builder::new(values.len())), + Box::new(Int32Builder::with_capacity(values.len())), ], ); @@ -2039,7 +2039,7 @@ mod tests { #[test] fn test_take_dict() { - let keys_builder = Int16Builder::new(8); + let keys_builder = Int16Builder::new(); let values_builder = StringBuilder::new(4); let mut dict_builder = StringDictionaryBuilder::new(keys_builder, values_builder); diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index d917a6ef3c9..b24a6333f5f 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -174,7 +174,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Time32(_) | &DataType::Time64(_) => { extract_component_from_array!(array, b, hour, value_as_time) @@ -205,7 +205,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => { extract_component_from_array!(array, b, year, value_as_datetime) @@ -222,7 +222,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!(array, b, quarter, value_as_datetime) @@ -250,7 +250,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!(array, b, month, value_as_datetime) @@ -283,7 +283,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!( @@ -321,7 +321,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!( @@ -354,7 +354,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!(array, b, day, value_as_datetime) @@ -383,7 +383,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!(array, b, ordinal, value_as_datetime) @@ -411,7 +411,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!(array, b, minute, value_as_datetime) @@ -439,7 +439,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { @@ -457,7 +457,7 @@ where T: ArrowTemporalType + ArrowNumericType, i64: std::convert::From, { - let mut b = Int32Builder::new(array.len()); + let mut b = Int32Builder::with_capacity(array.len()); match array.data_type() { &DataType::Date64 | &DataType::Timestamp(_, None) => { extract_component_from_array!(array, b, second, value_as_datetime) diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs index 66fdc691887..ee8f5bc1216 100644 --- a/arrow/src/json/reader.rs +++ b/arrow/src/json/reader.rs @@ -901,7 +901,7 @@ impl Decoder { where T: ArrowPrimitiveType + ArrowDictionaryKeyType, { - let key_builder = PrimitiveBuilder::::new(row_len); + let key_builder = PrimitiveBuilder::::with_capacity(row_len); let values_builder = StringBuilder::new(row_len * 5); StringDictionaryBuilder::new(key_builder, values_builder) } diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs index ee5c947a2ff..24a07267272 100644 --- a/arrow/src/util/integration_util.rs +++ b/arrow/src/util/integration_util.rs @@ -300,7 +300,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::Int8 => { - let mut b = Int8Builder::new(json_col.count); + let mut b = Int8Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -321,7 +321,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::Int16 => { - let mut b = Int16Builder::new(json_col.count); + let mut b = Int16Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -340,7 +340,7 @@ pub fn array_from_json( | DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => { - let mut b = Int32Builder::new(json_col.count); + let mut b = Int32Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -362,7 +362,7 @@ pub fn array_from_json( | DataType::Timestamp(_, _) | DataType::Duration(_) | DataType::Interval(IntervalUnit::DayTime) => { - let mut b = Int64Builder::new(json_col.count); + let mut b = Int64Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -418,7 +418,7 @@ pub fn array_from_json( compute::cast(&array, field.data_type()) } DataType::UInt8 => { - let mut b = UInt8Builder::new(json_col.count); + let mut b = UInt8Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -434,7 +434,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::UInt16 => { - let mut b = UInt16Builder::new(json_col.count); + let mut b = UInt16Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -450,7 +450,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::UInt32 => { - let mut b = UInt32Builder::new(json_col.count); + let mut b = UInt32Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -466,7 +466,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::UInt64 => { - let mut b = UInt64Builder::new(json_col.count); + let mut b = UInt64Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -498,7 +498,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::Interval(IntervalUnit::MonthDayNano) => { - let mut b = IntervalMonthDayNanoBuilder::new(json_col.count); + let mut b = IntervalMonthDayNanoBuilder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -541,7 +541,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::Float32 => { - let mut b = Float32Builder::new(json_col.count); + let mut b = Float32Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -557,7 +557,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::Float64 => { - let mut b = Float64Builder::new(json_col.count); + let mut b = Float64Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() From 41cb5ef560382dbdb2527ccd561e2de09ca311b2 Mon Sep 17 00:00:00 2001 From: psvri Date: Thu, 18 Aug 2022 19:00:11 +0000 Subject: [PATCH 2/5] Bug fixes --- arrow/benches/builder.rs | 4 ++-- arrow/benches/string_dictionary_builder.rs | 2 +- arrow/benches/take_kernels.rs | 2 +- arrow/src/util/pretty.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs index 691cd068312..1174377bfd3 100644 --- a/arrow/benches/builder.rs +++ b/arrow/benches/builder.rs @@ -43,7 +43,7 @@ fn bench_primitive(c: &mut Criterion) { )); group.bench_function("bench_primitive", |b| { b.iter(|| { - let mut builder = Int64Builder::new(64); + let mut builder = Int64Builder::with_capacity(64); for _ in 0..NUM_BATCHES { builder.append_slice(&data[..]); } @@ -57,7 +57,7 @@ fn bench_primitive_nulls(c: &mut Criterion) { let mut group = c.benchmark_group("bench_primitive_nulls"); group.bench_function("bench_primitive_nulls", |b| { b.iter(|| { - let mut builder = UInt8Builder::new(64); + let mut builder = UInt8Builder::with_capacity(64); for _ in 0..NUM_BATCHES * BATCH_SIZE { builder.append_null(); } diff --git a/arrow/benches/string_dictionary_builder.rs b/arrow/benches/string_dictionary_builder.rs index bc014bec155..267288650d2 100644 --- a/arrow/benches/string_dictionary_builder.rs +++ b/arrow/benches/string_dictionary_builder.rs @@ -43,7 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { let strings = build_strings(dict_size, total_size, key_len); b.iter(|| { - let keys = Int32Builder::new(strings.len()); + let keys = Int32Builder::with_capacity(strings.len()); let values = StringBuilder::new((key_len + 1) * dict_size); let mut builder = StringDictionaryBuilder::new(keys, values); diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs index 42b38b6e5d2..c4677cc7261 100644 --- a/arrow/benches/take_kernels.rs +++ b/arrow/benches/take_kernels.rs @@ -30,7 +30,7 @@ use arrow::{array::*, util::bench_util::*}; fn create_random_index(size: usize, null_density: f32) -> UInt32Array { let mut rng = seedable_rng(); - let mut builder = UInt32Builder::new(size); + let mut builder = UInt32Builder::with_capacity(size); for _ in 0..size { if rng.gen::() < null_density { builder.append_null(); diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs index 6f4d9e34a99..3ebecdb9957 100644 --- a/arrow/src/util/pretty.rs +++ b/arrow/src/util/pretty.rs @@ -241,7 +241,7 @@ mod tests { DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)])); - let keys_builder = PrimitiveBuilder::::new(10); + let keys_builder = PrimitiveBuilder::::with_capacity(10); let values_builder = StringBuilder::new(10); let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); From 31a3cff802cb70b23336ec5bcdebc3a3b5fb27a7 Mon Sep 17 00:00:00 2001 From: psvri Date: Thu, 18 Aug 2022 19:28:23 +0000 Subject: [PATCH 3/5] fix parquet erros --- parquet/src/arrow/arrow_reader/mod.rs | 3 ++- parquet/src/arrow/arrow_writer/levels.rs | 2 +- parquet/src/arrow/arrow_writer/mod.rs | 8 ++++---- parquet/src/arrow/buffer/converter.rs | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 1247e4399e6..86d44ccc027 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1972,7 +1972,8 @@ mod tests { ) .unwrap(); for _ in 0..2 { - let mut list_builder = ListBuilder::new(Int32Builder::new(batch_size)); + let mut list_builder = + ListBuilder::new(Int32Builder::with_capacity(batch_size)); for _ in 0..(batch_size) { list_builder.append(true); } diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index 9a90d40d5a8..49f997ac81f 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -1188,7 +1188,7 @@ mod tests { Field::new("item", DataType::Struct(vec![int_field.clone()]), true); let list_field = Field::new("list", DataType::List(Box::new(item_field)), true); - let int_builder = Int32Builder::new(10); + let int_builder = Int32Builder::with_capacity(10); let struct_builder = StructBuilder::new(vec![int_field], vec![Box::new(int_builder)]); let mut list_builder = ListBuilder::new(struct_builder); diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index d09cb712ea2..badd4414a8e 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -1653,8 +1653,8 @@ mod tests { )])); // create some data - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); + let key_builder = PrimitiveBuilder::::with_capacity(3); + let value_builder = PrimitiveBuilder::::with_capacity(2); let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); builder.append(12345678).unwrap(); builder.append_null(); @@ -1792,8 +1792,8 @@ mod tests { let int_field = Field::new("a", DataType::Int32, true); let int_field2 = Field::new("b", DataType::Int32, true); - let int_builder = Int32Builder::new(10); - let int_builder2 = Int32Builder::new(10); + let int_builder = Int32Builder::with_capacity(10); + let int_builder2 = Int32Builder::with_capacity(10); let struct_builder = StructBuilder::new( vec![int_field, int_field2], diff --git a/parquet/src/arrow/buffer/converter.rs b/parquet/src/arrow/buffer/converter.rs index aeca548bde7..73c2bf670d8 100644 --- a/parquet/src/arrow/buffer/converter.rs +++ b/parquet/src/arrow/buffer/converter.rs @@ -130,7 +130,7 @@ impl Converter>, IntervalYearMonthArray> &self, source: Vec>, ) -> Result { - let mut builder = IntervalYearMonthBuilder::new(source.len()); + let mut builder = IntervalYearMonthBuilder::with_capacity(source.len()); for v in source { match v { Some(array) => builder.append_value(i32::from_le_bytes( @@ -155,7 +155,7 @@ impl Converter>, IntervalDayTimeArray> &self, source: Vec>, ) -> Result { - let mut builder = IntervalDayTimeBuilder::new(source.len()); + let mut builder = IntervalDayTimeBuilder::with_capacity(source.len()); for v in source { match v { Some(array) => builder.append_value(i64::from_le_bytes( From 0256e75cfdc79efd2949903913e3144cffe76872 Mon Sep 17 00:00:00 2001 From: psvri Date: Sat, 20 Aug 2022 08:21:05 +0000 Subject: [PATCH 4/5] Fixing cargo fmt --- arrow/src/array/builder/struct_builder.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs index 905f2522753..48fcc86868b 100644 --- a/arrow/src/array/builder/struct_builder.rs +++ b/arrow/src/array/builder/struct_builder.rs @@ -320,7 +320,6 @@ mod tests { #[test] fn test_struct_array_builder_finish() { - let int_builder = Int32Builder::new(); let bool_builder = BooleanBuilder::new(); From 3e8cdbf277babac81ded29215e4e34218c8e0083 Mon Sep 17 00:00:00 2001 From: psvri Date: Sat, 20 Aug 2022 16:45:59 +0000 Subject: [PATCH 5/5] impl defualt for primitive builder --- arrow/src/array/builder/primitive_builder.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arrow/src/array/builder/primitive_builder.rs b/arrow/src/array/builder/primitive_builder.rs index fad0079fe9d..38c8b447147 100644 --- a/arrow/src/array/builder/primitive_builder.rs +++ b/arrow/src/array/builder/primitive_builder.rs @@ -64,14 +64,19 @@ impl ArrayBuilder for PrimitiveBuilder { } } +impl Default for PrimitiveBuilder { + fn default() -> Self { + Self::new() + } +} + impl PrimitiveBuilder { /// Creates a new primitive array builder - #[allow(clippy::new_without_default)] pub fn new() -> Self { Self::with_capacity(1024) } - /// Creates a new primitive array builder with capacity + /// Creates a new primitive array builder with capacity no of items pub fn with_capacity(capacity: usize) -> Self { Self { values_builder: BufferBuilder::::new(capacity),