From 904a95d9bed707c995b801f09a3464fb1244c1fd Mon Sep 17 00:00:00 2001 From: Willi Raschkowski Date: Sun, 9 Feb 2025 01:31:57 +0000 Subject: [PATCH 1/8] Support setting key field metadata in MapBuilder --- arrow-array/src/builder/map_builder.rs | 83 ++++++++++++++++++++--- arrow-array/src/builder/struct_builder.rs | 1 + 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 1d89d427aae1..97e4cf901db5 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -61,6 +61,7 @@ pub struct MapBuilder { field_names: MapFieldNames, key_builder: K, value_builder: V, + key_field: Option, value_field: Option, } @@ -107,13 +108,32 @@ impl MapBuilder { field_names: field_names.unwrap_or_default(), key_builder, value_builder, + key_field: None, value_field: None, } } /// Override the field passed to [`MapBuilder::new`] /// - /// By default a nullable field is created with the name `values` + /// By default, a nullable field is created with the name `keys` + /// + /// This function panics if the given field is nullable as map keys are not + /// allowed to be null + /// + /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the + /// field's data type does not match that of `K` + pub fn with_keys_field(self, field: impl Into) -> Self { + let field: FieldRef = field.into(); + assert!(!field.is_nullable(), "Key field must not be nullable"); + Self { + key_field: Some(field), + ..self + } + } + + /// Override the field passed to [`MapBuilder::new`] + /// + /// By default, a nullable field is created with the name `values` /// /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the /// field's data type does not match that of `V` @@ -194,11 +214,14 @@ impl MapBuilder { keys_arr.null_count() ); - let keys_field = Arc::new(Field::new( - self.field_names.key.as_str(), - keys_arr.data_type().clone(), - false, // always non-nullable - )); + let keys_field = match &self.key_field { + Some(f) => f.clone(), + None => Arc::new(Field::new( + self.field_names.key.as_str(), + keys_arr.data_type().clone(), + false, // always non-nullable + )), + }; let values_field = match &self.value_field { Some(f) => f.clone(), None => Arc::new(Field::new( @@ -262,10 +285,10 @@ impl ArrayBuilder for MapBuilder { #[cfg(test)] mod tests { + use super::*; use crate::builder::{make_builder, Int32Builder, StringBuilder}; use crate::{Int32Array, StringArray}; - - use super::*; + use std::collections::HashMap; #[test] #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")] @@ -377,4 +400,48 @@ mod tests { ) ); } + + #[test] + fn test_with_keys_field() { + let mut key_metadata = HashMap::new(); + key_metadata.insert("foo".to_string(), "bar".to_string()); + let key_field = Arc::new( + Field::new("keys", DataType::Int32, false).with_metadata(key_metadata.clone()), + ); + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) + .with_keys_field(key_field.clone()); + builder.keys().append_value(1); + builder.values().append_value(2); + builder.append(true).unwrap(); + let map = builder.finish(); + + assert_eq!(map.len(), 1); + assert_eq!( + map.data_type(), + &DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct( + vec![ + Arc::new( + Field::new("keys", DataType::Int32, false) + .with_metadata(key_metadata) + ), + Arc::new(Field::new("values", DataType::Int32, true)) + ] + .into() + ), + false, + )), + false + ) + ); + } + + #[test] + #[should_panic(expected = "Key field must not be nullable")] + fn test_with_nullable_keys_field() { + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) + .with_keys_field(Arc::new(Field::new("keys", DataType::Int32, true))); + } } diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 4a40c2201746..1a298de52e13 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -296,6 +296,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box Date: Sun, 9 Feb 2025 12:05:03 +0000 Subject: [PATCH 2/8] Return ArrowError for nullable keys --- arrow-array/src/builder/map_builder.rs | 23 ++++++++++++++--------- arrow-array/src/builder/struct_builder.rs | 3 ++- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 97e4cf901db5..b14722cda080 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -117,18 +117,21 @@ impl MapBuilder { /// /// By default, a nullable field is created with the name `keys` /// - /// This function panics if the given field is nullable as map keys are not - /// allowed to be null + /// Returns an error if the given field is nullable. /// /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the /// field's data type does not match that of `K` - pub fn with_keys_field(self, field: impl Into) -> Self { + pub fn with_keys_field(self, field: impl Into) -> Result { let field: FieldRef = field.into(); - assert!(!field.is_nullable(), "Key field must not be nullable"); - Self { + if field.is_nullable() { + return Err(ArrowError::InvalidArgumentError(format!( + "Key field must not be nullable: {field:?}" + ))); + } + Ok(Self { key_field: Some(field), ..self - } + }) } /// Override the field passed to [`MapBuilder::new`] @@ -409,7 +412,8 @@ mod tests { Field::new("keys", DataType::Int32, false).with_metadata(key_metadata.clone()), ); let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) - .with_keys_field(key_field.clone()); + .with_keys_field(key_field.clone()) + .unwrap(); builder.keys().append_value(1); builder.values().append_value(2); builder.append(true).unwrap(); @@ -439,9 +443,10 @@ mod tests { } #[test] - #[should_panic(expected = "Key field must not be nullable")] fn test_with_nullable_keys_field() { - let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) + let result = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) .with_keys_field(Arc::new(Field::new("keys", DataType::Int32, true))); + + assert!(result.is_err()); } } diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 1a298de52e13..201b87210962 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -297,10 +297,11 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box panic!("The field of Map data type {t:?} should has a child Struct field"), + t => panic!("The field of Map data type {t:?} should have a child Struct field"), }, DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)), t @ DataType::Dictionary(key_type, value_type) => { From 30215506632d51884e9766cade7873edb6d83952 Mon Sep 17 00:00:00 2001 From: Willi Raschkowski Date: Tue, 11 Feb 2025 22:49:31 +0000 Subject: [PATCH 3/8] Panic instead of returning Result --- arrow-array/src/builder/map_builder.rs | 20 +++++++------------- arrow-array/src/builder/struct_builder.rs | 1 - 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index b14722cda080..8c408ea9bacf 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -121,17 +121,13 @@ impl MapBuilder { /// /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the /// field's data type does not match that of `K` - pub fn with_keys_field(self, field: impl Into) -> Result { + pub fn with_keys_field(self, field: impl Into) -> Self { let field: FieldRef = field.into(); - if field.is_nullable() { - return Err(ArrowError::InvalidArgumentError(format!( - "Key field must not be nullable: {field:?}" - ))); - } - Ok(Self { + assert!(!field.is_nullable(), "Keys field must not be nullable"); + Self { key_field: Some(field), ..self - }) + } } /// Override the field passed to [`MapBuilder::new`] @@ -412,8 +408,7 @@ mod tests { Field::new("keys", DataType::Int32, false).with_metadata(key_metadata.clone()), ); let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) - .with_keys_field(key_field.clone()) - .unwrap(); + .with_keys_field(key_field.clone()); builder.keys().append_value(1); builder.values().append_value(2); builder.append(true).unwrap(); @@ -443,10 +438,9 @@ mod tests { } #[test] + #[should_panic(expected = "Keys field must not be nullable")] fn test_with_nullable_keys_field() { - let result = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) + MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) .with_keys_field(Arc::new(Field::new("keys", DataType::Int32, true))); - - assert!(result.is_err()); } } diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 201b87210962..5cebc6485e0c 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -297,7 +297,6 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box Date: Tue, 11 Feb 2025 23:02:04 +0000 Subject: [PATCH 4/8] Update doc --- arrow-array/src/builder/map_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 8c408ea9bacf..1ca2fe1d044c 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -117,7 +117,7 @@ impl MapBuilder { /// /// By default, a nullable field is created with the name `keys` /// - /// Returns an error if the given field is nullable. + /// Panics if the given field is nullable as map keys are not allowed to be null /// /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the /// field's data type does not match that of `K` From 2e5f1be09dc2ce8badf9b1ee56623281b6757ad6 Mon Sep 17 00:00:00 2001 From: Willi Raschkowski Date: Tue, 11 Feb 2025 23:05:13 +0000 Subject: [PATCH 5/8] Move non-nullable keys field validation into finish --- arrow-array/src/builder/map_builder.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 1ca2fe1d044c..a06414b57da0 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -122,10 +122,8 @@ impl MapBuilder { /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the /// field's data type does not match that of `K` pub fn with_keys_field(self, field: impl Into) -> Self { - let field: FieldRef = field.into(); - assert!(!field.is_nullable(), "Keys field must not be nullable"); Self { - key_field: Some(field), + key_field: Some(field.into()), ..self } } @@ -214,7 +212,10 @@ impl MapBuilder { ); let keys_field = match &self.key_field { - Some(f) => f.clone(), + Some(f) => { + assert!(!f.is_nullable(), "Keys field must not be nullable"); + f.clone() + } None => Arc::new(Field::new( self.field_names.key.as_str(), keys_arr.data_type().clone(), @@ -440,7 +441,13 @@ mod tests { #[test] #[should_panic(expected = "Keys field must not be nullable")] fn test_with_nullable_keys_field() { - MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) .with_keys_field(Arc::new(Field::new("keys", DataType::Int32, true))); + + builder.keys().append_value(1); + builder.values().append_value(2); + builder.append(true).unwrap(); + + builder.finish(); } } From ee07f5b8222e45f042c5f1804ed40ab073e024d2 Mon Sep 17 00:00:00 2001 From: Willi Raschkowski Date: Tue, 11 Feb 2025 23:15:01 +0000 Subject: [PATCH 6/8] Oops, update doc after moving panic to finish --- arrow-array/src/builder/map_builder.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index a06414b57da0..20eb151538aa 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -117,10 +117,8 @@ impl MapBuilder { /// /// By default, a nullable field is created with the name `keys` /// - /// Panics if the given field is nullable as map keys are not allowed to be null - /// /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the - /// field's data type does not match that of `K` + /// field's data type does not match that of `K` or the field is nullable pub fn with_keys_field(self, field: impl Into) -> Self { Self { key_field: Some(field.into()), From e46da4a1f844015860ac1c8f4753a35252940144 Mon Sep 17 00:00:00 2001 From: Willi Raschkowski Date: Tue, 11 Feb 2025 23:23:42 +0000 Subject: [PATCH 7/8] Assert that keys_field type mismatch results in panic --- arrow-array/src/builder/map_builder.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 20eb151538aa..219e904f6231 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -448,4 +448,17 @@ mod tests { builder.finish(); } + + #[test] + #[should_panic(expected = "Incorrect datatype")] + fn test_keys_field_type_mismatch() { + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) + .with_keys_field(Arc::new(Field::new("keys", DataType::Utf8, false))); + + builder.keys().append_value(1); + builder.values().append_value(2); + builder.append(true).unwrap(); + + builder.finish(); + } } From 212c7fafd241dd81249024f1013601e3afa33609 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Wed, 12 Feb 2025 10:03:48 +0000 Subject: [PATCH 8/8] Update arrow-array/src/builder/map_builder.rs --- arrow-array/src/builder/map_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 219e904f6231..012a454e76c9 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -115,7 +115,7 @@ impl MapBuilder { /// Override the field passed to [`MapBuilder::new`] /// - /// By default, a nullable field is created with the name `keys` + /// By default, a non-nullable field is created with the name `keys` /// /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the /// field's data type does not match that of `K` or the field is nullable