From 650d3ea654133df7d953b0a0884a880464d91aae Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 1 Oct 2024 10:37:23 -0400 Subject: [PATCH 1/4] Refactor tests for union sorting properties --- .../physical-expr/src/equivalence/ordering.rs | 25 +- .../src/equivalence/properties.rs | 813 ++++++++++-------- 2 files changed, 469 insertions(+), 369 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index 65423033d5e0..2b3b430b2fbd 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. -use std::fmt::Display; -use std::hash::Hash; -use std::sync::Arc; - use crate::equivalence::add_offset_to_expr; use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; use arrow_schema::SortOptions; +use std::fmt::Display; +use std::hash::Hash; +use std::sync::Arc; +use std::vec::IntoIter; /// An `OrderingEquivalenceClass` object keeps track of different alternative /// orderings than can describe a schema. For example, consider the following table: @@ -36,7 +36,7 @@ use arrow_schema::SortOptions; /// /// Here, both `vec![a ASC, b ASC]` and `vec![c DESC, d ASC]` describe the table /// ordering. In this case, we say that these orderings are equivalent. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)] pub struct OrderingEquivalenceClass { pub orderings: Vec, } @@ -44,7 +44,7 @@ pub struct OrderingEquivalenceClass { impl OrderingEquivalenceClass { /// Creates new empty ordering equivalence class. pub fn empty() -> Self { - Self { orderings: vec![] } + Default::default() } /// Clears (empties) this ordering equivalence class. @@ -197,6 +197,15 @@ impl OrderingEquivalenceClass { } } +impl IntoIterator for OrderingEquivalenceClass { + type Item = LexOrdering; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.orderings.into_iter() + } +} + /// This function constructs a duplicate-free `LexOrdering` by filtering out /// duplicate entries that have same physical expression inside. For example, /// `vec![a ASC, a DESC]` collapses to `vec![a ASC]`. @@ -229,10 +238,10 @@ impl Display for OrderingEquivalenceClass { write!(f, "[")?; let mut iter = self.orderings.iter(); if let Some(ordering) = iter.next() { - write!(f, "{}", PhysicalSortExpr::format_list(ordering))?; + write!(f, "[{}]", PhysicalSortExpr::format_list(ordering))?; } for ordering in iter { - write!(f, "{}", PhysicalSortExpr::format_list(ordering))?; + write!(f, ", [{}]", PhysicalSortExpr::format_list(ordering))?; } write!(f, "]")?; Ok(()) diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index dc59a1eb835b..367429778bca 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -2708,379 +2708,428 @@ mod tests { )) } - #[tokio::test] - async fn test_union_equivalence_properties_multi_children() -> Result<()> { - let schema = create_test_schema()?; + #[test] + fn test_union_equivalence_properties_multi_children_1() { + let schema = create_test_schema().unwrap(); let schema2 = append_fields(&schema, "1"); let schema3 = append_fields(&schema, "2"); - let test_cases = vec![ - // --------- TEST CASE 1 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b", "c"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1", "c1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["a2", "b2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![vec!["a", "b"]], - ), - // --------- TEST CASE 2 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b", "c"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1", "c1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["a2", "b2", "c2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![vec!["a", "b", "c"]], - ), - // --------- TEST CASE 3 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1", "c1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["a2", "b2", "c2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![vec!["a", "b"]], - ), - // --------- TEST CASE 4 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["b2", "c2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![], - ), - // --------- TEST CASE 5 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b"], vec!["c"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1"], vec!["c1"]], - Arc::clone(&schema2), - ), - ], - // Expected - vec![vec!["a", "b"], vec!["c"]], - ), - ]; - for (children, expected) in test_cases { - let children_eqs = children - .iter() - .map(|(orderings, schema)| { - let orderings = orderings - .iter() - .map(|ordering| { - ordering - .iter() - .map(|name| PhysicalSortExpr { - expr: col(name, schema).unwrap(), - options: SortOptions::default(), - }) - .collect::>() - }) - .collect::>(); - EquivalenceProperties::new_with_orderings( - Arc::clone(schema), - &orderings, - ) - }) - .collect::>(); - let actual = calculate_union(children_eqs, Arc::clone(&schema))?; + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b", "c"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["a2", "b2"]], &schema3) + .with_expected_sort(vec![vec!["a", "b"]]) + .run() + } - let expected_ordering = expected - .into_iter() - .map(|ordering| { - ordering - .into_iter() - .map(|name| PhysicalSortExpr { - expr: col(name, &schema).unwrap(), - options: SortOptions::default(), - }) - .collect::>() - }) - .collect::>(); - let expected = EquivalenceProperties::new_with_orderings( - Arc::clone(&schema), - &expected_ordering, - ); - assert_eq_properties_same( - &actual, - &expected, - format!("expected: {:?}, actual: {:?}", expected, actual), - ); - } - Ok(()) + #[test] + fn test_union_equivalence_properties_multi_children_2() { + let schema = create_test_schema().unwrap(); + let schema2 = append_fields(&schema, "1"); + let schema3 = append_fields(&schema, "2"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b", "c"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["a2", "b2", "c2"]], &schema3) + .with_expected_sort(vec![vec!["a", "b", "c"]]) + .run() } - #[tokio::test] - async fn test_union_equivalence_properties_binary() -> Result<()> { - let schema = create_test_schema()?; + #[test] + fn test_union_equivalence_properties_multi_children_3() { + let schema = create_test_schema().unwrap(); let schema2 = append_fields(&schema, "1"); - let col_a = &col("a", &schema)?; - let col_b = &col("b", &schema)?; - let col_c = &col("c", &schema)?; - let col_a1 = &col("a1", &schema2)?; - let col_b1 = &col("b1", &schema2)?; - let options = SortOptions::default(); - let options_desc = !SortOptions::default(); - let test_cases = [ - //-----------TEST CASE 1----------// - ( - ( - // First child orderings - vec![ - // [a ASC] - (vec![(col_a, options)]), - ], - // First child constants - vec![col_b, col_c], - Arc::clone(&schema), - ), - ( - // Second child orderings - vec![ - // [b ASC] - (vec![(col_b, options)]), - ], - // Second child constants - vec![col_a, col_c], - Arc::clone(&schema), - ), - ( - // Union expected orderings - vec![ - // [a ASC] - vec![(col_a, options)], - // [b ASC] - vec![(col_b, options)], - ], - // Union - vec![col_c], - ), - ), - //-----------TEST CASE 2----------// + let schema3 = append_fields(&schema, "2"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["a2", "b2", "c2"]], &schema3) + .with_expected_sort(vec![vec!["a", "b"]]) + .run() + } + + #[test] + fn test_union_equivalence_properties_multi_children_4() { + let schema = create_test_schema().unwrap(); + let schema2 = append_fields(&schema, "1"); + let schema3 = append_fields(&schema, "2"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["b2", "c2"]], &schema3) + .with_expected_sort(vec![]) + .run() + } + + #[test] + fn test_union_equivalence_properties_multi_children_5() { + let schema = create_test_schema().unwrap(); + let schema2 = append_fields(&schema, "1"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b"], vec!["c"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1"], vec!["c1"]], &schema2) + .with_expected_sort(vec![vec!["a", "b"], vec!["c"]]) + .run() + } + + #[test] + fn test_union_equivalence_properties_constants_1() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) + .with_child_sort_and_const_exprs( + // First child: [a ASC], const [b, c] + vec![vec!["a"]], + vec!["b", "c"], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child: [b ASC], const [a, c] + vec![vec!["b"]], + vec!["a", "c"], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union expected orderings: [[a ASC], [b ASC]], const [c] + vec![vec!["a"], vec!["b"]], + vec!["c"], + ) + .run() + } + + #[test] + fn test_union_equivalence_properties_constants_2() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) // Meet ordering between [a ASC], [a ASC, b ASC] should be [a ASC] - ( - ( - // First child orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Second child orderings - vec![ - // [a ASC, b ASC] - vec![(col_a, options), (col_b, options)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Union orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - ), - ), - //-----------TEST CASE 3----------// + .with_child_sort_and_const_exprs( + // First child: [a ASC], const [] + vec![vec!["a"]], + vec![], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child: [a ASC, b ASC], const [] + vec![vec!["a", "b"]], + vec![], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union orderings: [a ASC], const [] + vec![vec!["a"]], + vec![], + ) + .run() + } + + #[test] + fn test_union_equivalence_properties_constants_3() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) // Meet ordering between [a ASC], [a DESC] should be [] - ( - ( - // First child orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Second child orderings - vec![ - // [a DESC] - vec![(col_a, options_desc)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Union doesn't have any ordering - vec![], - // No constant - vec![], - ), - ), - //-----------TEST CASE 4----------// - // Meet ordering between [a ASC], [a1 ASC, b1 ASC] should be [a ASC] - // Where a, and a1 ath the same index for their corresponding schemas. - ( - ( - // First child orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Second child orderings - vec![ - // [a1 ASC, b1 ASC] - vec![(col_a1, options), (col_b1, options)], - ], - // No constant - vec![], - Arc::clone(&schema2), - ), - ( - // Union orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - ), - ), - ]; + .with_child_sort_and_const_exprs( + // First child: [a ASC], const [] + vec![vec!["a"]], + vec![], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child orderings: [a DESC], const [] + vec![vec!["a DESC"]], + vec![], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union doesn't have any ordering or constant + vec![], + vec![], + ) + .run() + } - for ( - test_idx, - ( - (first_child_orderings, first_child_constants, first_schema), - (second_child_orderings, second_child_constants, second_schema), - (union_orderings, union_constants), - ), - ) in test_cases.iter().enumerate() - { - let first_orderings = first_child_orderings - .iter() - .map(|ordering| convert_to_sort_exprs(ordering)) - .collect::>(); - let first_constants = first_child_constants - .iter() - .map(|expr| ConstExpr::new(Arc::clone(expr))) - .collect::>(); - let mut lhs = EquivalenceProperties::new(Arc::clone(first_schema)); - lhs = lhs.with_constants(first_constants); - lhs.add_new_orderings(first_orderings); + #[test] + fn test_union_equivalence_properties_constants_4() { + let schema = create_test_schema().unwrap(); + let schema2 = append_fields(&schema, "1"); + UnionEquivalenceTest::new(&schema) + .with_child_sort_and_const_exprs( + // First child orderings: [a ASC], const [] + vec![vec!["a"]], + vec![], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child orderings: [a1 ASC, b1 ASC], const [] + vec![vec!["a1", "b1"]], + vec![], + &schema2, + ) + .with_expected_sort_and_const_exprs( + // Union orderings: + // should be [a ASC] + // + // Where a, and a1 ath the same index for their corresponding + // schemas. + vec![vec!["a"]], + vec![], + ) + .run() + } - let second_orderings = second_child_orderings - .iter() - .map(|ordering| convert_to_sort_exprs(ordering)) - .collect::>(); - let second_constants = second_child_constants - .iter() - .map(|expr| ConstExpr::new(Arc::clone(expr))) - .collect::>(); - let mut rhs = EquivalenceProperties::new(Arc::clone(second_schema)); - rhs = rhs.with_constants(second_constants); - rhs.add_new_orderings(second_orderings); + #[test] + #[ignore] + // ignored due to https://github.com/apache/datafusion/issues/12446 + fn test_union_equivalence_properties_constants() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) + .with_child_sort_and_const_exprs( + // First child orderings: [a ASC, c ASC], const [b] + vec![vec!["a", "c"]], + vec!["b"], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child orderings: [b ASC, c ASC], const [a] + vec![vec!["b", "c"]], + vec!["a"], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union orderings: [ + // [a ASC, b ASC, c ASC], + // [b ASC, a ASC, c ASC] + // ], const [] + vec![vec!["a", "b", "c"], vec!["b", "a", "c"]], + vec![], + ) + .run() + } + + #[test] + #[ignore] + // ignored due to https://github.com/apache/datafusion/issues/12446 + fn test_union_equivalence_properties_constants_desc() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) + .with_child_sort_and_const_exprs( + // NB `b DESC` in the second child + // First child orderings: [a ASC, c ASC], const [b] + vec![vec!["a", "c"]], + vec!["b"], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child orderings: [b ASC, c ASC], const [a] + vec![vec!["b DESC", "c"]], + vec!["a"], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union orderings: [ + // [a ASC, b ASC, c ASC], + // [b ASC, a ASC, c ASC] + // ], const [] + vec![vec!["a", "b DESC", "c"], vec!["b DESC", "a", "c"]], + vec![], + ) + .run() + } + + #[test] + #[ignore] + // ignored due to https://github.com/apache/datafusion/issues/12446 + fn test_union_equivalence_properties_constants_middle() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) + .with_child_sort_and_const_exprs( + // First child: [a ASC, b ASC, d ASC], const [c] + vec![vec!["a", "b", "d"]], + vec!["c"], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child: [a ASC, c ASC, d ASC], const [b] + vec![vec!["a", "c", "d"]], + vec!["b"], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union orderings: + // [a, b, d] (c constant) + // [a, c, d] (b constant) + vec![vec!["a", "c", "b", "d"], vec!["a", "b", "c", "d"]], + vec![], + ) + .run() + } + + #[test] + #[ignore] + // ignored due to https://github.com/apache/datafusion/issues/12446 + fn test_union_equivalence_properties_constants_middle_desc() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) + .with_child_sort_and_const_exprs( + // NB `b DESC` in the first child + // + // First child: [a ASC, b DESC, d ASC], const [c] + vec![vec!["a", "b DESC", "d"]], + vec!["c"], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child: [a ASC, c ASC, d ASC], const [b] + vec![vec!["a", "c", "d"]], + vec!["b"], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union orderings: + // [a, b, d] (c constant) + // [a, c, d] (b constant) + vec![vec!["a", "c", "b DESC", "d"], vec!["a", "b DESC", "c", "d"]], + vec![], + ) + .run() + } + + // TODO tests with multiple constants - let union_expected_orderings = union_orderings + #[derive(Debug)] + struct UnionEquivalenceTest { + /// The schema of the output of the Union + output_schema: SchemaRef, + /// The equivalence properties of each child to the union + child_properties: Vec, + /// The expected output properties of the union. Must be set before + /// running `build` + expected_properties: Option, + } + + impl UnionEquivalenceTest { + fn new(output_schema: &SchemaRef) -> Self { + Self { + output_schema: Arc::clone(output_schema), + child_properties: vec![], + expected_properties: None, + } + } + + /// Add a union input with the specified orderings + /// + /// See [`Self::make_props`] for the format of the strings in `orderings` + fn with_child_sort( + mut self, + orderings: Vec>, + schema: &SchemaRef, + ) -> Self { + let properties = self.make_props(orderings, vec![], schema); + self.child_properties.push(properties); + self + } + + /// Add a union input with the specified orderings and constant + /// equivalences + /// + /// See [`Self::make_props`] for the format of the strings in + /// `orderings` and `constants` + fn with_child_sort_and_const_exprs( + mut self, + orderings: Vec>, + constants: Vec<&'static str>, + schema: &SchemaRef, + ) -> Self { + let properties = self.make_props(orderings, constants, schema); + self.child_properties.push(properties); + self + } + + /// Set the expected output sort order for the union of the children + /// + /// See [`Self::make_props`] for the format of the strings in `orderings` + fn with_expected_sort(mut self, orderings: Vec>) -> Self { + let properties = self.make_props(orderings, vec![], &self.output_schema); + self.expected_properties = Some(properties); + self + } + + /// Set the expected output sort order and constant expressions for the + /// union of the children + /// + /// See [`Self::make_props`] for the format of the strings in + /// `orderings` and `constants`. + fn with_expected_sort_and_const_exprs( + mut self, + orderings: Vec>, + constants: Vec<&'static str>, + ) -> Self { + let properties = self.make_props(orderings, constants, &self.output_schema); + self.expected_properties = Some(properties); + self + } + + /// compute the union's output equivalence properties from the child + /// properties, and compare them to the expected properties + fn run(self) { + let Self { + output_schema, + child_properties, + expected_properties, + } = self; + let expected_properties = + expected_properties.expect("expected_properties not set"); + let actual_properties = + calculate_union(child_properties, Arc::clone(&output_schema)) + .expect("failed to calculate union equivalence properties"); + assert_eq_properties_same( + &actual_properties, + &expected_properties, + format!( + "expected: {expected_properties:?}\nactual: {actual_properties:?}" + ), + ); + } + + /// Make equivalence properties for the specified columns named in orderings and constants + /// + /// orderings: strings formatted like `"a"` or `"a DESC"`. See [`parse_sort_expr`] + /// constants: strings formatted like `"a"`. + fn make_props( + &self, + orderings: Vec>, + constants: Vec<&'static str>, + schema: &SchemaRef, + ) -> EquivalenceProperties { + let orderings = orderings .iter() - .map(|ordering| convert_to_sort_exprs(ordering)) + .map(|ordering| { + ordering + .iter() + .map(|name| parse_sort_expr(name, schema)) + .collect::>() + }) .collect::>(); - let union_constants = union_constants + + let constants = constants .iter() - .map(|expr| ConstExpr::new(Arc::clone(expr))) + .map(|col_name| ConstExpr::new(col(col_name, schema).unwrap())) .collect::>(); - let mut union_expected_eq = EquivalenceProperties::new(Arc::clone(&schema)); - union_expected_eq = union_expected_eq.with_constants(union_constants); - union_expected_eq.add_new_orderings(union_expected_orderings); - let actual_union_eq = calculate_union_binary(lhs, rhs)?; - let err_msg = format!( - "Error in test id: {:?}, test case: {:?}", - test_idx, test_cases[test_idx] - ); - assert_eq_properties_same(&actual_union_eq, &union_expected_eq, err_msg); + EquivalenceProperties::new_with_orderings(Arc::clone(schema), &orderings) + .with_constants(constants) } - Ok(()) } fn assert_eq_properties_same( @@ -3091,21 +3140,63 @@ mod tests { // Check whether constants are same let lhs_constants = lhs.constants(); let rhs_constants = rhs.constants(); - assert_eq!(lhs_constants.len(), rhs_constants.len(), "{}", err_msg); for rhs_constant in rhs_constants { assert!( const_exprs_contains(lhs_constants, rhs_constant.expr()), - "{}", - err_msg + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" ); } + assert_eq!( + lhs_constants.len(), + rhs_constants.len(), + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" + ); // Check whether orderings are same. let lhs_orderings = lhs.oeq_class(); let rhs_orderings = &rhs.oeq_class.orderings; - assert_eq!(lhs_orderings.len(), rhs_orderings.len(), "{}", err_msg); for rhs_ordering in rhs_orderings { - assert!(lhs_orderings.contains(rhs_ordering), "{}", err_msg); + assert!( + lhs_orderings.contains(rhs_ordering), + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" + ); } + assert_eq!( + lhs_orderings.len(), + rhs_orderings.len(), + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" + ); + } + + /// Converts a string to a physical sort expression + /// + /// # Example + /// * `"a"` -> (`"a"`, `SortOptions::default()`) + /// * `"a ASC"` -> (`"a"`, `SortOptions { descending: false, nulls_first: false }`) + fn parse_sort_expr(name: &str, schema: &SchemaRef) -> PhysicalSortExpr { + let mut parts = name.split_whitespace(); + let name = parts.next().expect("empty sort expression"); + let mut sort_expr = PhysicalSortExpr::new( + col(name, schema).expect("invalid column name"), + SortOptions::default(), + ); + + if let Some(options) = parts.next() { + sort_expr = match options { + "ASC" => sort_expr.asc(), + "DESC" => sort_expr.desc(), + _ => panic!( + "unknown sort options. Expected 'ASC' or 'DESC', got {}", + options + ), + } + } + + assert!( + parts.next().is_none(), + "unexpected tokens in column name. Expected 'name' / 'name ASC' / 'name DESC' but got '{name}'" + ); + + sort_expr } } From ff919224b1de8ac893f07810d3b1b82f068d4e3c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 1 Oct 2024 11:22:04 -0400 Subject: [PATCH 2/4] update doc test --- datafusion/physical-expr/src/equivalence/properties.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index 367429778bca..f9cc9ff42ab2 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -118,7 +118,7 @@ use itertools::Itertools; /// PhysicalSortExpr::new_default(col_c).desc(), /// ]); /// -/// assert_eq!(eq_properties.to_string(), "order: [a@0 ASC,c@2 DESC], const: [b@1]") +/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC,c@2 DESC]], const: [b@1]") /// ``` #[derive(Debug, Clone)] pub struct EquivalenceProperties { From 5affd7c341124436252d45a3f840b67234f6583a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Oct 2024 07:40:30 -0400 Subject: [PATCH 3/4] Undo import reordering --- datafusion/physical-expr/src/equivalence/ordering.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index 2b3b430b2fbd..bb3e9218bc41 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -15,14 +15,15 @@ // specific language governing permissions and limitations // under the License. -use crate::equivalence::add_offset_to_expr; -use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; -use arrow_schema::SortOptions; use std::fmt::Display; use std::hash::Hash; use std::sync::Arc; use std::vec::IntoIter; +use crate::equivalence::add_offset_to_expr; +use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; +use arrow_schema::SortOptions; + /// An `OrderingEquivalenceClass` object keeps track of different alternative /// orderings than can describe a schema. For example, consider the following table: /// From 0037100e8ef1d2c8929096373eab76e25073bb94 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Oct 2024 08:04:12 -0400 Subject: [PATCH 4/4] remove unecessary static lifetimes --- .../physical-expr/src/equivalence/properties.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index f9cc9ff42ab2..8137b4f9da13 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -3032,7 +3032,7 @@ mod tests { /// See [`Self::make_props`] for the format of the strings in `orderings` fn with_child_sort( mut self, - orderings: Vec>, + orderings: Vec>, schema: &SchemaRef, ) -> Self { let properties = self.make_props(orderings, vec![], schema); @@ -3047,8 +3047,8 @@ mod tests { /// `orderings` and `constants` fn with_child_sort_and_const_exprs( mut self, - orderings: Vec>, - constants: Vec<&'static str>, + orderings: Vec>, + constants: Vec<&str>, schema: &SchemaRef, ) -> Self { let properties = self.make_props(orderings, constants, schema); @@ -3059,7 +3059,7 @@ mod tests { /// Set the expected output sort order for the union of the children /// /// See [`Self::make_props`] for the format of the strings in `orderings` - fn with_expected_sort(mut self, orderings: Vec>) -> Self { + fn with_expected_sort(mut self, orderings: Vec>) -> Self { let properties = self.make_props(orderings, vec![], &self.output_schema); self.expected_properties = Some(properties); self @@ -3072,8 +3072,8 @@ mod tests { /// `orderings` and `constants`. fn with_expected_sort_and_const_exprs( mut self, - orderings: Vec>, - constants: Vec<&'static str>, + orderings: Vec>, + constants: Vec<&str>, ) -> Self { let properties = self.make_props(orderings, constants, &self.output_schema); self.expected_properties = Some(properties); @@ -3108,8 +3108,8 @@ mod tests { /// constants: strings formatted like `"a"`. fn make_props( &self, - orderings: Vec>, - constants: Vec<&'static str>, + orderings: Vec>, + constants: Vec<&str>, schema: &SchemaRef, ) -> EquivalenceProperties { let orderings = orderings