Skip to content

Commit

Permalink
Support tuples as types (#11896)
Browse files Browse the repository at this point in the history
* support tuples as types

* use compare_op_for_nested

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
samuelcolvin and alamb authored Aug 12, 2024
1 parent ffdc61d commit 140f7ce
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 8 deletions.
28 changes: 27 additions & 1 deletion datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::operator::Operator;
use arrow::array::{new_empty_array, Array};
use arrow::compute::can_cast_types;
use arrow::datatypes::{
DataType, Field, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
DataType, Field, FieldRef, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
};
use datafusion_common::{exec_datafusion_err, plan_datafusion_err, plan_err, Result};
Expand Down Expand Up @@ -498,6 +498,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
.or_else(|| string_temporal_coercion(lhs_type, rhs_type))
.or_else(|| binary_coercion(lhs_type, rhs_type))
.or_else(|| struct_coercion(lhs_type, rhs_type))
}

/// Coerce `lhs_type` and `rhs_type` to a common type for value exprs
Expand Down Expand Up @@ -780,6 +781,31 @@ fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType
}
}

fn struct_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Struct(lhs_fields), Struct(rhs_fields)) => {
if lhs_fields.len() != rhs_fields.len() {
return None;
}

let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter())
.map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), rhs.data_type()))
.collect::<Option<Vec<DataType>>>()?;

let fields = types
.into_iter()
.enumerate()
.map(|(i, datatype)| {
Arc::new(Field::new(format!("c{i}"), datatype, true))
})
.collect::<Vec<FieldRef>>();
Some(Struct(fields.into()))
}
_ => None,
}
}

/// Returns the output type of applying mathematics operations such as
/// `+` to arguments of `lhs_type` and `rhs_type`.
fn mathematics_numerical_coercion(
Expand Down
10 changes: 7 additions & 3 deletions datafusion/physical-expr/src/expressions/in_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ use crate::PhysicalExpr;
use arrow::array::*;
use arrow::buffer::BooleanBuffer;
use arrow::compute::kernels::boolean::{not, or_kleene};
use arrow::compute::kernels::cmp::eq;
use arrow::compute::take;
use arrow::datatypes::*;
use arrow::util::bit_iterator::BitIndexIterator;
Expand All @@ -41,7 +40,8 @@ use datafusion_common::hash_utils::HashValue;
use datafusion_common::{
exec_err, internal_err, not_impl_err, DFSchema, Result, ScalarValue,
};
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ColumnarValue, Operator};
use datafusion_physical_expr_common::datum::compare_op_for_nested;

use ahash::RandomState;
use hashbrown::hash_map::RawEntryMut;
Expand Down Expand Up @@ -361,7 +361,11 @@ impl PhysicalExpr for InListExpr {
|result, expr| -> Result<BooleanArray> {
Ok(or_kleene(
&result,
&eq(&value, &expr?.into_array(num_rows)?)?,
&compare_op_for_nested(
Operator::Eq,
&value,
&expr?.into_array(num_rows)?,
)?,
)?)
},
)?;
Expand Down
20 changes: 19 additions & 1 deletion datafusion/sql/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}")
}
SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values),
_ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
}
}
Expand All @@ -670,7 +671,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
&self,
schema: &DFSchema,
planner_context: &mut PlannerContext,
values: Vec<sqlparser::ast::Expr>,
values: Vec<SQLExpr>,
fields: Vec<StructField>,
) -> Result<Expr> {
if !fields.is_empty() {
Expand All @@ -695,6 +696,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}")
}

/// Plans a SQL tuple expression such as `(a, 'x')` by delegating to
/// `parse_struct` with an empty list of field definitions.
///
/// Returns a "not implemented" error when the tuple is empty or when its
/// first element is neither an identifier nor a literal value.
///
/// NOTE(review): only the first element is inspected before delegating; the
/// remaining elements are forwarded to `parse_struct` unchecked.
fn parse_tuple(
    &self,
    schema: &DFSchema,
    planner_context: &mut PlannerContext,
    values: Vec<SQLExpr>,
) -> Result<Expr> {
    let Some(head) = values.first() else {
        return not_impl_err!("Empty tuple not supported yet");
    };

    let head_is_supported = matches!(head, SQLExpr::Identifier(_) | SQLExpr::Value(_));
    if !head_is_supported {
        return not_impl_err!("Only identifiers and literals are supported in tuples");
    }

    self.parse_struct(schema, planner_context, values, vec![])
}

fn sql_position_to_expr(
&self,
substr_expr: SQLExpr,
Expand Down
44 changes: 41 additions & 3 deletions datafusion/sqllogictest/test_files/struct.slt
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,6 @@ select named_struct('field_a', 1, 'field_b', 2);
----
{field_a: 1, field_b: 2}

statement ok
drop table values;

query T
select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3));
----
Expand All @@ -236,3 +233,44 @@ query ?
select {'animal': {'cat': 1, 'dog': 2, 'bird': {'parrot': 3, 'canary': 1}}, 'genre': {'fiction': ['mystery', 'sci-fi', 'fantasy'], 'non-fiction': {'biography': 5, 'history': 7, 'science': {'physics': 2, 'biology': 3}}}, 'vehicle': {'car': {'sedan': 4, 'suv': 2}, 'bicycle': 3, 'boat': ['sailboat', 'motorboat']}, 'weather': {'sunny': True, 'temperature': 25.5, 'wind': {'speed': 10, 'direction': 'NW'}}};
----
{animal: {cat: 1, dog: 2, bird: {parrot: 3, canary: 1}}, genre: {fiction: [mystery, sci-fi, fantasy], non-fiction: {biography: 5, history: 7, science: {physics: 2, biology: 3}}}, vehicle: {car: {sedan: 4, suv: 2}, bicycle: 3, boat: [sailboat, motorboat]}, weather: {sunny: true, temperature: 25.5, wind: {speed: 10, direction: NW}}}

# Test that tuples are planned as structs and can be compared with `=` and `IN`
query B
select ('x', 'y') = ('x', 'y');
----
true

query B
select ('x', 'y') = ('y', 'x');
----
false

query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation Struct.*
select ('x', 'y') = ('x', 'y', 'z');

query B
select ('x', 'y') IN (('x', 'y'));
----
true

query B
select ('x', 'y') IN (('x', 'y'), ('y', 'x'));
----
true

query I
select a from values where (a, c) = (1, 'a');
----
1

query I
select a from values where (a, c) IN ((1, 'a'), (2, 'b'));
----
1
2

statement ok
drop table values;

statement ok
drop table struct_values;

0 comments on commit 140f7ce

Please sign in to comment.