-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support tuples as types #11896
Support tuples as types #11896
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,7 @@ use crate::Operator; | |
use arrow::array::{new_empty_array, Array}; | ||
use arrow::compute::can_cast_types; | ||
use arrow::datatypes::{ | ||
DataType, Field, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, | ||
DataType, Field, FieldRef, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, | ||
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, | ||
}; | ||
use datafusion_common::{exec_datafusion_err, plan_datafusion_err, plan_err, Result}; | ||
|
@@ -498,6 +498,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D | |
.or_else(|| string_numeric_coercion(lhs_type, rhs_type)) | ||
.or_else(|| string_temporal_coercion(lhs_type, rhs_type)) | ||
.or_else(|| binary_coercion(lhs_type, rhs_type)) | ||
.or_else(|| struct_coercion(lhs_type, rhs_type)) | ||
} | ||
|
||
/// Coerce `lhs_type` and `rhs_type` to a common type for value exprs | ||
|
@@ -780,6 +781,31 @@ fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType | |
} | ||
} | ||
|
||
fn struct_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> { | ||
use arrow::datatypes::DataType::*; | ||
match (lhs_type, rhs_type) { | ||
(Struct(lhs_fields), Struct(rhs_fields)) => { | ||
if lhs_fields.len() != rhs_fields.len() { | ||
return None; | ||
} | ||
|
||
let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter()) | ||
.map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), rhs.data_type())) | ||
.collect::<Option<Vec<DataType>>>()?; | ||
|
||
let fields = types | ||
.into_iter() | ||
.enumerate() | ||
.map(|(i, datatype)| { | ||
Arc::new(Field::new(format!("c{i}"), datatype, true)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should preserve field name and nullable, only type is updated |
||
}) | ||
.collect::<Vec<FieldRef>>(); | ||
Some(Struct(fields.into())) | ||
} | ||
_ => None, | ||
} | ||
} | ||
|
||
/// Returns the output type of applying mathematics operations such as | ||
/// `+` to arguments of `lhs_type` and `rhs_type`. | ||
fn mathematics_numerical_coercion( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -661,6 +661,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |
} | ||
not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") | ||
} | ||
SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), | ||
_ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), | ||
} | ||
} | ||
|
@@ -670,7 +671,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |
&self, | ||
schema: &DFSchema, | ||
planner_context: &mut PlannerContext, | ||
values: Vec<sqlparser::ast::Expr>, | ||
values: Vec<SQLExpr>, | ||
fields: Vec<StructField>, | ||
) -> Result<Expr> { | ||
if !fields.is_empty() { | ||
|
@@ -695,6 +696,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | |
not_impl_err!("Struct not supported by ExprPlanner: {create_struct_args:?}") | ||
} | ||
|
||
fn parse_tuple( | ||
&self, | ||
schema: &DFSchema, | ||
planner_context: &mut PlannerContext, | ||
values: Vec<SQLExpr>, | ||
) -> Result<Expr> { | ||
match values.first() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check all |
||
Some(SQLExpr::Identifier(_)) | Some(SQLExpr::Value(_)) => { | ||
self.parse_struct(schema, planner_context, values, vec![]) | ||
} | ||
None => not_impl_err!("Empty tuple not supported yet"), | ||
_ => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was trying to figure out how to write a negative test for this case, however anything I could try seems to work DataFusion CLI v41.0.0
> select * from foo where (column1) IN ((column1+1), (2));
+---------+
| column1 |
+---------+
+---------+
0 row(s) fetched.
Elapsed 0.008 seconds.
> select * from foo where (column1) IN ((column1+1), (2), (1));
+---------+
| column1 |
+---------+
| 1 |
+---------+
1 row(s) fetched.
Elapsed 0.010 seconds. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. query error DataFusion error: This feature is not implemented: Only identifiers and literals are supported in tuples
select a from values where (a, c) IN ((abs(a), 1)); Query success in duckdb
We could support There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will file a ticket to track the feature There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I am not sure about whether our evaulation support struct array like |
||
not_impl_err!("Only identifiers and literals are supported in tuples") | ||
} | ||
} | ||
} | ||
|
||
fn sql_position_to_expr( | ||
&self, | ||
substr_expr: SQLExpr, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -218,9 +218,6 @@ select named_struct('field_a', 1, 'field_b', 2); | |
---- | ||
{field_a: 1, field_b: 2} | ||
|
||
statement ok | ||
drop table values; | ||
|
||
query T | ||
select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3)); | ||
---- | ||
|
@@ -236,3 +233,44 @@ query ? | |
select {'animal': {'cat': 1, 'dog': 2, 'bird': {'parrot': 3, 'canary': 1}}, 'genre': {'fiction': ['mystery', 'sci-fi', 'fantasy'], 'non-fiction': {'biography': 5, 'history': 7, 'science': {'physics': 2, 'biology': 3}}}, 'vehicle': {'car': {'sedan': 4, 'suv': 2}, 'bicycle': 3, 'boat': ['sailboat', 'motorboat']}, 'weather': {'sunny': True, 'temperature': 25.5, 'wind': {'speed': 10, 'direction': 'NW'}}}; | ||
---- | ||
{animal: {cat: 1, dog: 2, bird: {parrot: 3, canary: 1}}, genre: {fiction: [mystery, sci-fi, fantasy], non-fiction: {biography: 5, history: 7, science: {physics: 2, biology: 3}}}, vehicle: {car: {sedan: 4, suv: 2}, bicycle: 3, boat: [sailboat, motorboat]}, weather: {sunny: true, temperature: 25.5, wind: {speed: 10, direction: NW}}} | ||
|
||
# test tuple as struct | ||
query B | ||
select ('x', 'y') = ('x', 'y'); | ||
---- | ||
true | ||
|
||
query B | ||
select ('x', 'y') = ('y', 'x'); | ||
---- | ||
false | ||
|
||
query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation Struct.* | ||
select ('x', 'y') = ('x', 'y', 'z'); | ||
|
||
query B | ||
select ('x', 'y') IN (('x', 'y')); | ||
---- | ||
true | ||
|
||
query B | ||
select ('x', 'y') IN (('x', 'y'), ('y', 'x')); | ||
samuelcolvin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
---- | ||
true | ||
|
||
query I | ||
select a from values where (a, c) = (1, 'a'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I recommend we also do a negative test, like
|
||
---- | ||
1 | ||
|
||
query I | ||
select a from values where (a, c) IN ((1, 'a'), (2, 'b')); | ||
samuelcolvin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
---- | ||
1 | ||
2 | ||
|
||
statement ok | ||
drop table values; | ||
|
||
statement ok | ||
drop table struct_values; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please document what the intended rules for coercion are, in English, as a comment? That would help me understand if the code implements the intent correctly
I think the result is if all the types can be cocerced, then the output is a struct with fields named
c1
,c2
, etcI wonder if it would be better / easier to understand if the names of the fields were preserved (aka the field names from lhs_fields)