Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve empty relation opt for join types #11066

Merged
merged 7 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 52 additions & 5 deletions datafusion/optimizer/src/propagate_empty_relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,20 @@ impl OptimizerRule for PropagateEmptyRelation {

LogicalPlan::Join(ref join) => {
// TODO: For Join, more join type need to be careful:
// For LeftAnti Join, if the right side is empty, the Join result is left side(should exclude null ??).
// For RightAnti Join, if the left side is empty, the Join result is right side(should exclude null ??).
// For Full Join, only both sides are empty, the Join result is empty.
// For LeftOut/Full Join, if the right side is empty, the Join can be eliminated with a Projection with left side
// columns + right side columns replaced with null values.
// For RightOut/Full Join, if the left side is empty, the Join can be eliminated with a Projection with right side
// columns + left side columns replaced with null values.
let (left_empty, right_empty) = binary_plan_children_is_empty(&plan)?;

match join.join_type {
// For Full Join, only both sides are empty, the Join result is empty.
JoinType::Full if left_empty && right_empty => Ok(Transformed::yes(
LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
schema: join.schema.clone(),
}),
)),
JoinType::Inner if left_empty || right_empty => Ok(Transformed::yes(
LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
Expand Down Expand Up @@ -134,13 +138,19 @@ impl OptimizerRule for PropagateEmptyRelation {
schema: join.schema.clone(),
}),
)),
JoinType::LeftAnti if right_empty => {
Copy link
Contributor

@alamb alamb Jun 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I double checked that an ANTI join (e.g. a NOT IN) is true when there are no tuples (aka the right side is emprt)

postgres=# select * from foo;
  x
-----
   1
   2
 NaN
(3 rows)

postgres=# select * from bar;
 y
---
(0 rows)

postgres=# select x NOT IN (SELECT y from bar) from foo;
 ?column?
----------
 t
 t
 t
(3 rows)

postgres=# select x from foo where NOT EXISTS (SELECT y from bar);
  x
-----
   1
   2
 NaN
(3 rows)

Thus I think this is a valid rewrite

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, thanks for the double check!

Ok(Transformed::yes((*join.left).clone()))
}
JoinType::RightAnti if left_empty => {
Ok(Transformed::yes((*join.right).clone()))
}
JoinType::RightAnti if right_empty => Ok(Transformed::yes(
LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
schema: join.schema.clone(),
}),
)),
_ => Ok(Transformed::no(LogicalPlan::Join(join.clone()))),
_ => Ok(Transformed::no(plan)),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

}
}
LogicalPlan::Aggregate(ref agg) => {
Expand Down Expand Up @@ -467,8 +477,39 @@ mod tests {
assert_together_optimized_plan(plan, expected, eq)
}

// TODO: fix this long name
fn assert_anti_join_empty_join_table_is_base_table(
anti_left_join: bool,
) -> Result<()> {
// if we have an anti join with an empty join table, then the result is the base_table
let (left, right, join_type, expected) = if anti_left_join {
let left = test_table_scan()?;
let right = LogicalPlanBuilder::from(test_table_scan()?)
.filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
.build()?;
let expected = left.display_indent().to_string();
(left, right, JoinType::LeftAnti, expected)
} else {
let right = test_table_scan()?;
let left = LogicalPlanBuilder::from(test_table_scan()?)
.filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
.build()?;
let expected = right.display_indent().to_string();
(left, right, JoinType::RightAnti, expected)
};

let plan = LogicalPlanBuilder::from(left)
.join_using(right, join_type, vec![Column::from_name("a".to_string())])?
.build()?;

assert_together_optimized_plan(plan, &expected, true)
}

#[test]
fn test_join_empty_propagation_rules() -> Result<()> {
// test full join with empty left and empty right
assert_empty_left_empty_right_lp(true, true, JoinType::Full, true)?;

// test left join with empty left
assert_empty_left_empty_right_lp(true, false, JoinType::Left, true)?;

Expand All @@ -491,7 +532,13 @@ mod tests {
assert_empty_left_empty_right_lp(true, false, JoinType::LeftAnti, true)?;

// test right anti join empty right
assert_empty_left_empty_right_lp(false, true, JoinType::RightAnti, true)
assert_empty_left_empty_right_lp(false, true, JoinType::RightAnti, true)?;

// test left anti join empty right
assert_anti_join_empty_join_table_is_base_table(true)?;

// test right anti join empty left
assert_anti_join_empty_join_table_is_base_table(false)
}

#[test]
Expand Down
30 changes: 30 additions & 0 deletions datafusion/sqllogictest/test_files/joins.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3781,3 +3781,33 @@ EXPLAIN SELECT * FROM (
) AS a RIGHT ANTI JOIN (SELECT 1 AS a WHERE 1=0) AS b ON a.a=b.a;
----
logical_plan EmptyRelation

# FULL OUTER join with empty left and empty right table
query TT
EXPLAIN SELECT * FROM (
SELECT 1 as a WHERE 1=0
) AS a FULL JOIN (SELECT 1 AS a WHERE 1=0) AS b ON a.a=b.a;
----
logical_plan EmptyRelation

# Left ANTI join with empty right table
query TT
EXPLAIN SELECT * FROM (
SELECT 1 as a
) AS a LEFT ANTI JOIN (SELECT 1 AS a WHERE 1=0) as b ON a.a=b.a;
----
logical_plan
01)SubqueryAlias: a
02)--Projection: Int64(1) AS a
03)----EmptyRelation

# Right ANTI join with empty left table
query TT
EXPLAIN SELECT * FROM (
SELECT 1 as a WHERE 1=0
) AS a RIGHT ANTI JOIN (SELECT 1 AS a) as b ON a.a=b.a;
----
logical_plan
01)SubqueryAlias: b
02)--Projection: Int64(1) AS a
03)----EmptyRelation
5 changes: 1 addition & 4 deletions datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,7 @@ SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id
query TT
explain SELECT t1_id, t1_name FROM t1 WHERE NOT EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0)
----
logical_plan
01)LeftAnti Join: t1.t1_id = __correlated_sq_1.t2_id
02)--TableScan: t1 projection=[t1_id, t1_name]
03)--EmptyRelation
logical_plan TableScan: t1 projection=[t1_id, t1_name]

query IT rowsort
SELECT t1_id, t1_name FROM t1 WHERE NOT EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0)
Expand Down