Skip to content

Commit

Permalink
Fix left semi project join when all build side rows have null join keys (#3635)
Browse files Browse the repository at this point in the history

Summary:
When the build side is not empty, but all of its rows have null join keys, the
result of a null-aware left semi project join should be NULL, not FALSE.

Fixes #3626.

Pull Request resolved: #3635

Reviewed By: xiaoxmeng

Differential Revision: D42344123

Pulled By: mbasmanova

fbshipit-source-id: 41dc97ba3fb5b0167f6b8e650a1c183891719ccd
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Jan 4, 2023
1 parent a37e20c commit 2672006
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
9 changes: 6 additions & 3 deletions velox/exec/HashProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,9 +642,12 @@ void HashProbe::prepareOutput(vector_size_t size) {

void HashProbe::fillLeftSemiProjectMatchColumn(vector_size_t size) {
if (emptyBuildSide()) {
// Build side is empty. All rows should return 'match = false', even ones
// with a null join key.
matchColumn() = BaseVector::createConstant(false, size, pool());
// Build side is empty or all rows have null join keys.
if (nullAware_ && buildSideHasNullKeys_) {
matchColumn() = BaseVector::createNullConstant(BOOLEAN(), size, pool());
} else {
matchColumn() = BaseVector::createConstant(false, size, pool());
}
} else {
auto flatMatch = matchColumn()->as<FlatVector<bool>>();
flatMatch->resize(size);
Expand Down
33 changes: 33 additions & 0 deletions velox/exec/tests/HashJoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3089,6 +3089,39 @@ TEST_F(HashJoinTest, semiProjectWithNullKeys) {
.referenceQuery(
"SELECT t0, t1, t0 IN (SELECT u0 FROM u WHERE u0 < 0) FROM t")
.run();

// Build side with all rows having null join keys.
plan = makePlan(false /*nullAware*/, "", "u0 IS NULL");

HashJoinBuilder(*pool_, duckDbQueryRunner_, executor_.get())
.planNode(plan)
.checkSpillStats(false)
.referenceQuery(
"SELECT t0, t1, EXISTS (SELECT * FROM u WHERE u0 = t0 AND u0 IS NULL) FROM t")
.run();

HashJoinBuilder(*pool_, duckDbQueryRunner_, executor_.get())
.planNode(flipJoinSides(plan))
.checkSpillStats(false)
.referenceQuery(
"SELECT t0, t1, EXISTS (SELECT * FROM u WHERE u0 = t0 AND u0 IS NULL) FROM t")
.run();

plan = makePlan(true /*nullAware*/, "", "u0 IS NULL");

HashJoinBuilder(*pool_, duckDbQueryRunner_, executor_.get())
.planNode(plan)
.checkSpillStats(false)
.referenceQuery(
"SELECT t0, t1, t0 IN (SELECT u0 FROM u WHERE u0 IS NULL) FROM t")
.run();

HashJoinBuilder(*pool_, duckDbQueryRunner_, executor_.get())
.planNode(flipJoinSides(plan))
.checkSpillStats(false)
.referenceQuery(
"SELECT t0, t1, t0 IN (SELECT u0 FROM u WHERE u0 IS NULL) FROM t")
.run();
}

TEST_F(HashJoinTest, semiProjectOverLazyVectors) {
Expand Down

0 comments on commit 2672006

Please sign in to comment.