Skip to content

Commit

Permalink
grace join: fix FROM ANY vs bloomfilter (#7384)
Browse files Browse the repository at this point in the history
  • Loading branch information
yumkam authored Aug 2, 2024
1 parent 6416406 commit 153df90
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 10 deletions.
2 changes: 0 additions & 2 deletions ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,6 @@ class TGraceJoinSpillingSupportState : public TComputationValue<TGraceJoinSpilli
, PartialJoinCompleted(std::make_unique<bool>(false))
, HaveMoreLeftRows(std::make_unique<bool>(true))
, HaveMoreRightRows(std::make_unique<bool>(true))
, JoinedTuple(std::make_unique<std::vector<NUdf::TUnboxedValue*>>() )
, IsSelfJoin_(isSelfJoin)
, SelfJoinSameKeys_(isSelfJoin && (leftKeyColumns == rightKeyColumns))
, IsSpillingAllowed(isSpillingAllowed)
Expand Down Expand Up @@ -989,7 +988,6 @@ EFetchResult ProcessSpilledData(TComputationContext&, NUdf::TUnboxedValue*const*
const std::unique_ptr<bool> PartialJoinCompleted;
const std::unique_ptr<bool> HaveMoreLeftRows;
const std::unique_ptr<bool> HaveMoreRightRows;
const std::unique_ptr<std::vector<NUdf::TUnboxedValue*>> JoinedTuple;
const bool IsSelfJoin_;
const bool SelfJoinSameKeys_;
const bool IsSpillingAllowed;
Expand Down
21 changes: 13 additions & 8 deletions ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,13 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu
}
}

TempTuple[0] &= ui64(0x1); // Setting only nulls in key bit, all other bits are ignored for key hash
for (ui32 i = 1; i < NullsBitmapSize_; i ++) {
TempTuple[i] = 0;
}

XXH64_hash_t hash = XXH64(TempTuple.data() + NullsBitmapSize_, (TempTuple.size() - NullsBitmapSize_) * sizeof(ui64), 0);

if (!hash) hash = 1;

ui64 bucket = hash & BucketsMask;

if (other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) {
if (!IsAny_ && other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) {
auto bucket2 = &other.TableBucketsStats[bucket];
auto &bloomFilter = bucket2->BloomFilter;
++BloomLookups_;
Expand All @@ -105,15 +100,25 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu
ui32 offset = keyIntVals.size(); // Offset of tuple inside the keyIntVals vector

keyIntVals.push_back(hash);
keyIntVals.insert(keyIntVals.end(), intColumns, intColumns + NullsBitmapSize_);
keyIntVals.insert(keyIntVals.end(), TempTuple.begin() + NullsBitmapSize_, TempTuple.end());
keyIntVals.insert(keyIntVals.end(), TempTuple.begin(), TempTuple.end());

if (IsAny_) {
if ( !AddKeysToHashTable(kh, keyIntVals.begin() + offset, iColumns) ) {
keyIntVals.resize(offset);
++AnyFiltered_;
return EAddTupleResult::AnyMatch;
}

if (other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) {
auto bucket2 = &other.TableBucketsStats[bucket];
auto &bloomFilter = bucket2->BloomFilter;
++BloomLookups_;
if (bloomFilter.IsMissing(hash)) {
keyIntVals.resize(offset);
++BloomHits_;
return EAddTupleResult::Unmatched;
}
}
}

TableBucketsStats[bucket].TuplesNum++;
Expand Down

0 comments on commit 153df90

Please sign in to comment.