From 7d45b7b8f16e30f079800a2bf504459d7ad29cf3 Mon Sep 17 00:00:00 2001 From: Yuriy Kaminskiy Date: Thu, 1 Aug 2024 21:35:33 +0300 Subject: [PATCH 1/4] grace join: remove unused variable --- ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp index 4f4b83168e47..e0ff41bba739 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp @@ -584,7 +584,6 @@ class TGraceJoinSpillingSupportState : public TComputationValue(false)) , HaveMoreLeftRows(std::make_unique(true)) , HaveMoreRightRows(std::make_unique(true)) - , JoinedTuple(std::make_unique>() ) , IsSelfJoin_(isSelfJoin) , SelfJoinSameKeys_(isSelfJoin && (leftKeyColumns == rightKeyColumns)) , IsSpillingAllowed(isSpillingAllowed) @@ -989,7 +988,6 @@ EFetchResult ProcessSpilledData(TComputationContext&, NUdf::TUnboxedValue*const* const std::unique_ptr PartialJoinCompleted; const std::unique_ptr HaveMoreLeftRows; const std::unique_ptr HaveMoreRightRows; - const std::unique_ptr> JoinedTuple; const bool IsSelfJoin_; const bool SelfJoinSameKeys_; const bool IsSpillingAllowed; From 220e60c7a9056a7ee05d4629ce9df3ea1c301376 Mon Sep 17 00:00:00 2001 From: Yuriy Kaminskiy Date: Thu, 1 Aug 2024 21:38:21 +0300 Subject: [PATCH 2/4] grace join: fix bloomfilter vs FROM ANY Fix case of rows with duplicated key on left side that does not match anything at right side in SELECT * FROM ANY foo LEFT JOIN bar --- .../comp_nodes/mkql_grace_join_imp.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp index dc79431f1550..95847aa9e60f 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp @@ -86,16 +86,6 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu ui64 bucket = hash & BucketsMask; - if (other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) { - auto bucket2 = &other.TableBucketsStats[bucket]; - auto &bloomFilter = bucket2->BloomFilter; - ++BloomLookups_; - if (bloomFilter.IsMissing(hash)) { - ++BloomHits_; - return EAddTupleResult::Unmatched; - } - } - std::vector> & keyIntVals = TableBuckets[bucket].KeyIntVals; std::vector> & stringsOffsets = TableBuckets[bucket].StringsOffsets; std::vector> & dataIntVals = TableBuckets[bucket].DataIntVals; @@ -116,6 +106,17 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu } } + if (other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) { + auto bucket2 = &other.TableBucketsStats[bucket]; + auto &bloomFilter = bucket2->BloomFilter; + ++BloomLookups_; + if (bloomFilter.IsMissing(hash)) { + keyIntVals.resize(offset); + ++BloomHits_; + return EAddTupleResult::Unmatched; + } + } + TableBucketsStats[bucket].TuplesNum++; if (NumberOfStringColumns || NumberOfIColumns ) { From 72695162120a7fa06f0c69221f0dff3fb8f8303f Mon Sep 17 00:00:00 2001 From: Yuriy Kaminskiy Date: Thu, 1 Aug 2024 23:18:29 +0300 Subject: [PATCH 3/4] fix performance regression --- .../comp_nodes/mkql_grace_join_imp.cpp | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp index 95847aa9e60f..ca53c49523c3 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp @@ -86,6 +86,16 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu ui64 bucket = hash & BucketsMask; + if (!IsAny_ && other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) { + auto bucket2 = &other.TableBucketsStats[bucket]; + auto &bloomFilter = bucket2->BloomFilter; + ++BloomLookups_; + if (bloomFilter.IsMissing(hash)) { + ++BloomHits_; + return EAddTupleResult::Unmatched; + } + } + std::vector> & keyIntVals = TableBuckets[bucket].KeyIntVals; std::vector> & stringsOffsets = TableBuckets[bucket].StringsOffsets; std::vector> & dataIntVals = TableBuckets[bucket].DataIntVals; @@ -104,16 +114,16 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu ++AnyFiltered_; return EAddTupleResult::AnyMatch; } - } - if (other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) { - auto bucket2 = &other.TableBucketsStats[bucket]; - auto &bloomFilter = bucket2->BloomFilter; - ++BloomLookups_; - if (bloomFilter.IsMissing(hash)) { - keyIntVals.resize(offset); - ++BloomHits_; - return EAddTupleResult::Unmatched; + if (other.TableBucketsStats[bucket].BloomFilter.IsFinalized()) { + auto bucket2 = &other.TableBucketsStats[bucket]; + auto &bloomFilter = bucket2->BloomFilter; + ++BloomLookups_; + if (bloomFilter.IsMissing(hash)) { + keyIntVals.resize(offset); + ++BloomHits_; + return EAddTupleResult::Unmatched; + } } } From 500466ba99e9e16fca6a658f1a5e8e965c806a6b Mon Sep 17 00:00:00 2001 From: Yuriy Kaminskiy Date: Thu, 1 Aug 2024 23:31:10 +0300 Subject: [PATCH 4/4] grace join: reduce number of copying --- .../yql/minikql/comp_nodes/mkql_grace_join_imp.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp index ca53c49523c3..0ad22b072942 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp @@ -75,11 +75,6 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu } } - TempTuple[0] &= ui64(0x1); // Setting only nulls in key bit, all other bits are ignored for key hash - for (ui32 i = 1; i < NullsBitmapSize_; i ++) { - TempTuple[i] = 0; - } - XXH64_hash_t hash = XXH64(TempTuple.data() + NullsBitmapSize_, (TempTuple.size() - NullsBitmapSize_) * sizeof(ui64), 0); if (!hash) hash = 1; @@ -105,8 +100,7 @@ TTable::EAddTupleResult TTable::AddTuple( ui64 * intColumns, char ** stringColu ui32 offset = keyIntVals.size(); // Offset of tuple inside the keyIntVals vector keyIntVals.push_back(hash); - keyIntVals.insert(keyIntVals.end(), intColumns, intColumns + NullsBitmapSize_); - keyIntVals.insert(keyIntVals.end(), TempTuple.begin() + NullsBitmapSize_, TempTuple.end()); + keyIntVals.insert(keyIntVals.end(), TempTuple.begin(), TempTuple.end()); if (IsAny_) { if ( !AddKeysToHashTable(kh, keyIntVals.begin() + offset, iColumns) ) {