Skip to content

Commit

Permalink
[CBO] Merge transitive closure + cycles fixes in DPHyp (#10857)
Browse files Browse the repository at this point in the history
Co-authored-by: yumkam <yumkam7@ydb.tech>
Co-authored-by: Pavel Velikhov <pavelvelikhov@ydb.tech>
Co-authored-by: Pavel Ivanov <pudge1000-7@qavm-9f0570a4.qemu>
  • Loading branch information
4 people authored Oct 25, 2024
1 parent 0bf5166 commit 5ef387d
Show file tree
Hide file tree
Showing 35 changed files with 1,219 additions and 765 deletions.
28 changes: 13 additions & 15 deletions ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ TMaybeNode<TKqlKeyInc> GetRightTableKeyPrefix(const TKqlKeyRange& range) {
/**
* KQP specific rule to check if a LookupJoin is applicable
*/
bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNode>& node, const TVector<TString>& joinColumns, const TKqpProviderContext& ctx) {
bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNode>& node, const TVector<TJoinColumn>& joinColumns, const TKqpProviderContext& ctx) {

auto rel = std::static_pointer_cast<TKqpRelOptimizerNode>(node);
auto expr = TExprBase(rel->Node);
Expand All @@ -45,7 +45,7 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNod
return false;
}

if (find_if(joinColumns.begin(), joinColumns.end(), [&] (const TString& s) { return node->Stats->KeyColumns->Data[0] == s;}) != joinColumns.end()) {
if (std::find_if(joinColumns.begin(), joinColumns.end(), [&] (const TJoinColumn& c) { return node->Stats->KeyColumns->Data[0] == c.AttributeName;}) != joinColumns.end()) {
return true;
}

Expand Down Expand Up @@ -97,8 +97,8 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNod
return false;
}

if (prefixSize < node->Stats->KeyColumns->Data.size() && (find_if(joinColumns.begin(), joinColumns.end(), [&] (const TString& s) {
return node->Stats->KeyColumns->Data[prefixSize] == s;
if (prefixSize < node->Stats->KeyColumns->Data.size() && (std::find_if(joinColumns.begin(), joinColumns.end(), [&] (const TJoinColumn& c) {
return node->Stats->KeyColumns->Data[prefixSize] == c.AttributeName;
}) == joinColumns.end())){
return false;
}
Expand All @@ -108,12 +108,11 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNod

bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
std::shared_ptr<IBaseOptimizerNode> right,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
const TVector<TJoinColumn>& leftJoinKeys,
const TVector<TJoinColumn>& rightJoinKeys,
TKqpProviderContext& ctx
) {
Y_UNUSED(left, joinConditions, leftJoinKeys);
Y_UNUSED(left, leftJoinKeys);

if (!(right->Stats->StorageType == EStorageType::RowStorage)) {
return false;
Expand All @@ -130,7 +129,7 @@ bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
}

for (auto rightCol : rightJoinKeys) {
if (std::find(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), rightCol) == rightStats->KeyColumns->Data.end()) {
if (find(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), rightCol.AttributeName) == rightStats->KeyColumns->Data.end()) {
return false;
}
}
Expand All @@ -142,18 +141,17 @@ bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,

bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
const TVector<TJoinColumn>& leftJoinKeys,
const TVector<TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
EJoinKind joinKind) {
EJoinKind joinKind) {

switch( joinAlgo ) {
case EJoinAlgoType::LookupJoin:
if ((OptLevel != 3) && (left->Stats->Nrows > 1000)) {
return false;
}
return IsLookupJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, *this);
return IsLookupJoinApplicable(left, right, leftJoinKeys, rightJoinKeys, *this);

case EJoinAlgoType::LookupJoinReverse:
if (joinKind != EJoinKind::LeftSemi) {
Expand All @@ -162,7 +160,7 @@ bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr<IBaseOptimizerN
if ((OptLevel != 3) && (right->Stats->Nrows > 1000)) {
return false;
}
return IsLookupJoinApplicable(right, left, joinConditions, rightJoinKeys, leftJoinKeys, *this);
return IsLookupJoinApplicable(right, left, rightJoinKeys, leftJoinKeys, *this);

case EJoinAlgoType::MapJoin:
return joinKind != EJoinKind::OuterJoin && joinKind != EJoinKind::Exclusion && right->Stats->ByteSize < 1e6;
Expand Down
3 changes: 1 addition & 2 deletions ydb/core/kqp/opt/logical/kqp_opt_cbo.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ struct TKqpProviderContext : public NYql::TBaseProviderContext {

virtual bool IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
const TVector<NYql::NDq::TJoinColumn>& leftJoinKeys, const TVector<NYql::NDq::TJoinColumn>& rightJoinKeys,
NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) override;

virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override;
Expand Down
18 changes: 18 additions & 0 deletions ydb/core/kqp/ut/common/kqp_ut_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,24 @@ NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan) {
return GetJoinOrderImpl(optRoot);
}

/**
 * Recursively reduces a detailed join order tree to a tree that keeps only
 * the table entries.
 *
 * A node that has a "table" key is treated as a leaf: the value stored under
 * that key is returned as is. Any other node is expected to carry an "args"
 * array; every element of it is converted recursively and appended, in the
 * original order, to the returned value.
 *
 * If "args" is empty nothing is appended, so the result stays a
 * default-constructed TJsonValue.
 *
 * NOTE(review): GetMapSafe()/GetArraySafe()/at() are expected to raise on a
 * node of unexpected shape (not a map, no "args" key, "args" not an array) -
 * confirm against library/cpp/json.
 */
NJson::TJsonValue GetJoinOrderFromDetailedJoinOrderImpl(const NJson::TJsonValue& opt) {
    // Resolve the map view once instead of once per key access.
    const auto& fields = opt.GetMapSafe();

    if (fields.contains("table")) {
        return fields.at("table");
    }

    // Bind by reference: a plain `auto` here would copy the whole array (and
    // every nested JSON value in it) at each level of the recursion.
    const auto& args = fields.at("args").GetArraySafe();

    NJson::TJsonValue res;
    for (const auto& arg : args) {
        res.AppendValue(GetJoinOrderFromDetailedJoinOrderImpl(arg));
    }
    return res;
}

/**
 * Parses the textual detailed join order as JSON and returns its tables-only
 * form, as produced by GetJoinOrderFromDetailedJoinOrderImpl.
 *
 * NOTE(review): the last argument of ReadJsonTree is presumably its
 * throw-on-error flag, i.e. malformed input raises instead of yielding an
 * empty tree - confirm against library/cpp/json.
 */
NJson::TJsonValue GetJoinOrderFromDetailedJoinOrder(const TString& deserializedDetailedJoinOrder) {
    constexpr bool throwOnError = true;

    NJson::TJsonValue detailedJoinOrder;
    NJson::ReadJsonTree(deserializedDetailedJoinOrder, &detailedJoinOrder, throwOnError);

    return GetJoinOrderFromDetailedJoinOrderImpl(detailedJoinOrder);
}

} // namespace NKqp
} // namespace NKikimr
2 changes: 2 additions & 0 deletions ydb/core/kqp/ut/common/kqp_ut_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,5 +350,7 @@ NJson::TJsonValue GetDetailedJoinOrder(const TString& deserializedPlan, const TG
/* Gets the join order of the tables only, without any per-join details. */
NJson::TJsonValue GetJoinOrder(const TString& deserializedPlan);

/* Parses the given detailed join order (JSON text) and returns the join order that keeps only the table entries. */
NJson::TJsonValue GetJoinOrderFromDetailedJoinOrder(const TString& deserializedDetailedJoinOrder);

} // namespace NKqp
} // namespace NKikimr
1 change: 1 addition & 0 deletions ydb/core/kqp/ut/join/data/join_order/lookupbug.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@
}
]
}

1 change: 1 addition & 0 deletions ydb/core/kqp/ut/join/data/join_order/tpcc.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
}
]
}

Loading

0 comments on commit 5ef387d

Please sign in to comment.