Skip to content

Commit

Permalink
Merge 179b632 into 5bea49a
Browse files (browse the repository at this point in the history)
  • Loading branch information
Tony-Romanov authored Sep 3, 2024
2 parents 5bea49a + 179b632 commit c77ea87
Show file tree
Hide file tree
Showing 13 changed files with 108 additions and 35 deletions.
20 changes: 16 additions & 4 deletions ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,17 @@ void TRelOptimizerNode::Print(std::stringstream& stream, int ntabs) {
}

TJoinOptimizerNode::TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left, const std::shared_ptr<IBaseOptimizerNode>& right,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool nonReorderable) :
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool leftAny, bool rightAny, bool nonReorderable) :
IBaseOptimizerNode(JoinNodeType),
LeftArg(left),
RightArg(right),
JoinConditions(joinConditions),
JoinType(joinType),
JoinAlgo(joinAlgo) {
IsReorderable = !nonReorderable;
JoinAlgo(joinAlgo),
LeftAny(leftAny),
RightAny(rightAny),
IsReorderable(!nonReorderable)
{
for (auto [l,r] : joinConditions ) {
LeftJoinKeys.push_back(l.AttributeName);
RightJoinKeys.push_back(r.AttributeName);
Expand All @@ -100,7 +103,14 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) {
stream << " ";
}

stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo) << ") ";
stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo);
if (LeftAny) {
stream << ",LeftAny";
}
if (RightAny) {
stream << ",RightAny";
}
stream << ") ";

for (auto c : JoinConditions){
stream << c.first.RelName << "." << c.first.AttributeName
Expand Down Expand Up @@ -317,6 +327,8 @@ std::shared_ptr<IBaseOptimizerNode> MakeJoinTreeFromJson(const NJson::TJsonValue
{},
EJoinKind::Cross, // just a stub
EJoinAlgoType::Undefined,
false,
false,
true
);
return std::make_shared<TJoinOptimizerNode>(std::move(joinNode));
Expand Down
6 changes: 5 additions & 1 deletion ydb/library/yql/core/cbo/cbo_optimizer_new.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,18 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode {
TVector<TString> RightJoinKeys;
EJoinKind JoinType;
EJoinAlgoType JoinAlgo;
const bool LeftAny;
const bool RightAny;
bool IsReorderable;

TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
bool nonReorderable=false);
bool leftAny,
bool rightAny,
bool nonReorderable = false);
virtual ~TJoinOptimizerNode() {}
virtual TVector<TString> Labels();
virtual void Print(std::stringstream& stream, int ntabs=0);
Expand Down
22 changes: 14 additions & 8 deletions ydb/library/yql/dq/opt/dq_cbo_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,15 @@ Y_UNIT_TEST(JoinSearch2Rels) {
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
joinConditions,
InnerJoin,
EJoinAlgoType::GraceJoin
EJoinAlgoType::GraceJoin,
true,
false
);

auto res = optimizer->JoinSearch(op);
std::stringstream ss;
res->Print(ss);
TString expected = R"__(Join: (InnerJoin,MapJoin) b.1=a.1,
TString expected = R"__(Join: (InnerJoin,MapJoin,RightAny) b.1=a.1,
Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Storage: NA
Rel: b
Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Storage: NA
Expand Down Expand Up @@ -93,8 +95,10 @@ Y_UNIT_TEST(JoinSearch3Rels) {
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
joinConditions,
InnerJoin,
EJoinAlgoType::GraceJoin
);
EJoinAlgoType::GraceJoin,
false,
false
);

joinConditions.insert({
NDq::TJoinColumn("a", "1"),
Expand All @@ -106,14 +110,16 @@ Y_UNIT_TEST(JoinSearch3Rels) {
std::static_pointer_cast<IBaseOptimizerNode>(rel3),
joinConditions,
InnerJoin,
EJoinAlgoType::GraceJoin
);
EJoinAlgoType::GraceJoin,
true,
false
);

auto res = optimizer->JoinSearch(op2);
std::stringstream ss;
res->Print(ss);

TString expected = R"__(Join: (InnerJoin,MapJoin) a.1=b.1,a.1=c.1,
TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1,a.1=c.1,
Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Storage: NA
Join: (InnerJoin,MapJoin) b.1=a.1,
Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Storage: NA
Expand Down Expand Up @@ -228,7 +234,7 @@ void _DqOptimizeEquiJoinWithCosts(const std::function<IOptimizerNew*()>& optFact
UNIT_ASSERT(equiJoin.Maybe<TCoEquiJoin>());
auto resStr = NCommon::ExprToPrettyString(ctx, *res.Ptr());
auto expected = R"__((
(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('"join_algo" '"MapJoin"))))
(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('join_algo 'MapJoin))))
(return (EquiJoin '('() '"orders") '('() '"customer") $1 '()))
)
)__";
Expand Down
10 changes: 8 additions & 2 deletions ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ class TDPHypSolver {
const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
EJoinKind joinKind,
bool leftAny,
bool rightAny,
bool isCommutative,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
Expand Down Expand Up @@ -410,6 +412,8 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
EJoinKind joinKind,
bool leftAny,
bool rightAny,
bool isCommutative,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
Expand Down Expand Up @@ -458,10 +462,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
Y_ENSURE(bestCost != std::numeric_limits<double>::infinity(), "No join was chosen!");

if (bestJoinIsReversed) {
return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeHint);
return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeHint);
}

return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeHint);
return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeHint);
}

/*
Expand Down Expand Up @@ -492,6 +496,8 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS
leftNodes,
rightNodes,
csgCmpEdge->JoinKind,
csgCmpEdge->LeftAny,
csgCmpEdge->RightAny,
csgCmpEdge->IsCommutative,
csgCmpEdge->JoinConditions,
reversedEdge->JoinConditions,
Expand Down
6 changes: 3 additions & 3 deletions ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ std::shared_ptr<IBaseOptimizerNode> CreateChain(size_t size, TString onAttribute
joinConditions.insert({TJoinColumn(eiPrevStr, onAttribute), TJoinColumn(eiStr, onAttribute)});

root = std::make_shared<TJoinOptimizerNode>(
root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined
root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);
}

Expand Down Expand Up @@ -86,7 +86,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {

// a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2
auto root = std::make_shared<TJoinOptimizerNode>(
lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined
lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);

joinConditions.clear();
Expand All @@ -97,7 +97,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
// a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2
// ^ we don't want to have transitive closure between c and a
root = std::make_shared<TJoinOptimizerNode>(
root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined
root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);

auto graph = MakeJoinHypergraph<TNodeSet>(root);
Expand Down
41 changes: 32 additions & 9 deletions ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "dq_opt_make_join_hypergraph.h"

#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
#include <ydb/library/yql/core/yql_join.h>
#include <ydb/library/yql/dq/opt/dq_opt.h>
#include <ydb/library/yql/utils/log/log.h>

Expand Down Expand Up @@ -97,7 +98,9 @@ std::shared_ptr<TJoinOptimizerNode> ConvertToJoinTree(
TJoinColumn(rightScope, rightColumn)));
}

return std::make_shared<TJoinOptimizerNode>(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined);
const auto linkSettings = GetEquiJoinLinkSettings(joinTuple.Options().Ref());
return std::make_shared<TJoinOptimizerNode>(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined,
linkSettings.LeftHints.contains("any"), linkSettings.RightHints.contains("any"));
}

/**
Expand Down Expand Up @@ -143,14 +146,32 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin,
rightJoinColumns.push_back(BuildAtom(pair.second.AttributeName, equiJoin.Pos(), ctx));
}

auto optionsList = ctx.Builder(equiJoin.Pos())
.List()
.List(0)
.Atom(0, "join_algo")
.Atom(1, ToString(reorderResult->JoinAlgo))
TExprNode::TListType options(1U,
ctx.Builder(equiJoin.Pos())
.List()
.Atom(0, "join_algo", TNodeFlags::Default)
.Atom(1, ToString(reorderResult->JoinAlgo), TNodeFlags::Default)
.Seal()
.Seal()
.Build();
.Build()
);

if (reorderResult->LeftAny) {
options.emplace_back(ctx.Builder(equiJoin.Pos())
.List()
.Atom(0, "left", TNodeFlags::Default)
.Atom(1, "any", TNodeFlags::Default)
.Seal()
.Build());
}

if (reorderResult->RightAny) {
options.emplace_back(ctx.Builder(equiJoin.Pos())
.List()
.Atom(0, "right", TNodeFlags::Default)
.Atom(1, "any", TNodeFlags::Default)
.Seal()
.Build());
}

// Build the final output
return Build<TCoEquiJoinTuple>(ctx,equiJoin.Pos())
Expand All @@ -163,7 +184,9 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin,
.RightKeys()
.Add(rightJoinColumns)
.Build()
.Options(optionsList)
.Options()
.Add(std::move(options))
.Build()
.Done();
}

Expand Down
9 changes: 8 additions & 1 deletion ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,16 @@ class TJoinHypergraph {
const TNodeSet& left,
const TNodeSet& right,
EJoinKind joinKind,
bool leftAny,
bool rightAny,
bool isCommutative,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions
)
: Left(left)
, Right(right)
, JoinKind(joinKind)
, LeftAny(leftAny)
, RightAny(rightAny)
, IsCommutative(isCommutative)
, JoinConditions(joinConditions)
, IsReversed(false)
Expand All @@ -52,6 +56,7 @@ class TJoinHypergraph {
TNodeSet Left;
TNodeSet Right;
EJoinKind JoinKind;
bool LeftAny, RightAny;
bool IsCommutative;
std::set<std::pair<TJoinColumn, TJoinColumn>> JoinConditions;
TVector<TString> LeftJoinKeys;
Expand Down Expand Up @@ -410,6 +415,8 @@ class TTransitiveClosureConstructor {
const auto& nodes = Graph_.GetNodes();

EJoinKind groupJoinKind = edges[groupBegin].JoinKind;
bool leftAny = edges[groupBegin].LeftAny;
bool rightAny = edges[groupBegin].RightAny;
bool isJoinCommutative = edges[groupBegin].IsCommutative;

TVector<TString> groupConditionUsedAttributes;
Expand Down Expand Up @@ -446,7 +453,7 @@ class TTransitiveClosureConstructor {
});
}

auto e = THyperedge(lhs, rhs, groupJoinKind, isJoinCommutative, joinConditions);
auto e = THyperedge(lhs, rhs, groupJoinKind, leftAny, rightAny, isJoinCommutative, joinConditions);
Graph_.AddEdge(std::move(e));
}
}
Expand Down
6 changes: 4 additions & 2 deletions ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
const TVector<TString>& rightJoinKeys,
EJoinKind joinKind,
EJoinAlgoType joinAlgo,
bool leftAny,
bool rightAny,
IProviderContext& ctx,
TCardinalityHints::TCardinalityHint* maybeHint) {

auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo);
auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo, leftAny, rightAny);
res->Stats = std::make_shared<TOptimizerStatistics>(ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint));
return res;
}
Expand All @@ -37,7 +39,7 @@ std::shared_ptr<TJoinOptimizerNode> ConvertFromInternal(const std::shared_ptr<IB
right = ConvertFromInternal(right);
}

auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo);
auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny);
newJoin->Stats = join->Stats;
return newJoin;
}
Expand Down
10 changes: 9 additions & 1 deletion ydb/library/yql/dq/opt/dq_opt_join_tree_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo
const EJoinAlgoType joinAlgo,
const bool leftAny,
const bool rightAny
)
: IBaseOptimizerNode(JoinNodeType)
, LeftArg(left)
Expand All @@ -32,6 +34,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
, RightJoinKeys(rightJoinKeys)
, JoinType(joinType)
, JoinAlgo(joinAlgo)
, LeftAny(leftAny)
, RightAny(rightAny)
{}

virtual ~TJoinOptimizerNodeInternal() = default;
Expand All @@ -52,6 +56,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
const TVector<TString>& RightJoinKeys;
EJoinKind JoinType;
EJoinAlgoType JoinAlgo;
const bool LeftAny;
const bool RightAny;
};

/**
Expand All @@ -65,6 +71,8 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
const TVector<TString>& rightJoinKeys,
EJoinKind joinKind,
EJoinAlgoType joinAlgo,
bool leftAny,
bool rightAny,
IProviderContext& ctx,
TCardinalityHints::TCardinalityHint* maybeHint = nullptr
);
Expand Down
2 changes: 1 addition & 1 deletion ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ typename TJoinHypergraph<TNodeSet>::TEdge MakeHyperedge(
TNodeSet left = TES & subtreeNodes[joinNode->LeftArg];
TNodeSet right = TES & subtreeNodes[joinNode->RightArg];

return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, OperatorIsCommutative(joinNode->JoinType) && joinNode->IsReorderable, joinNode->JoinConditions);
return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, OperatorIsCommutative(joinNode->JoinType) && joinNode->IsReorderable, joinNode->JoinConditions);
}

template<typename TNodeSet>
Expand Down
2 changes: 1 addition & 1 deletion ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Y_UNIT_TEST(NonReordable) {
std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
joinConditions.insert({NDq::TJoinColumn{"a", "b"}, NDq::TJoinColumn{"a","c"}});
auto root = std::make_shared<TJoinOptimizerNode>(
left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, true);
left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true);
TBaseProviderContext optCtx;
std::unique_ptr<IOptimizerNew> opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(optCtx, 1024));
auto result = opt->JoinSearch(root);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,10 @@ class TYtJoinOptimizerNode: public TJoinOptimizerNode {
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
TYtJoinNodeOp* originalOp)
: TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo, originalOp != nullptr)
: TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo,
originalOp ? originalOp->LinkSettings.LeftHints.contains("any") : false,
originalOp ? originalOp->LinkSettings.RightHints.contains("any") : false,
originalOp != nullptr)
, OriginalOp(originalOp)
{ }

Expand Down
Loading

0 comments on commit c77ea87

Please sign in to comment.