Skip to content

Commit

Permalink
Implement converter yt join tree -> optimizer join tree YQL-17437 (#1671)
Browse files Browse the repository at this point in the history
  • Loading branch information
resetius authored Feb 8, 2024
1 parent a5a4ea2 commit 73b4f84
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 0 deletions.
49 changes: 49 additions & 0 deletions ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <library/cpp/testing/unittest/registar.h>

#include <ydb/library/yql/providers/yt/provider/yql_yt_join_impl.h>
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>

namespace NYql {

Expand Down Expand Up @@ -70,6 +71,54 @@ Y_UNIT_TEST(OrderJoinsDoesNothingWhenCBODisabled) {
UNIT_ASSERT_VALUES_EQUAL(tree, optimizedTree);
}

// Converts a single join over two leaves ("c" and "n") and verifies that the
// result is a join node whose children are relation nodes with the expected
// labels and row counts.
Y_UNIT_TEST(BuildOptimizerTree2Tables) {
    TExprContext exprCtx;

    // Input: one join op with a leaf on each side.
    auto ytJoin = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n"}, exprCtx);
    ytJoin->Left = MakeLeaf({"c"}, {"c"}, 100000, 12333, exprCtx);
    ytJoin->Right = MakeLeaf({"n"}, {"n"}, 1000, 1233, exprCtx);

    // Output slots filled by the converter.
    std::shared_ptr<IBaseOptimizerNode> optimizerTree;
    std::shared_ptr<IProviderContext> providerCtx;
    BuildOptimizerJoinTree(optimizerTree, providerCtx, ytJoin);

    // Kinds are checked before each downcast so the static casts are safe.
    UNIT_ASSERT(optimizerTree->Kind == JoinNodeType);
    const auto joinNode = std::static_pointer_cast<TJoinOptimizerNode>(optimizerTree);
    UNIT_ASSERT(joinNode->LeftArg->Kind == RelNodeType);
    UNIT_ASSERT(joinNode->RightArg->Kind == RelNodeType);

    const auto leftRel = std::static_pointer_cast<TRelOptimizerNode>(joinNode->LeftArg);
    const auto rightRel = std::static_pointer_cast<TRelOptimizerNode>(joinNode->RightArg);

    UNIT_ASSERT_VALUES_EQUAL(leftRel->Label, "c");
    UNIT_ASSERT_VALUES_EQUAL(rightRel->Label, "n");
    UNIT_ASSERT_VALUES_EQUAL(leftRel->Stats->Nrows, 100000);
    UNIT_ASSERT_VALUES_EQUAL(rightRel->Stats->Nrows, 1000);
}

// Same shape as the two-table case, but the join op and the right leaf are
// built with an extra "e" entry; the relation labels are still expected to be
// "c" and "n".
Y_UNIT_TEST(BuildOptimizerTree2TablesComplexLabel) {
    TExprContext exprCtx;

    // Input: one join op with a leaf on each side.
    auto ytJoin = MakeOp({"c", "c_nationkey"}, {"n", "n_nationkey"}, {"c", "n", "e"}, exprCtx);
    ytJoin->Left = MakeLeaf({"c"}, {"c"}, 1000000, 1233333, exprCtx);
    ytJoin->Right = MakeLeaf({"n"}, {"n", "e"}, 10000, 12333, exprCtx);

    // Output slots filled by the converter.
    std::shared_ptr<IBaseOptimizerNode> optimizerTree;
    std::shared_ptr<IProviderContext> providerCtx;
    BuildOptimizerJoinTree(optimizerTree, providerCtx, ytJoin);

    // Kinds are checked before each downcast so the static casts are safe.
    UNIT_ASSERT(optimizerTree->Kind == JoinNodeType);
    const auto joinNode = std::static_pointer_cast<TJoinOptimizerNode>(optimizerTree);
    UNIT_ASSERT(joinNode->LeftArg->Kind == RelNodeType);
    UNIT_ASSERT(joinNode->RightArg->Kind == RelNodeType);

    const auto leftRel = std::static_pointer_cast<TRelOptimizerNode>(joinNode->LeftArg);
    const auto rightRel = std::static_pointer_cast<TRelOptimizerNode>(joinNode->RightArg);

    UNIT_ASSERT_VALUES_EQUAL(leftRel->Label, "c");
    UNIT_ASSERT_VALUES_EQUAL(rightRel->Label, "n");
    UNIT_ASSERT_VALUES_EQUAL(leftRel->Stats->Nrows, 1000000);
    UNIT_ASSERT_VALUES_EQUAL(rightRel->Stats->Nrows, 10000);
}

#define ADD_TEST(Name) \
Y_UNIT_TEST(Name ## _PG) { \
Name(ECostBasedOptimizerType::PG); \
Expand Down
5 changes: 5 additions & 0 deletions ydb/library/yql/providers/yt/provider/yql_yt_join_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,9 @@ IGraphTransformer::TStatus RewriteYtEquiJoin(TYtEquiJoin equiJoin, TYtJoinNodeOp
TMaybeNode<TExprBase> ExportYtEquiJoin(TYtEquiJoin equiJoin, const TYtJoinNodeOp& op, TExprContext& ctx, const TYtState::TPtr& state);
TYtJoinNodeOp::TPtr OrderJoins(TYtJoinNodeOp::TPtr op, const TYtState::TPtr& state, TExprContext& ctx, bool debug = false);

// Forward declarations: only shared_ptr handles to these types appear in the
// declaration below, so the full definitions are not needed here.
struct IBaseOptimizerNode;
struct IProviderContext;

// Converts the YT join tree rooted at `op` into an optimizer join tree.
// `tree` and `ctx` are output parameters: on return `tree` holds the root of
// the converted tree and `ctx` holds a newly created provider context.
void BuildOptimizerJoinTree(std::shared_ptr<IBaseOptimizerNode>& tree, std::shared_ptr<IProviderContext>& ctx, TYtJoinNodeOp::TPtr op);

}
95 changes: 95 additions & 0 deletions ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <ydb/library/yql/parser/pg_wrapper/interface/optimizer.h>
#include <ydb/library/yql/providers/common/provider/yql_provider.h>
#include <ydb/library/yql/utils/log/log.h>
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>

#include <ydb/library/yql/dq/opt/dq_opt_log.h>

Expand Down Expand Up @@ -408,8 +409,102 @@ class TJoinReorderer {
IOptimizer::TOutput Result;
};

class TOptimizerTreeBuilder
{
public:
TOptimizerTreeBuilder(std::shared_ptr<IBaseOptimizerNode>& tree, std::shared_ptr<IProviderContext>& ctx, TYtJoinNodeOp::TPtr inputTree)
: Tree(tree)
, Ctx(ctx)
, InputTree(inputTree)
{ }

void Do() {
Ctx = std::make_shared<TDummyProviderContext>();
Tree = ProcessNode(InputTree);
}

private:
std::shared_ptr<IBaseOptimizerNode> ProcessNode(TYtJoinNode::TPtr node) {
if (auto* op = dynamic_cast<TYtJoinNodeOp*>(node.Get())) {
return OnOp(op);
} else if (auto* leaf = dynamic_cast<TYtJoinNodeLeaf*>(node.Get())) {
return OnLeaf(leaf);
} else {
YQL_ENSURE("Unknown node type");
return nullptr;
}
}

std::shared_ptr<IBaseOptimizerNode> OnOp(TYtJoinNodeOp* op) {
auto joinKind = ConvertToJoinKind(TString(op->JoinKind->Content()));
auto left = ProcessNode(op->Left);
auto right = ProcessNode(op->Right);
YQL_ENSURE(op->LeftLabel->ChildrenSize() == op->RightLabel->ChildrenSize());
std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
for (ui32 i = 0; i < op->LeftLabel->ChildrenSize(); i += 2) {
auto ltable = op->LeftLabel->Child(i)->Content();
auto lcolumn = op->LeftLabel->Child(i + 1)->Content();
auto rtable = op->RightLabel->Child(i)->Content();
auto rcolumn = op->RightLabel->Child(i + 1)->Content();
NDq::TJoinColumn lcol{TString(ltable), TString(lcolumn)};
NDq::TJoinColumn rcol{TString(rtable), TString(rcolumn)};
joinConditions.insert({lcol, rcol});
}

return std::make_shared<TJoinOptimizerNode>(
left, right, joinConditions, joinKind, EJoinAlgoType::GraceJoin
);
}

std::shared_ptr<IBaseOptimizerNode> OnLeaf(TYtJoinNodeLeaf* leaf) {
TString label;
if (leaf->Label->ChildrenSize() == 0) {
label = leaf->Label->Content();
} else {
for (ui32 i = 0; i < leaf->Label->ChildrenSize(); ++i) {
label += leaf->Label->Child(i)->Content();
if (i+1 != leaf->Label->ChildrenSize()) {
label += ",";
}
}
}

TYtSection section{leaf->Section};
auto stat = std::make_shared<TOptimizerStatistics>();
if (Y_UNLIKELY(!section.Settings().Empty()) && Y_UNLIKELY(section.Settings().Item(0).Name() == "Test")) {
for (const auto& setting : section.Settings()) {
if (setting.Name() == "Rows") {
stat->Nrows += FromString<ui64>(setting.Value().Ref().Content());
} else if (setting.Name() == "Size") {
stat->Cost += FromString<ui64>(setting.Value().Ref().Content());
}
}
} else {
for (auto path: section.Paths()) {
auto tableStat = TYtTableBaseInfo::GetStat(path.Table());
stat->Cost += tableStat->DataSize;
stat->Nrows += tableStat->RecordsCount;
}
}

return std::make_shared<TRelOptimizerNode>(
std::move(label), std::move(stat)
);
}

std::shared_ptr<IBaseOptimizerNode>& Tree;
std::shared_ptr<IProviderContext>& Ctx;

TYtJoinNodeOp::TPtr InputTree;
};

} // namespace

void BuildOptimizerJoinTree(std::shared_ptr<IBaseOptimizerNode>& tree, std::shared_ptr<IProviderContext>& ctx, TYtJoinNodeOp::TPtr op)
{
TOptimizerTreeBuilder(tree, ctx, op).Do();
}

TYtJoinNodeOp::TPtr OrderJoins(TYtJoinNodeOp::TPtr op, const TYtState::TPtr& state, TExprContext& ctx, bool debug)
{
if (state->Types->CostBasedOptimizer == ECostBasedOptimizerType::Disable) {
Expand Down

0 comments on commit 73b4f84

Please sign in to comment.