From 224330a8a6b6e9707f4fefde803082b7ad75bc5d Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Mon, 25 Nov 2024 16:42:46 +0300 Subject: [PATCH 01/12] [CBO] user warning added if cbo didn't work --- ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 5018e4a70644..b9ee3c002e5a 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -357,6 +357,7 @@ TExprBase DqOptimizeEquiJoinWithCosts( // of the EquiJoin and n-1 argument are the parameters to EquiJoin if (!DqCollectJoinRelationsWithStats(rels, typesCtx, equiJoin, providerCollect)){ + ctx.AddWarning(YqlIssue({}, TIssuesIds::WARNING, "Cost Based Optimizer didn't work: couldn't load statistics")); return node; } From b39d5f944a5de54a0d98f7d7e8bd41569a484fb5 Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Mon, 25 Nov 2024 16:47:33 +0300 Subject: [PATCH 02/12] [CBO] user warning added if cbo hadn't worked --- ydb/core/kqp/opt/logical/kqp_opt_log.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 3a3df0e38d48..fae010c24b94 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -94,7 +94,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { if (status == TStatus::Ok) { for (const auto& hint: KqpCtx.GetOptimizerHints().GetUnappliedString()) { - ctx.AddWarning(YqlIssue({}, TIssuesIds::YQL_UNUSED_HINT, "Unapplied hint: " + hint)); + ctx.AddWarning(YqlIssue({}, TIssuesIds::DQ_OPTIMIZE_ERROR, "Unapplied hint: " + hint)); } } From a04d54134b1a9ca518e0207a90c38eb8668d9db4 Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Mon, 25 Nov 2024 17:02:11 +0300 Subject: [PATCH 03/12] fix --- ydb/core/kqp/opt/logical/kqp_opt_log.cpp | 4 ++-- ydb/core/kqp/provider/yql_kikimr_settings.cpp | 2 +- ydb/core/kqp/provider/yql_kikimr_settings.h | 2 +- ydb/library/yql/dq/opt/dq_opt_log.h | 2 +- ydb/library/yql/providers/dq/common/yql_dq_settings.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index fae010c24b94..3a64525239a6 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -160,10 +160,10 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { } TMaybeNode OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) { - auto maxDPccpDPTableSize = Config->MaxDPccpDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPccpDPTableSize); + auto maxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize); auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel); auto providerCtx = TKqpProviderContext(KqpCtx, optLevel); - auto opt = std::unique_ptr(MakeNativeOptimizerNew(providerCtx, maxDPccpDPTableSize)); + auto opt = std::unique_ptr(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize)); TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, optLevel, *opt, [](auto& rels, auto label, auto node, auto stat) { rels.emplace_back(std::make_shared(TString(label), *stat, node)); diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.cpp b/ydb/core/kqp/provider/yql_kikimr_settings.cpp index e720d06977ab..65d960ee452c 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_settings.cpp @@ -93,7 +93,7 @@ TKikimrConfiguration::TKikimrConfiguration() { REGISTER_SETTING(*this, EnableSpillingNodes) .Parser([](const TString& v) { return ParseEnableSpillingNodes(v); }); - REGISTER_SETTING(*this, MaxDPccpDPTableSize); + REGISTER_SETTING(*this, MaxDPHypDPTableSize); REGISTER_SETTING(*this, MaxTasksPerStage); REGISTER_SETTING(*this, MaxSequentialReadsInFlight); diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h index 85c5a8829541..28bab1b62aba 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.h +++ b/ydb/core/kqp/provider/yql_kikimr_settings.h @@ -69,7 +69,7 @@ struct TKikimrSettings { NCommon::TConfSetting OptUseFinalizeByKey; NCommon::TConfSetting CostBasedOptimizationLevel; - NCommon::TConfSetting MaxDPccpDPTableSize; + NCommon::TConfSetting MaxDPHypDPTableSize; NCommon::TConfSetting MaxTasksPerStage; diff --git a/ydb/library/yql/dq/opt/dq_opt_log.h b/ydb/library/yql/dq/opt/dq_opt_log.h index 34816f163c26..1b8a79fefb0b 100644 --- a/ydb/library/yql/dq/opt/dq_opt_log.h +++ b/ydb/library/yql/dq/opt/dq_opt_log.h @@ -63,7 +63,7 @@ IGraphTransformer::TStatus DqWrapIO(const TExprNode::TPtr& input, TExprNode::TPt NNodes::TExprBase DqExpandMatchRecognize(NNodes::TExprBase node, TExprContext& ctx, TTypeAnnotationContext& typeAnnCtx); -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPccpDPTableSize); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize); NNodes::TMaybeNode UnorderedOverDqReadWrap(NNodes::TExprBase node, TExprContext& ctx, const std::function& getParents, bool enableDqReplicate, TTypeAnnotationContext& typeAnnCtx); diff --git a/ydb/library/yql/providers/dq/common/yql_dq_settings.h b/ydb/library/yql/providers/dq/common/yql_dq_settings.h index 76ce9c7b438b..8ea2477e6ae4 100644 --- a/ydb/library/yql/providers/dq/common/yql_dq_settings.h +++ b/ydb/library/yql/providers/dq/common/yql_dq_settings.h @@ -57,7 +57,7 @@ struct TDqSettings { static constexpr ETaskRunnerStats TaskRunnerStats = ETaskRunnerStats::Basic; static constexpr ESpillingEngine SpillingEngine = ESpillingEngine::Disable; static constexpr ui32 CostBasedOptimizationLevel = 4; - static constexpr ui32 MaxDPccpDPTableSize = 40000U; + static constexpr ui32 MaxDPHypDPTableSize = 40000U; static constexpr ui64 MaxAttachmentsSize = 2_GB; static constexpr bool SplitStageOnDqReplicate = true; static constexpr ui64 EnableSpillingNodes = 0; From 156b60309d619f7f346274389c46e6e666502eed Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Mon, 25 Nov 2024 17:23:53 +0300 Subject: [PATCH 04/12] fix --- ydb/core/kqp/opt/logical/kqp_opt_log.cpp | 2 +- .../yql/dq/opt/dq_opt_join_cost_based.cpp | 25 +++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 3a64525239a6..6dbb3ece8c0d 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -94,7 +94,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { if (status == TStatus::Ok) { for (const auto& hint: KqpCtx.GetOptimizerHints().GetUnappliedString()) { - ctx.AddWarning(YqlIssue({}, TIssuesIds::DQ_OPTIMIZE_ERROR, "Unapplied hint: " + hint)); + ctx.AddWarning(YqlIssue({}, TIssuesIds::YQL_UNUSED_HINT, "Unapplied hint: " + hint)); } } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index b9ee3c002e5a..537acd9db638 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -235,9 +235,10 @@ void ComputeStatistics(const std::shared_ptr& join, IProvide class TOptimizerNativeNew: public IOptimizerNew { public: - TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize) + TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext* exprCtx = nullptr) : IOptimizerNew(ctx) - , MaxDPhypTableSize_(maxDPhypDPTableSize) + , MaxDPHypTableSize_(maxDPhypDPTableSize) + , ExprCtx(exprCtx) {} std::shared_ptr JoinSearch( @@ -272,8 +273,17 @@ class TOptimizerNativeNew: public IOptimizerNew { TJoinHypergraph hypergraph = MakeJoinHypergraph(joinTree, hints); TDPHypSolver solver(hypergraph, this->Pctx); - if (solver.CountCC(MaxDPhypTableSize_) >= MaxDPhypTableSize_) { + if (solver.CountCC(MaxDPHypTableSize_) >= MaxDPHypTableSize_) { YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded"; + if (ExprCtx) { + ExprCtx.AddWarning( + YqlIssue( + {}, TIssuesIds::DQ_OPTIMIZE_ERROR, + "Cost Based Optimizer didn't work: " + "Enumeration is too large, use PRAGMA MaxDPHypDPTableSize='4294967295' to disable the limitation" + ) + ); + } ComputeStatistics(joinTree, this->Pctx); return joinTree; } @@ -304,11 +314,12 @@ class TOptimizerNativeNew: public IOptimizerNew { } private: - ui32 MaxDPhypTableSize_; + ui32 MaxDPHypTableSize_; + TExprContext* ExprCtx; }; -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize) { - return new TOptimizerNativeNew(ctx, maxDPhypDPTableSize); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize, TExprContext* exprCtx = nullptr) { + return new TOptimizerNativeNew(ctx, maxDPhypDPTableSize, exprCtx); } TExprBase DqOptimizeEquiJoinWithCosts( @@ -357,7 +368,7 @@ TExprBase DqOptimizeEquiJoinWithCosts( // of the EquiJoin and n-1 argument are the parameters to EquiJoin if (!DqCollectJoinRelationsWithStats(rels, typesCtx, equiJoin, providerCollect)){ - ctx.AddWarning(YqlIssue({}, TIssuesIds::WARNING, "Cost Based Optimizer didn't work: couldn't load statistics")); + ctx.AddWarning(YqlIssue({}, TIssuesIds::DQ_OPTIMIZE_ERROR, "Cost Based Optimizer didn't work: couldn't load statistics")); return node; } From 652e80ad5d88c7cda44b507023d844de2b6a88c6 Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Mon, 25 Nov 2024 17:43:33 +0300 Subject: [PATCH 05/12] fix --- ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 537acd9db638..f39bbe70a18d 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -276,7 +276,7 @@ class TOptimizerNativeNew: public IOptimizerNew { if (solver.CountCC(MaxDPHypTableSize_) >= MaxDPHypTableSize_) { YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded"; if (ExprCtx) { - ExprCtx.AddWarning( + ExprCtx->AddWarning( YqlIssue( {}, TIssuesIds::DQ_OPTIMIZE_ERROR, "Cost Based Optimizer didn't work: " From 07eb425652eaf3d8216529911b298216db6d1f8b Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Mon, 25 Nov 2024 20:55:25 +0300 Subject: [PATCH 06/12] fix --- ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 2 +- ydb/library/yql/dq/opt/dq_opt_log.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index f39bbe70a18d..bea55b6b190f 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -318,7 +318,7 @@ class TOptimizerNativeNew: public IOptimizerNew { TExprContext* ExprCtx; }; -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize, TExprContext* exprCtx = nullptr) { +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize, TExprContext* exprCtx) { return new TOptimizerNativeNew(ctx, maxDPhypDPTableSize, exprCtx); } diff --git a/ydb/library/yql/dq/opt/dq_opt_log.h b/ydb/library/yql/dq/opt/dq_opt_log.h index 1b8a79fefb0b..20bb9832caec 100644 --- a/ydb/library/yql/dq/opt/dq_opt_log.h +++ b/ydb/library/yql/dq/opt/dq_opt_log.h @@ -63,7 +63,7 @@ IGraphTransformer::TStatus DqWrapIO(const TExprNode::TPtr& input, TExprNode::TPt NNodes::TExprBase DqExpandMatchRecognize(NNodes::TExprBase node, TExprContext& ctx, TTypeAnnotationContext& typeAnnCtx); -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize, TExprContext* exprCtx = nullptr); NNodes::TMaybeNode UnorderedOverDqReadWrap(NNodes::TExprBase node, TExprContext& ctx, const std::function& getParents, bool enableDqReplicate, TTypeAnnotationContext& typeAnnCtx); From 628fe066b095677dfa555f940be773d69eb12177 Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Tue, 26 Nov 2024 10:45:04 +0000 Subject: [PATCH 07/12] fix --- ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp index bdc2df471a6f..c3962a3b7a40 100644 --- a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp +++ b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp @@ -3566,7 +3566,7 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda execSettings) .ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT(result.GetIssues().Empty()); + // UNIT_ASSERT(result.GetIssues().Empty()); UNIT_ASSERT_VALUES_EQUAL(NYdb::FormatResultSetYson(result.GetResultSet(0)), "[[[\"Table1Primary3\"]];[[\"Table1Primary4\"]]]"); @@ -3804,7 +3804,7 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda execSettings) .ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT(result.GetIssues().Empty()); + // UNIT_ASSERT(result.GetIssues().Empty()); UNIT_ASSERT_VALUES_EQUAL(NYdb::FormatResultSetYson(result.GetResultSet(0)), "[[[\"Table1Primary3\"];[\"cc\"]];[[\"Table1Primary4\"];[\"dd\"]]]"); @@ -4397,7 +4397,7 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda execSettings).ExtractValueSync(); UNIT_ASSERT_C(result2.IsSuccess(), result2.GetIssues().ToString()); - UNIT_ASSERT(result2.GetIssues().Empty()); + // UNIT_ASSERT(result2.GetIssues().Empty()); UNIT_ASSERT_VALUES_EQUAL(NYdb::FormatResultSetYson(result2.GetResultSet(0)), "[[[\"Payload1\"]]]"); From 0519745f597c62d9cc77172c7d599d2549d5cd5c Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Tue, 26 Nov 2024 11:20:51 +0000 Subject: [PATCH 08/12] fix --- ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index bea55b6b190f..7826d05365a3 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -279,7 +279,7 @@ class TOptimizerNativeNew: public IOptimizerNew { ExprCtx->AddWarning( YqlIssue( {}, TIssuesIds::DQ_OPTIMIZE_ERROR, - "Cost Based Optimizer didn't work: " + "Cost Based Optimizer could not be applied to this query: " "Enumeration is too large, use PRAGMA MaxDPHypDPTableSize='4294967295' to disable the limitation" ) ); @@ -368,7 +368,11 @@ TExprBase DqOptimizeEquiJoinWithCosts( // of the EquiJoin and n-1 argument are the parameters to EquiJoin if (!DqCollectJoinRelationsWithStats(rels, typesCtx, equiJoin, providerCollect)){ - ctx.AddWarning(YqlIssue({}, TIssuesIds::DQ_OPTIMIZE_ERROR, "Cost Based Optimizer didn't work: couldn't load statistics")); + ctx.AddWarning( + YqlIssue({}, TIssuesIds::DQ_OPTIMIZE_ERROR, + "Cost Based Optimizer could not be applied to this query: couldn't load statistics" + ) + ); return node; } From fc24ae040ef0369f1cb8c408d2026a1cec208513 Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Tue, 26 Nov 2024 21:32:08 +0000 Subject: [PATCH 09/12] fix --- ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp | 4 ++-- ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 6 +++--- ydb/library/yql/dq/opt/dq_opt_join_cost_based.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp index 23f15a0c69dd..a7db002bc1d4 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp @@ -9,8 +9,8 @@ namespace NYql::NDq { namespace { class TDqOptimizerFactory : public IOptimizerFactory { public: - virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext&, const TNativeSettings& settings) const override { - return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize)); + virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext& ectx, const TNativeSettings& settings) const override { + return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, &ectx)); } virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerPG(IProviderContext& pctx, TExprContext& ctx, const TPGSettings& settings) const override { diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 7826d05365a3..761ad6aa089b 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -235,7 +235,7 @@ void ComputeStatistics(const std::shared_ptr& join, IProvide class TOptimizerNativeNew: public IOptimizerNew { public: - TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext* exprCtx = nullptr) + TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext* exprCtx) : IOptimizerNew(ctx) , MaxDPHypTableSize_(maxDPhypDPTableSize) , ExprCtx(exprCtx) @@ -318,8 +318,8 @@ class TOptimizerNativeNew: public IOptimizerNew { TExprContext* ExprCtx; }; -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPhypDPTableSize, TExprContext* exprCtx) { - return new TOptimizerNativeNew(ctx, maxDPhypDPTableSize, exprCtx); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& pctx, const ui32 maxDPhypDPTableSize, TExprContext* ectx) { + return new TOptimizerNativeNew(pctx, maxDPhypDPTableSize, ectx); } TExprBase DqOptimizeEquiJoinWithCosts( diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h index 36422f41d897..ac9b077f65dc 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h @@ -38,6 +38,6 @@ NYql::NNodes::TExprBase DqOptimizeEquiJoinWithCosts( const TOptimizerHints& hints = {} ); -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPccpDPTableSize); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPHypDPTableSize, TExprContext* ectx = nullptr); } // namespace NYql::NDq From 192cf78688139e42d942c5dd49978d5a339ac11a Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Wed, 27 Nov 2024 10:25:46 +0000 Subject: [PATCH 10/12] fix --- ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp index c3962a3b7a40..b7de9b596df8 100644 --- a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp +++ b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp @@ -3610,7 +3610,7 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda execSettings) .ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT(result.GetIssues().Empty()); + // UNIT_ASSERT(result.GetIssues().Empty()); UNIT_ASSERT_VALUES_EQUAL(NYdb::FormatResultSetYson(result.GetResultSet(0)), "[[[\"Table1Primary4\"];[4]];[[\"Table1Primary3\"];[3]]]"); @@ -3655,7 +3655,7 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda execSettings) .ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT(result.GetIssues().Empty()); + // UNIT_ASSERT(result.GetIssues().Empty()); UNIT_ASSERT_VALUES_EQUAL(NYdb::FormatResultSetYson(result.GetResultSet(0)), "[[[\"Table1Primary3\"]];[[\"Table1Primary4\"]];[[\"Table1Primary55\"]]]"); @@ -3699,7 +3699,7 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda execSettings) .ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT(result.GetIssues().Empty()); + // UNIT_ASSERT(result.GetIssues().Empty()); UNIT_ASSERT_VALUES_EQUAL(NYdb::FormatResultSetYson(result.GetResultSet(0)), "[[[\"Table1Primary55\"];[55]];[[\"Table1Primary4\"];[4]];[[\"Table1Primary3\"];[3]]]"); @@ -3856,7 +3856,7 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda execSettings) .ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT(result.GetIssues().Empty()); + // UNIT_ASSERT(result.GetIssues().Empty()); UNIT_ASSERT_VALUES_EQUAL(NYdb::FormatResultSetYson(result.GetResultSet(0)), "[[[\"Table1Primary3\"];[\"cc\"]];[[\"Table1Primary4\"];[\"dd\"]];[[\"Table1Primary55\"];#]]"); From 7859dce4a077a88c04096be15556f016a60f554d Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Thu, 28 Nov 2024 15:11:58 +0000 Subject: [PATCH 11/12] fix --- ydb/core/kqp/opt/logical/kqp_opt_log.cpp | 3 +- ydb/library/yql/dq/opt/dq_cbo_ut.cpp | 12 +- .../yql/dq/opt/dq_opt_hypergraph_ut.cpp | 3 +- .../yql/dq/opt/dq_opt_join_cbo_factory.cpp | 2 +- .../yql/dq/opt/dq_opt_join_cost_based.cpp | 22 +- .../yql/dq/opt/dq_opt_join_cost_based.h | 2 +- ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp | 267 ++++++++++++++++++ 7 files changed, 291 insertions(+), 20 deletions(-) create mode 100644 ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index b363a335d8ea..2161ae5b40cd 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -164,7 +164,8 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { auto maxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize); auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel); auto providerCtx = TKqpProviderContext(KqpCtx, optLevel); - auto opt = std::unique_ptr(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize)); + TExprContext dummyCtx; + auto opt = std::unique_ptr(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize, dummyCtx)); TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, optLevel, *opt, [](auto& rels, auto label, auto node, auto stat) { rels.emplace_back(std::make_shared(TString(label), *stat, node)); diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp index c57e16e4fb0c..4054716b2774 100644 --- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp @@ -33,12 +33,14 @@ Y_UNIT_TEST_SUITE(DQCBO) { Y_UNIT_TEST(Empty) { TBaseProviderContext pctx; - std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); + TExprContext dummyCtx; + std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); } Y_UNIT_TEST(JoinSearch2Rels) { TBaseProviderContext pctx; - std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); + TExprContext dummyCtx; + std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); auto rel1 = std::make_shared( "a", @@ -80,7 +82,8 @@ Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: Y_UNIT_TEST(JoinSearch3Rels) { TBaseProviderContext pctx; - std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); + TExprContext dummyCtx; + std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); auto rel1 = std::make_shared("a", TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000)); @@ -243,7 +246,8 @@ Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { TExprContext ctx; TBaseProviderContext pctx; std::function optFactory = [&]() { - return MakeNativeOptimizerNew(pctx, 100000); + TExprContext dummyCtx; + return MakeNativeOptimizerNew(pctx, 100000, dummyCtx); }; _DqOptimizeEquiJoinWithCosts(optFactory, ctx); } diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp index 98012b1f539f..78ca024e321b 100644 --- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp @@ -51,8 +51,9 @@ struct TTestContext : public TBaseProviderContext { template std::shared_ptr Enumerate(const std::shared_ptr& root, const TOptimizerHints& hints = {}) { auto ctx = TProviderContext(); + TExprContext dummyCtx; auto optimizer = - std::unique_ptr(MakeNativeOptimizerNew(ctx, std::numeric_limits::max())); + std::unique_ptr(MakeNativeOptimizerNew(ctx, std::numeric_limits::max(), dummyCtx)); Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType); auto res = optimizer->JoinSearch(std::static_pointer_cast(root), hints); diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp index a7db002bc1d4..0442d62cba16 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp @@ -10,7 +10,7 @@ namespace { class TDqOptimizerFactory : public IOptimizerFactory { public: virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext& ectx, const TNativeSettings& settings) const override { - return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, &ectx)); + return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, ectx)); } virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerPG(IProviderContext& pctx, TExprContext& ctx, const TPGSettings& settings) const override { diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 761ad6aa089b..72df66ef4423 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -235,7 +235,7 @@ void ComputeStatistics(const std::shared_ptr& join, IProvide class TOptimizerNativeNew: public IOptimizerNew { public: - TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext* exprCtx) + TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext& exprCtx) : IOptimizerNew(ctx) , MaxDPHypTableSize_(maxDPhypDPTableSize) , ExprCtx(exprCtx) @@ -275,15 +275,13 @@ class TOptimizerNativeNew: public IOptimizerNew { if (solver.CountCC(MaxDPHypTableSize_) >= MaxDPHypTableSize_) { YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded"; - if (ExprCtx) { - ExprCtx->AddWarning( - YqlIssue( - {}, TIssuesIds::DQ_OPTIMIZE_ERROR, - "Cost Based Optimizer could not be applied to this query: " - "Enumeration is too large, use PRAGMA MaxDPHypDPTableSize='4294967295' to disable the limitation" - ) - ); - } + ExprCtx.AddWarning( + YqlIssue( + {}, TIssuesIds::DQ_OPTIMIZE_ERROR, + "Cost Based Optimizer could not be applied to this query: " + "Enumeration is too large, use PRAGMA MaxDPHypDPTableSize='4294967295' to disable the limitation" + ) + ); ComputeStatistics(joinTree, this->Pctx); return joinTree; } @@ -315,10 +313,10 @@ class TOptimizerNativeNew: public IOptimizerNew { private: ui32 MaxDPHypTableSize_; - TExprContext* ExprCtx; + TExprContext& ExprCtx; }; -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& pctx, const ui32 maxDPhypDPTableSize, TExprContext* ectx) { +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& pctx, const ui32 maxDPhypDPTableSize, TExprContext& ectx) { return new TOptimizerNativeNew(pctx, maxDPhypDPTableSize, ectx); } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h index ac9b077f65dc..581b3135b040 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h @@ -38,6 +38,6 @@ NYql::NNodes::TExprBase DqOptimizeEquiJoinWithCosts( const TOptimizerHints& hints = {} ); -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPHypDPTableSize, TExprContext* ectx = nullptr); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPHypDPTableSize, TExprContext& ectx); } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp new file mode 100644 index 000000000000..b1e73d9562b1 --- /dev/null +++ b/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp @@ -0,0 +1,267 @@ +#include +#include +#include +#include +#include + +#include +#include + +using namespace NYql; +using namespace NNodes; +using namespace NYql::NDq; + +namespace { + +TExprNode::TPtr MakeLabel(TExprContext& ctx, const std::vector& vars) { + TVector label; label.reserve(vars.size()); + + auto pos = ctx.AppendPosition({}); + for (auto var : vars) { + label.emplace_back(ctx.NewAtom(pos, var)); + } + + return Build(ctx, pos) + .Add(label) + .Done() + .Ptr(); +} + +} // namespace + +Y_UNIT_TEST_SUITE(DQCBO) { + +Y_UNIT_TEST(Empty) { + TBaseProviderContext pctx; + TExprContext dummyCtx; + std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); +} + +Y_UNIT_TEST(JoinSearch2Rels) { + TBaseProviderContext pctx; + TExprContext dummyCtx; + std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); + + auto rel1 = std::make_shared( + "a", + TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000) + ); + auto rel2 = std::make_shared( + "b", + TOptimizerStatistics(BaseTable, 1000000, 1, 0, 9000009) + ); + + TVector leftKeys = {NDq::TJoinColumn("a", "1")}; + TVector rightKeys ={NDq::TJoinColumn("b", "1")}; + + auto op = std::make_shared( + std::static_pointer_cast(rel1), + std::static_pointer_cast(rel2), + leftKeys, + rightKeys, + InnerJoin, + EJoinAlgoType::GraceJoin, + true, + false + ); + + auto res = optimizer->JoinSearch(op); + std::stringstream ss; + res->Print(ss); + Cout << ss.str() << '\n'; + TString expected = R"__(Join: (InnerJoin,MapJoin,RightAny) b.1=a.1, +Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA + Rel: b + Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA + Rel: a + Type: BaseTable, Nrows: 100000, Ncols: 1, ByteSize: 0, Cost: 1e+06, Sel: 1, Storage: NA +)__"; + + UNIT_ASSERT_STRINGS_EQUAL(expected, ss.str()); +} + +Y_UNIT_TEST(JoinSearch3Rels) { + TBaseProviderContext pctx; + TExprContext dummyCtx; + std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); + + auto rel1 = std::make_shared("a", + TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000)); + auto rel2 = std::make_shared("b", + TOptimizerStatistics(BaseTable, 1000000, 1, 0, 9000009)); + auto rel3 = std::make_shared("c", + TOptimizerStatistics(BaseTable, 10000, 1, 0, 9009)); + + TVector leftKeys = {NDq::TJoinColumn("a", "1")}; + TVector rightKeys ={NDq::TJoinColumn("b", "1")}; + + auto op1 = std::make_shared( + std::static_pointer_cast(rel1), + std::static_pointer_cast(rel2), + leftKeys, + rightKeys, + InnerJoin, + EJoinAlgoType::GraceJoin, + false, + false + ); + + leftKeys.push_back(NDq::TJoinColumn("a", "1")); + rightKeys.push_back(NDq::TJoinColumn("c", "1")); + + auto op2 = std::make_shared( + std::static_pointer_cast(op1), + std::static_pointer_cast(rel3), + leftKeys, + rightKeys, + InnerJoin, + EJoinAlgoType::GraceJoin, + true, + false + ); + + auto res = optimizer->JoinSearch(op2); + std::stringstream ss; + res->Print(ss); + Cout << ss.str() << '\n'; + + TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1, +Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Sel: 1, Storage: NA + Join: (InnerJoin,MapJoin) b.1=a.1, + Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA + Rel: b + Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA + Rel: a + Type: BaseTable, Nrows: 100000, Ncols: 1, ByteSize: 0, Cost: 1e+06, Sel: 1, Storage: NA + Rel: c + Type: BaseTable, Nrows: 10000, Ncols: 1, ByteSize: 0, Cost: 9009, Sel: 1, Storage: NA +)__"; + + UNIT_ASSERT_STRINGS_EQUAL(expected, ss.str()); +} + +Y_UNIT_TEST(RelCollector) { + TExprContext ctx; + auto pos = ctx.AppendPosition({}); + TVector joinArgs; + TVector tables; + tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "orders")).Done()); + tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "customer")).Done()); + tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "nation")).Done()); + + auto joinTree = Build(ctx, pos).Done().Ptr(); + auto settings = Build(ctx, pos).Done().Ptr(); + + joinArgs.insert(joinArgs.end(), tables.begin(), tables.end()); + joinArgs.emplace_back(joinTree); + joinArgs.emplace_back(settings); + + TCoEquiJoin equiJoin = Build(ctx, pos) + .Add(joinArgs) + .Done(); + + TTypeAnnotationContext typeCtx; + TVector> rels; + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); + + typeCtx.SetStats(tables[1].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); + + typeCtx.SetStats(tables[0].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); + typeCtx.SetStats(tables[2].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); + + TVector labels; + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto label, auto, auto) { labels.emplace_back(label); }) == true); + UNIT_ASSERT(labels.size() == 3); + UNIT_ASSERT_STRINGS_EQUAL(labels[0], "orders"); + UNIT_ASSERT_STRINGS_EQUAL(labels[1], "customer"); + UNIT_ASSERT_STRINGS_EQUAL(labels[2], "nation"); +} + +Y_UNIT_TEST(RelCollectorBrokenEquiJoin) { + TExprContext ctx; + auto pos = ctx.AppendPosition({}); + TVector joinArgs; + auto joinTree = Build(ctx, pos).Done().Ptr(); + auto settings = Build(ctx, pos).Done().Ptr(); + TCoEquiJoin equiJoin = Build(ctx, pos) + .Add(joinArgs) + .Done(); + + TTypeAnnotationContext typeCtx; + TVector> rels; + UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); +} + +void _DqOptimizeEquiJoinWithCosts(const std::function& optFactory, TExprContext& ctx) { + TTypeAnnotationContext typeCtx; + auto pos = ctx.AppendPosition({}); + TVector joinArgs; + TVector tables; + tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "orders")).Done()); + tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "customer")).Done()); + + auto settings = Build(ctx, pos).Done().Ptr(); + + auto joinTree = Build(ctx, pos) + .Type(ctx.NewAtom(pos, "Inner")) + .LeftScope(ctx.NewAtom(pos, "orders")) + .RightScope(ctx.NewAtom(pos, "customer")) + .LeftKeys(MakeLabel(ctx, {"orders", "a"})) + .RightKeys(MakeLabel(ctx, {"customer", "b"})) + .Options(settings) + .Done().Ptr(); + + joinArgs.insert(joinArgs.end(), tables.begin(), tables.end()); + joinArgs.emplace_back(joinTree); + joinArgs.emplace_back(settings); + + typeCtx.SetStats(tables[0].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); + typeCtx.SetStats(tables[1].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); + + TCoEquiJoin equiJoin = Build(ctx, pos) + .Add(joinArgs) + .Done(); + + auto opt = std::unique_ptr(optFactory()); + std::function>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr&)> providerCollect = [](auto& rels, auto label, auto node, auto stats) { + Y_UNUSED(node); + auto rel = std::make_shared(TString(label), *stats); + rels.push_back(rel); + }; + auto res = DqOptimizeEquiJoinWithCosts(equiJoin, ctx, typeCtx, 2, *opt, providerCollect); + UNIT_ASSERT(equiJoin.Ptr() != res.Ptr()); + UNIT_ASSERT(equiJoin.Ptr()->ChildrenSize() == res.Ptr()->ChildrenSize()); + UNIT_ASSERT(equiJoin.Maybe()); + auto resStr = NCommon::ExprToPrettyString(ctx, *res.Ptr()); + auto expected = R"__(( +(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('join_algo 'MapJoin)))) +(return (EquiJoin '('() '"orders") '('() '"customer") $1 '())) +) +)__"; + UNIT_ASSERT_STRINGS_EQUAL(expected, resStr); +} + +Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { + TExprContext ctx; + TBaseProviderContext pctx; + std::function optFactory = [&]() { + TExprContext dummyCtx; + return MakeNativeOptimizerNew(pctx, 100000, dummyCtx); + }; + _DqOptimizeEquiJoinWithCosts(optFactory, ctx); +} + +Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsPG) { + TExprContext ctx; + TBaseProviderContext pctx; + std::function log = [&](auto str) { + Cerr << str; + }; + std::function optFactory = [&]() { + return MakePgOptimizerNew(pctx, ctx, log); + }; + _DqOptimizeEquiJoinWithCosts(optFactory, ctx); +} + +} // DQCBO From 9147fb403a9963bc3481f446b89ca80338c701fb Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Thu, 28 Nov 2024 15:38:09 +0000 Subject: [PATCH 12/12] fix --- ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp | 267 ------------------------ 1 file changed, 267 deletions(-) delete mode 100644 ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp diff --git a/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp deleted file mode 100644 index b1e73d9562b1..000000000000 --- a/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp +++ /dev/null @@ -1,267 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include - -using namespace NYql; -using namespace NNodes; -using namespace NYql::NDq; - -namespace { - -TExprNode::TPtr MakeLabel(TExprContext& ctx, const std::vector& vars) { - TVector label; label.reserve(vars.size()); - - auto pos = ctx.AppendPosition({}); - for (auto var : vars) { - label.emplace_back(ctx.NewAtom(pos, var)); - } - - return Build(ctx, pos) - .Add(label) - .Done() - .Ptr(); -} - -} // namespace - -Y_UNIT_TEST_SUITE(DQCBO) { - -Y_UNIT_TEST(Empty) { - TBaseProviderContext pctx; - TExprContext dummyCtx; - std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); -} - -Y_UNIT_TEST(JoinSearch2Rels) { - TBaseProviderContext pctx; - TExprContext dummyCtx; - std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); - - auto rel1 = std::make_shared( - "a", - TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000) - ); - auto rel2 = std::make_shared( - "b", - TOptimizerStatistics(BaseTable, 1000000, 1, 0, 9000009) - ); - - TVector leftKeys = {NDq::TJoinColumn("a", "1")}; - TVector rightKeys ={NDq::TJoinColumn("b", "1")}; - - auto op = std::make_shared( - std::static_pointer_cast(rel1), - std::static_pointer_cast(rel2), - leftKeys, - rightKeys, - InnerJoin, - EJoinAlgoType::GraceJoin, - true, - false - ); - - auto res = optimizer->JoinSearch(op); - std::stringstream ss; - res->Print(ss); - Cout << ss.str() << '\n'; - TString expected = R"__(Join: (InnerJoin,MapJoin,RightAny) b.1=a.1, -Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA - Rel: b - Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA - Rel: a - Type: BaseTable, Nrows: 100000, Ncols: 1, ByteSize: 0, Cost: 1e+06, Sel: 1, Storage: NA -)__"; - - UNIT_ASSERT_STRINGS_EQUAL(expected, ss.str()); -} - -Y_UNIT_TEST(JoinSearch3Rels) { - TBaseProviderContext pctx; - TExprContext dummyCtx; - std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); - - auto rel1 = std::make_shared("a", - TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000)); - auto rel2 = std::make_shared("b", - TOptimizerStatistics(BaseTable, 1000000, 1, 0, 9000009)); - auto rel3 = std::make_shared("c", - TOptimizerStatistics(BaseTable, 10000, 1, 0, 9009)); - - TVector leftKeys = {NDq::TJoinColumn("a", "1")}; - TVector rightKeys ={NDq::TJoinColumn("b", "1")}; - - auto op1 = std::make_shared( - std::static_pointer_cast(rel1), - std::static_pointer_cast(rel2), - leftKeys, - rightKeys, - InnerJoin, - EJoinAlgoType::GraceJoin, - false, - false - ); - - leftKeys.push_back(NDq::TJoinColumn("a", "1")); - rightKeys.push_back(NDq::TJoinColumn("c", "1")); - - auto op2 = std::make_shared( - std::static_pointer_cast(op1), - std::static_pointer_cast(rel3), - leftKeys, - rightKeys, - InnerJoin, - EJoinAlgoType::GraceJoin, - true, - false - ); - - auto res = optimizer->JoinSearch(op2); - std::stringstream ss; - res->Print(ss); - Cout << ss.str() << '\n'; - - TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1, -Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Sel: 1, Storage: NA - Join: (InnerJoin,MapJoin) b.1=a.1, - Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA - Rel: b - Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA - Rel: a - Type: BaseTable, Nrows: 100000, Ncols: 1, ByteSize: 0, Cost: 1e+06, Sel: 1, Storage: NA - Rel: c - Type: BaseTable, Nrows: 10000, Ncols: 1, ByteSize: 0, Cost: 9009, Sel: 1, Storage: NA -)__"; - - UNIT_ASSERT_STRINGS_EQUAL(expected, ss.str()); -} - -Y_UNIT_TEST(RelCollector) { - TExprContext ctx; - auto pos = ctx.AppendPosition({}); - TVector joinArgs; - TVector tables; - tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "orders")).Done()); - tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "customer")).Done()); - tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "nation")).Done()); - - auto joinTree = Build(ctx, pos).Done().Ptr(); - auto settings = Build(ctx, pos).Done().Ptr(); - - joinArgs.insert(joinArgs.end(), tables.begin(), tables.end()); - joinArgs.emplace_back(joinTree); - joinArgs.emplace_back(settings); - - TCoEquiJoin equiJoin = Build(ctx, pos) - .Add(joinArgs) - .Done(); - - TTypeAnnotationContext typeCtx; - TVector> rels; - UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); - - typeCtx.SetStats(tables[1].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); - UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); - - typeCtx.SetStats(tables[0].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); - typeCtx.SetStats(tables[2].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); - - TVector labels; - UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto label, auto, auto) { labels.emplace_back(label); }) == true); - UNIT_ASSERT(labels.size() == 3); - UNIT_ASSERT_STRINGS_EQUAL(labels[0], "orders"); - UNIT_ASSERT_STRINGS_EQUAL(labels[1], "customer"); - UNIT_ASSERT_STRINGS_EQUAL(labels[2], "nation"); -} - -Y_UNIT_TEST(RelCollectorBrokenEquiJoin) { - TExprContext ctx; - auto pos = ctx.AppendPosition({}); - TVector joinArgs; - auto joinTree = Build(ctx, pos).Done().Ptr(); - auto settings = Build(ctx, pos).Done().Ptr(); - TCoEquiJoin equiJoin = Build(ctx, pos) - .Add(joinArgs) - .Done(); - - TTypeAnnotationContext typeCtx; - TVector> rels; - UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); -} - -void _DqOptimizeEquiJoinWithCosts(const std::function& optFactory, TExprContext& ctx) { - TTypeAnnotationContext typeCtx; - auto pos = ctx.AppendPosition({}); - TVector joinArgs; - TVector tables; - tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "orders")).Done()); - tables.emplace_back(Build(ctx, pos).List(Build(ctx, pos).Done().Ptr()).Scope(ctx.NewAtom(pos, "customer")).Done()); - - auto settings = Build(ctx, pos).Done().Ptr(); - - auto joinTree = Build(ctx, pos) - .Type(ctx.NewAtom(pos, "Inner")) - .LeftScope(ctx.NewAtom(pos, "orders")) - .RightScope(ctx.NewAtom(pos, "customer")) - .LeftKeys(MakeLabel(ctx, {"orders", "a"})) - .RightKeys(MakeLabel(ctx, {"customer", "b"})) - .Options(settings) - .Done().Ptr(); - - joinArgs.insert(joinArgs.end(), tables.begin(), tables.end()); - joinArgs.emplace_back(joinTree); - joinArgs.emplace_back(settings); - - typeCtx.SetStats(tables[0].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); - typeCtx.SetStats(tables[1].Ptr()->Child(0), std::make_shared(BaseTable, 1, 1, 1)); - - TCoEquiJoin equiJoin = Build(ctx, pos) - .Add(joinArgs) - .Done(); - - auto opt = std::unique_ptr(optFactory()); - std::function>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr&)> providerCollect = [](auto& rels, auto label, auto node, auto stats) { - Y_UNUSED(node); - auto rel = std::make_shared(TString(label), *stats); - rels.push_back(rel); - }; - auto res = DqOptimizeEquiJoinWithCosts(equiJoin, ctx, typeCtx, 2, *opt, providerCollect); - UNIT_ASSERT(equiJoin.Ptr() != res.Ptr()); - UNIT_ASSERT(equiJoin.Ptr()->ChildrenSize() == res.Ptr()->ChildrenSize()); - UNIT_ASSERT(equiJoin.Maybe()); - auto resStr = NCommon::ExprToPrettyString(ctx, *res.Ptr()); - auto expected = R"__(( -(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('join_algo 'MapJoin)))) -(return (EquiJoin '('() '"orders") '('() '"customer") $1 '())) -) -)__"; - UNIT_ASSERT_STRINGS_EQUAL(expected, resStr); -} - -Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { - TExprContext ctx; - TBaseProviderContext pctx; - std::function optFactory = [&]() { - TExprContext dummyCtx; - return MakeNativeOptimizerNew(pctx, 100000, dummyCtx); - }; - _DqOptimizeEquiJoinWithCosts(optFactory, ctx); -} - -Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsPG) { - TExprContext ctx; - TBaseProviderContext pctx; - std::function log = [&](auto str) { - Cerr << str; - }; - std::function optFactory = [&]() { - return MakePgOptimizerNew(pctx, ctx, log); - }; - _DqOptimizeEquiJoinWithCosts(optFactory, ctx); -} - -} // DQCBO