From c25f7a7160466875262b1910e35300604bee7049 Mon Sep 17 00:00:00 2001 From: Mikhail Surin Date: Fri, 18 Oct 2024 13:44:49 +0300 Subject: [PATCH] Prefer indices aligned with order-by-limit (#10589) --- ydb/core/kqp/opt/kqp_opt.cpp | 43 +++++++++++++++++++ ydb/core/kqp/opt/kqp_opt_impl.h | 3 ++ .../logical/kqp_opt_log_ranges_predext.cpp | 32 ++++++++++++-- .../kqp/opt/physical/kqp_opt_phy_helpers.cpp | 43 ------------------- ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h | 3 -- ydb/core/kqp/ut/opt/kqp_ne_ut.cpp | 23 ++++++++++ 6 files changed, 97 insertions(+), 50 deletions(-) diff --git a/ydb/core/kqp/opt/kqp_opt.cpp b/ydb/core/kqp/opt/kqp_opt.cpp index bd0797040f6c..8af6ac69ad4b 100644 --- a/ydb/core/kqp/opt/kqp_opt.cpp +++ b/ydb/core/kqp/opt/kqp_opt.cpp @@ -110,6 +110,49 @@ TKqpTable BuildTableMeta(const TKikimrTableDescription& tableDesc, const TPositi return BuildTableMeta(*tableDesc.Metadata, pos, ctx); } +bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, + const TMaybe>& passthroughFields) +{ + auto checkKey = [keySelector, &tableDesc, &passthroughFields] (NYql::NNodes::TExprBase key, ui32 index) { + if (!key.Maybe()) { + return false; + } + + auto member = key.Cast(); + if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { + return false; + } + + auto column = TString(member.Name().Value()); + auto columnIndex = tableDesc.GetKeyColumnIndex(column); + if (!columnIndex || *columnIndex != index) { + return false; + } + + if (passthroughFields && !passthroughFields->contains(column)) { + return false; + } + + return true; + }; + + auto lambdaBody = keySelector.Body(); + if (auto maybeTuple = lambdaBody.Maybe()) { + auto tuple = maybeTuple.Cast(); + for (size_t i = 0; i < tuple.Size(); ++i) { + if (!checkKey(tuple.Item(i), i)) { + return false; + } + } + } else { + if (!checkKey(lambdaBody, 0)) { + return false; + } + } + + return true; +} + bool IsBuiltEffect(const TExprBase& effect) { // Stage with effect output if (effect.Maybe()) { diff --git a/ydb/core/kqp/opt/kqp_opt_impl.h b/ydb/core/kqp/opt/kqp_opt_impl.h index 8370b0994b9b..e85efabe2927 100644 --- a/ydb/core/kqp/opt/kqp_opt_impl.h +++ b/ydb/core/kqp/opt/kqp_opt_impl.h @@ -62,4 +62,7 @@ TVector> BuildS bool IsBuiltEffect(const NYql::NNodes::TExprBase& effect); +bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, + const TMaybe>& passthroughFields = {}); + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp index bc5316f15acd..29722d23a386 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp @@ -177,6 +177,22 @@ TMaybeNode TryBuildTrivialReadTable(TCoFlatMap& flatmap, TKqlReadTabl .Done(); } +TMaybeNode ExtractTopSortKeySelector(TExprBase node, const NYql::TParentsMap& parentsMap) { + auto it = parentsMap.find(node.Raw()); + if (it != parentsMap.end()) { + if (it->second.size() != 1) { + return {}; + } + for (auto* node : it->second) { + if (TCoTopSort::Match(node)) { + TCoTopSort topSort(node); + return topSort.KeySelectorLambda(); + } + } + } + return {}; +} + } // namespace TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, @@ -269,7 +285,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx YQL_ENSURE(prepareSuccess); if (!indexName.IsValid() && !readSettings.ForcePrimary && kqpCtx.Config->IndexAutoChooserMode != NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_DISABLED) { - using TIndexComparisonKey = std::tuple; + using TIndexComparisonKey = std::tuple; auto calcNeedsJoin = [&] (const TKikimrTableMetadataPtr& keyTable) -> bool { bool needsJoin = false; for (auto&& column : read.Columns()) { @@ -280,8 +296,16 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx return needsJoin; }; - auto calcKey = [&](NYql::IPredicateRangeExtractor::TBuildResult buildResult, size_t descriptionKeyColumns, bool needsJoin) -> TIndexComparisonKey { + auto keySelector = ExtractTopSortKeySelector(flatmap, parentsMap); + + auto calcKey = [&]( + NYql::IPredicateRangeExtractor::TBuildResult buildResult, + size_t descriptionKeyColumns, + bool needsJoin, + const NYql::TKikimrTableDescription & tableDesc) -> TIndexComparisonKey + { return std::make_tuple( + keySelector.IsValid() && IsSortKeyPrimary(keySelector.Cast(), tableDesc), buildResult.PointPrefixLen >= descriptionKeyColumns, buildResult.PointPrefixLen >= descriptionKeyColumns ? 0 : buildResult.PointPrefixLen, buildResult.UsedPrefixLen >= descriptionKeyColumns, @@ -293,7 +317,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx auto primaryBuildResult = extractor->BuildComputeNode(mainTableDesc.Metadata->KeyColumnNames, ctx, typesCtx); if (primaryBuildResult.PointPrefixLen < mainTableDesc.Metadata->KeyColumnNames.size()) { - auto maxKey = calcKey(primaryBuildResult, mainTableDesc.Metadata->KeyColumnNames.size(), false); + auto maxKey = calcKey(primaryBuildResult, mainTableDesc.Metadata->KeyColumnNames.size(), false, mainTableDesc); for (auto& index : mainTableDesc.Metadata->Indexes) { if (index.Type != TIndexDescription::EType::GlobalAsync) { auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(index.Name)).first->Name); @@ -307,7 +331,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx continue; } - auto key = calcKey(buildResult, index.KeyColumns.size(), needsJoin); + auto key = calcKey(buildResult, index.KeyColumns.size(), needsJoin, tableDesc); if (key > maxKey) { maxKey = key; chosenIndex = index.Name; diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp index 18a439af0c21..298680c604bb 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp @@ -170,49 +170,6 @@ NYql::NNodes::TDqStage ReplaceTableSourceSettings(NYql::NNodes::TDqStage stage, .Done(); } -bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, - const TMaybe>& passthroughFields) -{ - auto checkKey = [keySelector, &tableDesc, &passthroughFields] (NYql::NNodes::TExprBase key, ui32 index) { - if (!key.Maybe()) { - return false; - } - - auto member = key.Cast(); - if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { - return false; - } - - auto column = TString(member.Name().Value()); - auto columnIndex = tableDesc.GetKeyColumnIndex(column); - if (!columnIndex || *columnIndex != index) { - return false; - } - - if (passthroughFields && !passthroughFields->contains(column)) { - return false; - } - - return true; - }; - - auto lambdaBody = keySelector.Body(); - if (auto maybeTuple = lambdaBody.Maybe()) { - auto tuple = maybeTuple.Cast(); - for (size_t i = 0; i < tuple.Size(); ++i) { - if (!checkKey(tuple.Item(i), i)) { - return false; - } - } - } else { - if (!checkKey(lambdaBody, 0)) { - return false; - } - } - - return true; -} - ESortDirection GetSortDirection(const NYql::NNodes::TExprBase& sortDirections) { auto getDirection = [] (TExprBase expr) -> ESortDirection { if (!expr.Maybe()) { diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h b/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h index 0e22dbac8e4f..846bf2d5cd69 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h @@ -28,9 +28,6 @@ NYql::NNodes::TDqStage ReplaceStageArg(NYql::NNodes::TDqStage stage, size_t inpu NYql::NNodes::TDqStage ReplaceTableSourceSettings(NYql::NNodes::TDqStage stage, size_t inputIndex, NYql::NNodes::TKqpReadRangesSourceSettings settings, NYql::TExprContext& ctx); -bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, - const TMaybe>& passthroughFields = {}); - enum ESortDirection : ui32 { None = 0, Forward = 1, diff --git a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp index 6a420ade9b08..cd478e6cdac0 100644 --- a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp @@ -4059,6 +4059,29 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertTableReads(result, "/Root/SecondaryKeys/Index/indexImplTable", 1); } + Y_UNIT_TEST(AutoChooseIndexOrderByLimit) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_ONLY_POINTS); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + CreateSampleTablesWithIndex(session); + + NYdb::NTable::TExecDataQuerySettings querySettings; + querySettings.CollectQueryStats(ECollectQueryStatsMode::Profile); + + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + SELECT Fk, Key FROM `/Root/SecondaryKeys` WHERE Fk = 1 ORDER BY Key DESC LIMIT 1; + )", TTxControl::BeginTx(TTxSettings::SerializableRW()), querySettings).GetValueSync(); + AssertSuccessResult(result); + AssertTableReads(result, "/Root/SecondaryKeys/Index/indexImplTable", 0); + } + Y_UNIT_TEST(MultipleBroadcastJoin) { TKikimrSettings kisettings; NKikimrConfig::TAppConfig appConfig;