From 7c74822988c542519ead15d75837dcab10226204 Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Sun, 24 Dec 2023 18:32:04 +0100 Subject: [PATCH 1/2] Fix constrains after expand PartitionsByKeys. --- .../yql_opt_peephole_physical.cpp | 29 ++++++- ydb/library/yql/core/yql_expr_constraint.cpp | 86 +------------------ ydb/library/yql/core/yql_opt_utils.cpp | 79 +++++++++++++++++ ydb/library/yql/core/yql_opt_utils.h | 5 ++ 4 files changed, 116 insertions(+), 83 deletions(-) diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index 8f9f11e59782..7e0d0af21ac9 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -2524,9 +2524,36 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext .Seal() .Build(); } + + if (auto keys = GetPathsToKeys(node->Child(1U)->Tail(), node->Child(1U)->Head().Head()); !keys.empty()) { + if (const auto sortKeySelector = node->Child(2U); sortKeySelector->IsLambda()) { + auto sortKeys = GetPathsToKeys(sortKeySelector->Tail(), sortKeySelector->Head().Head()); + std::move(sortKeys.begin(), sortKeys.end(), std::back_inserter(keys)); + std::sort(keys.begin(), keys.end()); + } + + TExprNode::TListType columns; + columns.reserve(keys.size()); + for (const auto& path : keys) { + if (1U == path.size()) + columns.emplace_back(ctx.NewAtom(node->Pos(), path.front())); + else { + TExprNode::TListType atoms(path.size()); + std::transform(path.cbegin(), path.cend(), atoms.begin(), [&](const std::string_view& name) { return ctx.NewAtom(node->Pos(), name); }); + columns.emplace_back(ctx.NewList(node->Pos(), std::move(atoms))); + } + } + + sort = ctx.Builder(node->Pos()) + .Callable("AssumeChopped") + .Add(0, std::move(sort)) + .List(1).Add(std::move(columns)).Seal() + .Seal() + .Build(); + } } - return KeepConstraints(ctx.ReplaceNode(node->Tail().TailPtr(), node->Tail().Head().Head(), std::move(sort)), *node, ctx); + return ctx.ReplaceNode(node->Tail().TailPtr(), node->Tail().Head().Head(), std::move(sort)); } TExprNode::TPtr ExpandIsKeySwitch(const TExprNode::TPtr& node, TExprContext& ctx) { diff --git a/ydb/library/yql/core/yql_expr_constraint.cpp b/ydb/library/yql/core/yql_expr_constraint.cpp index 63b7c25097e3..4d435052cacb 100644 --- a/ydb/library/yql/core/yql_expr_constraint.cpp +++ b/ydb/library/yql/core/yql_expr_constraint.cpp @@ -753,38 +753,6 @@ class TCallableConstraintTransformer : public TCallableTransformerBase GetPathToKey(const TExprNode& body, const TExprNode& arg) { - if (&body == &arg) - return TPartOfConstraintBase::TPathType(); - - if (body.IsCallable({"Member","Nth"})) { - if (auto path = GetPathToKey(body.Head(), arg)) { - path->emplace_back(body.Tail().Content()); - return path; - } - } - - if (body.IsCallable({"CastStruct","FilterMembers","Just","Unwrap"})) - return GetPathToKey(body.Head(), arg); - if (body.IsCallable("Member") && body.Head().IsCallable("AsStruct")) - return GetPathToKey(GetLiteralStructMember(body.Head(), body.Tail()), arg); - if (body.IsCallable("Nth") && body.Head().IsList()) - return GetPathToKey(*body.Head().Child(FromString(body.Tail().Content())), arg); - if (body.IsList() && 1U == body.ChildrenSize() && body.Head().IsCallable("Nth") && body.Head().Tail().IsAtom("0") && - 1U == RemoveOptionality(*body.Head().Head().GetTypeAnn()).Cast()->GetSize()) - // Especialy for "Extract single item tuple from Condense1" optimizer. - return GetPathToKey(body.Head().Head(), arg); - if (body.IsCallable("AsStruct") && 1U == body.ChildrenSize() && body.Head().Tail().IsCallable("Member") && - body.Head().Head().Content() == body.Head().Tail().Tail().Content() && - 1U == RemoveOptionality(*body.Head().Tail().Head().GetTypeAnn()).Cast()->GetSize()) - // Especialy for "Extract single item struct from Condense1" optimizer. - return GetPathToKey(body.Head().Tail().Head(), arg); - if (IsTransparentIfPresent(body) && &body.Head() == &arg) - return GetPathToKey(body.Child(1)->Tail().Head(), body.Child(1)->Head().Head()); - - return std::nullopt; - } - static std::vector> ExtractSimpleSortTraits(const TExprNode& sortDirections, const TExprNode& keySelectorLambda) { const auto& keySelectorBody = keySelectorLambda.Tail(); @@ -2684,34 +2652,6 @@ class TCallableConstraintTransformer : public TCallableTransformerBaseChildRef(TCoIsKeySwitch::idx_StateKeyExtractor), ctx, stateConstraints)); } - static const TExprNode& GetLiteralStructMember(const TExprNode& literal, const TExprNode& member) { - for (const auto& child : literal.Children()) - if (&child->Head() == &member || child->Head().Content() == member.Content()) - return child->Tail(); - ythrow yexception() << "Member '" << member.Content() << "' not found in literal struct."; - } - - static std::optional> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args) { - if (body.IsArgument()) { - for (auto i = 0U; i < args.size(); ++i) - if (&body == args[i].Get()) - return std::make_pair(TPartOfConstraintBase::TPathType(), i); - } else if (body.IsCallable({"Member","Nth"})) { - if (auto path = GetPathToKey(body.Head(), args)) { - path->first.emplace_back(body.Tail().Content()); - return path; - } else if (const auto& head = SkipCallables(body.Head(), {"CastStruct","FilterMembers"}); head.IsCallable("AsStruct") && body.IsCallable("Member")) { - return GetPathToKey(GetLiteralStructMember(head, body.Tail()), args); - } else if (body.IsCallable("Nth") && body.Head().IsList()) { - return GetPathToKey(*body.Head().Child(FromString(body.Tail().Content())), args); - } else if (body.IsCallable({"CastStruct","FilterMembers"})) { - return GetPathToKey(body.Head(), args); - } - } - - return std::nullopt; - } - template static TPartOfConstraintBase::TSetType GetSimpleKeys(const TExprNode& body, const TExprNode::TChildrenType& args, TExprContext& ctx) { TPartOfConstraintBase::TSetType keys; @@ -2749,24 +2689,6 @@ class TCallableConstraintTransformer : public TCallableTransformerBase static TPartOfConstraintBase::TSetType GetSimpleKeys(const TExprNode& selector, TExprContext& ctx) { YQL_ENSURE(selector.IsLambda() && 2U == selector.ChildrenSize()); @@ -2774,8 +2696,8 @@ class TCallableConstraintTransformer : public TCallableTransformerBaseChild(TCoBase::idx_KeySelectorLambda)->Tail(), input->Child(TCoBase::idx_KeySelectorLambda)->Head().Head()); + keys = GetPathsToKeys(input->Child(TCoBase::idx_KeySelectorLambda)->Tail(), input->Child(TCoBase::idx_KeySelectorLambda)->Head().Head()); if (const auto sortKeySelector = input->Child(TCoBase::idx_SortKeySelectorLambda); sortKeySelector->IsLambda()) { if (const auto status = UpdateLambdaConstraints(*sortKeySelector); status != TStatus::Ok) { return status; } - auto sortKeys = GetSimpleKeys(sortKeySelector->Tail(), sortKeySelector->Head().Head()); + auto sortKeys = GetPathsToKeys(sortKeySelector->Tail(), sortKeySelector->Head().Head()); std::move(sortKeys.begin(), sortKeys.end(), std::back_inserter(keys)); std::sort(keys.begin(), keys.end()); } diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp index b67f89854b42..91741edc2333 100644 --- a/ydb/library/yql/core/yql_opt_utils.cpp +++ b/ydb/library/yql/core/yql_opt_utils.cpp @@ -82,6 +82,13 @@ TExprNodeBuilder GetterBuilder(TExprNodeBuilder parent, ui32 index, const TTypeA return GetterBuilder(parent.Callable(index, ETypeAnnotationKind::Struct == parentType->GetKind() ? "Member" : "Nth"), 0U, type, path).Atom(1, name).Seal(); } +const TExprNode& GetLiteralStructMember(const TExprNode& literal, const TExprNode& member) { + for (const auto& child : literal.Children()) + if (&child->Head() == &member || child->Head().Content() == member.Content()) + return child->Tail(); + ythrow yexception() << "Member '" << member.Content() << "' not found in literal struct."; +} + } TExprNode::TPtr MakeBoolNothing(TPositionHandle position, TExprContext& ctx) { @@ -1994,4 +2001,76 @@ void OptimizeSubsetFieldsForNodeWithMultiUsage(const TExprNode::TPtr& node, cons } } + +std::optional> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args) { + if (body.IsArgument()) { + for (auto i = 0U; i < args.size(); ++i) + if (&body == args[i].Get()) + return std::make_pair(TPartOfConstraintBase::TPathType(), i); + } else if (body.IsCallable({"Member","Nth"})) { + if (auto path = GetPathToKey(body.Head(), args)) { + path->first.emplace_back(body.Tail().Content()); + return path; + } else if (const auto& head = SkipCallables(body.Head(), {"CastStruct","FilterMembers"}); head.IsCallable("AsStruct") && body.IsCallable("Member")) { + return GetPathToKey(GetLiteralStructMember(head, body.Tail()), args); + } else if (body.IsCallable("Nth") && body.Head().IsList()) { + return GetPathToKey(*body.Head().Child(FromString(body.Tail().Content())), args); + } else if (body.IsCallable({"CastStruct","FilterMembers"})) { + return GetPathToKey(body.Head(), args); + } + } + + return std::nullopt; +} + +std::optional GetPathToKey(const TExprNode& body, const TExprNode& arg) { + if (&body == &arg) + return TPartOfConstraintBase::TPathType(); + + if (body.IsCallable({"Member","Nth"})) { + if (auto path = GetPathToKey(body.Head(), arg)) { + path->emplace_back(body.Tail().Content()); + return path; + } + } + + if (body.IsCallable({"CastStruct","FilterMembers","Just","Unwrap"})) + return GetPathToKey(body.Head(), arg); + if (body.IsCallable("Member") && body.Head().IsCallable("AsStruct")) + return GetPathToKey(GetLiteralStructMember(body.Head(), body.Tail()), arg); + if (body.IsCallable("Nth") && body.Head().IsList()) + return GetPathToKey(*body.Head().Child(FromString(body.Tail().Content())), arg); + if (body.IsList() && 1U == body.ChildrenSize() && body.Head().IsCallable("Nth") && body.Head().Tail().IsAtom("0") && + 1U == RemoveOptionality(*body.Head().Head().GetTypeAnn()).Cast()->GetSize()) + // Especialy for "Extract single item tuple from Condense1" optimizer. + return GetPathToKey(body.Head().Head(), arg); + if (body.IsCallable("AsStruct") && 1U == body.ChildrenSize() && body.Head().Tail().IsCallable("Member") && + body.Head().Head().Content() == body.Head().Tail().Tail().Content() && + 1U == RemoveOptionality(*body.Head().Tail().Head().GetTypeAnn()).Cast()->GetSize()) + // Especialy for "Extract single item struct from Condense1" optimizer. + return GetPathToKey(body.Head().Tail().Head(), arg); + if (IsTransparentIfPresent(body) && &body.Head() == &arg) + return GetPathToKey(body.Child(1)->Tail().Head(), body.Child(1)->Head().Head()); + + return std::nullopt; +} + +TPartOfConstraintBase::TSetType GetPathsToKeys(const TExprNode& body, const TExprNode& arg) { + TPartOfConstraintBase::TSetType keys; + if (body.IsList()) { + if (const auto size = body.ChildrenSize()) { + keys.reserve(size); + for (auto i = 0U; i < size; ++i) + if (auto path = GetPathToKey(*body.Child(i), arg)) + keys.insert_unique(std::move(*path)); + } + } else if (body.IsCallable("StablePickle")) { + return GetPathsToKeys(body.Head(), arg); + } else if (auto path = GetPathToKey(body, arg)) { + keys.insert_unique(std::move(*path)); + } + + return keys; +} + } diff --git a/ydb/library/yql/core/yql_opt_utils.h b/ydb/library/yql/core/yql_opt_utils.h index 0d759bc4ff1e..c89e705e96b6 100644 --- a/ydb/library/yql/core/yql_opt_utils.h +++ b/ydb/library/yql/core/yql_opt_utils.h @@ -149,4 +149,9 @@ void OptimizeSubsetFieldsForNodeWithMultiUsage(const TExprNode::TPtr& node, cons TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, std::function handler); +std::optional GetPathToKey(const TExprNode& body, const TExprNode& arg); +std::optional> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args); + +TPartOfConstraintBase::TSetType GetPathsToKeys(const TExprNode& body, const TExprNode& arg); + } From f6c65720467ea245e07e290118a939bed1d332a2 Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Mon, 25 Dec 2023 10:12:51 +0100 Subject: [PATCH 2/2] A little fix and cosmetic. --- .../yql_opt_peephole_physical.cpp | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index 7e0d0af21ac9..0a148c90f24d 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -2445,23 +2445,17 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext const bool isStream = node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Flow || node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Stream; TExprNode::TPtr sort; - auto keyExtractor = node->ChildPtr(1); + const auto keyExtractor = node->Child(TCoPartitionsByKeys::idx_KeySelectorLambda); const bool isConstKey = !IsDepended(keyExtractor->Tail(), keyExtractor->Head().Head()); - const bool haveSort = !node->Child(2)->IsCallable("Void"); - auto idLambda = ctx.Builder(node->Pos()) - .Lambda() - .Param("x") - .Arg("x") - .Seal() - .Build(); + const bool haveSort = node->Child(TCoPartitionsByKeys::idx_SortKeySelectorLambda)->IsLambda(); auto sortLambda = ctx.Builder(node->Pos()) .Lambda() .Param("x") .Callable("Sort") .Arg(0, "x") - .Add(1, node->ChildPtr(2)) - .Add(2, node->ChildPtr(3)) + .Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_SortDirections)) + .Add(2, node->ChildPtr(TCoPartitionsByKeys::idx_SortKeySelectorLambda)) .Seal() .Seal() .Build(); @@ -2480,7 +2474,7 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext .Callable(0, "DictPayloads") .Arg(0, "dict") .Seal() - .Add(1, haveSort ? sortLambda : idLambda) + .Add(1, haveSort ? sortLambda : MakeIdentityLambda(node->Pos(), ctx)) .Seal() .Seal() .Build(); @@ -2490,8 +2484,8 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext sort = ctx.Builder(node->Pos()) .Callable("Sort") .Add(0, node->HeadPtr()) - .Add(1, node->ChildPtr(2)) - .Add(2, node->ChildPtr(3)) + .Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_SortDirections)) + .Add(2, node->ChildPtr(TCoPartitionsByKeys::idx_SortKeySelectorLambda)) .Seal() .Build(); } else { @@ -2503,8 +2497,8 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext .Callable("OrderedFlatMap") .Callable(0, "SqueezeToDict") .Add(0, node->HeadPtr()) - .Add(1, std::move(keyExtractor)) - .Add(2, std::move(idLambda)) + .Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_KeySelectorLambda)) + .Add(2, MakeIdentityLambda(node->Pos(), ctx)) .Add(3, std::move(settings)) .Seal() .Add(1, std::move(flatten)) @@ -2516,8 +2510,8 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext .With(0) .Callable("ToDict") .Add(0, node->HeadPtr()) - .Add(1, std::move(keyExtractor)) - .Add(2, std::move(idLambda)) + .Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_KeySelectorLambda)) + .Add(2, MakeIdentityLambda(node->Pos(), ctx)) .Add(3, std::move(settings)) .Seal() .Done() @@ -2525,8 +2519,8 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext .Build(); } - if (auto keys = GetPathsToKeys(node->Child(1U)->Tail(), node->Child(1U)->Head().Head()); !keys.empty()) { - if (const auto sortKeySelector = node->Child(2U); sortKeySelector->IsLambda()) { + if (auto keys = GetPathsToKeys(keyExtractor->Tail(), keyExtractor->Head().Head()); !keys.empty()) { + if (const auto sortKeySelector = node->Child(TCoPartitionsByKeys::idx_SortKeySelectorLambda); sortKeySelector->IsLambda()) { auto sortKeys = GetPathsToKeys(sortKeySelector->Tail(), sortKeySelector->Head().Head()); std::move(sortKeys.begin(), sortKeys.end(), std::back_inserter(keys)); std::sort(keys.begin(), keys.end());