diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index ad65650a9df4..611816bcee25 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -5073,6 +5073,19 @@ TExprNode::TListType&& DropUnused(TExprNode::TListType&& list, const std::vector return std::move(list); } +std::set FillAllIndexes(const TTypeAnnotationNode& rowType) { + std::set set; + for (ui32 i = 0U; i < rowType.Cast()->GetSize(); ++i) + set.emplace(i); + return set; +} + +template +void RemoveUsedIndexes(const TExprNode& list, std::set& unused) { + for (auto i = 0U; i < list.ChildrenSize() >> (EvenOnly ? 1U : 0U); ++i) + unused.erase(FromString(list.Child(EvenOnly ? i << 1U : i)->Content())); +} + TExprNode::TPtr DropUnusedArgs(const TExprNode& lambda, const std::vector& unused, TExprContext& ctx, ui32 skip = 0U) { const auto& copy = ctx.DeepCopyLambda(lambda); return ctx.ChangeChild(*copy, 0U, ctx.NewArguments(copy->Head().Pos(), DropUnused(copy->Head().ChildrenList(), unused, skip))); @@ -5095,6 +5108,28 @@ void DropUnusedRenames(TExprNode::TPtr& renames, const std::vector& unused renames = ctx.ChangeChildren(*renames, std::move(children)); } +template +void UpdateInputIndexes(TExprNode::TPtr& indexes, const std::vector& unused, TExprContext& ctx) { + TExprNode::TListType children; + children.reserve(indexes->ChildrenSize()); + for (auto i = 0U; i < indexes->ChildrenSize() >> (EvenOnly ? 1U : 0U); ++i) { + const auto idx = i << (EvenOnly ? 1U : 0U); + const auto outIndex = indexes->Child(idx); + const auto oldValue = FromString(outIndex->Content()); + const auto newValue = oldValue - ui32(std::distance(unused.cbegin(), std::lower_bound(unused.cbegin(), unused.cend(), oldValue))); + if (oldValue == newValue) { + children.emplace_back(indexes->ChildPtr(idx)); + } else { + children.emplace_back(ctx.NewAtom(indexes->Child(idx)->Pos(), newValue)); + } + if constexpr (EvenOnly) { + children.emplace_back(indexes->ChildPtr(1U + idx)); + } + } + + indexes = ctx.ChangeChildren(*indexes, std::move(children)); +} + TExprNode::TPtr DropUnusedStateFromUpdate(const TExprNode& lambda, const std::vector& unused, TExprContext& ctx) { const auto& copy = ctx.DeepCopyLambda(lambda, DropUnused(GetLambdaBody(lambda), unused)); return ctx.ChangeChild(*copy, 0U, ctx.NewArguments(copy->Head().Pos(), DropUnused(copy->Head().ChildrenList(), unused, lambda.Head().ChildrenSize() - lambda.ChildrenSize() + 1U))); @@ -6698,6 +6733,59 @@ TExprNode::TPtr OptimizeMapJoinCore(const TExprNode::TPtr& node, TExprContext& c return node; } +TExprNode::TPtr OptimizeGraceJoinCore(const TExprNode::TPtr& node, TExprContext& ctx) { + const TCoGraceJoinCore grace(node); + auto leftUnused = FillAllIndexes(GetSeqItemType(*grace.LeftInput().Ref().GetTypeAnn())); + auto rightUnused = FillAllIndexes(GetSeqItemType(*grace.RightInput().Ref().GetTypeAnn())); + RemoveUsedIndexes(grace.LeftKeysColumns().Ref(), leftUnused); + RemoveUsedIndexes(grace.RightKeysColumns().Ref(), rightUnused); + RemoveUsedIndexes(grace.LeftRenames().Ref(), leftUnused); + RemoveUsedIndexes(grace.RightRenames().Ref(), rightUnused); + if (!(leftUnused.empty() && rightUnused.empty())) { + YQL_CLOG(DEBUG, CorePeepHole) << node->Content() << " with " << leftUnused.size() << " left and " << rightUnused.size() << " right unused columns."; + + auto children = node->ChildrenList(); + if (!leftUnused.empty()) { + const std::vector unused(leftUnused.cbegin(), leftUnused.cend()); + children[TCoGraceJoinCore::idx_LeftInput] = MakeWideMapForDropUnused(std::move(children[TCoGraceJoinCore::idx_LeftInput]), unused, ctx); + UpdateInputIndexes(children[TCoGraceJoinCore::idx_LeftKeysColumns], unused, ctx); + UpdateInputIndexes(children[TCoGraceJoinCore::idx_LeftRenames], unused, ctx); + } + if (!rightUnused.empty()) { + const std::vector unused(rightUnused.cbegin(), rightUnused.cend()); + children[TCoGraceJoinCore::idx_RightInput] = MakeWideMapForDropUnused(std::move(children[TCoGraceJoinCore::idx_RightInput]), unused, ctx); + UpdateInputIndexes(children[TCoGraceJoinCore::idx_RightKeysColumns], unused, ctx); + UpdateInputIndexes(children[TCoGraceJoinCore::idx_RightRenames], unused, ctx); + } + return ctx.ChangeChildren(*node, std::move(children)); + } + + return node; +} + +TExprNode::TPtr OptimizeGraceSelfJoinCore(const TExprNode::TPtr& node, TExprContext& ctx) { + const TCoGraceSelfJoinCore grace(node); + auto unused = FillAllIndexes(GetSeqItemType(*grace.Input().Ref().GetTypeAnn())); + RemoveUsedIndexes(grace.LeftKeysColumns().Ref(), unused); + RemoveUsedIndexes(grace.RightKeysColumns().Ref(), unused); + RemoveUsedIndexes(grace.LeftRenames().Ref(), unused); + RemoveUsedIndexes(grace.RightRenames().Ref(), unused); + if (!unused.empty()) { + YQL_CLOG(DEBUG, CorePeepHole) << node->Content() << " with " << unused.size() << " unused columns."; + + auto children = node->ChildrenList(); + const std::vector vector(unused.cbegin(), unused.cend()); + children[TCoGraceSelfJoinCore::idx_Input] = MakeWideMapForDropUnused(std::move(children[TCoGraceSelfJoinCore::idx_Input]), vector, ctx); + UpdateInputIndexes(children[TCoGraceSelfJoinCore::idx_LeftKeysColumns], vector, ctx); + UpdateInputIndexes(children[TCoGraceSelfJoinCore::idx_RightKeysColumns], vector, ctx); + UpdateInputIndexes(children[TCoGraceSelfJoinCore::idx_LeftRenames], vector, ctx); + UpdateInputIndexes(children[TCoGraceSelfJoinCore::idx_RightRenames], vector, ctx); + return ctx.ChangeChildren(*node, std::move(children)); + } + + return node; +} + TExprNode::TPtr OptimizeCommonJoinCore(const TExprNode::TPtr& node, TExprContext& ctx) { if (const auto& input = node->Head(); input.IsCallable("NarrowMap") && input.Tail().Tail().IsCallable("AsStruct")) { YQL_CLOG(DEBUG, CorePeepHole) << "Swap " << node->Content() << " with " << input.Content(); @@ -8338,6 +8426,8 @@ struct TPeepHoleRules { {"WideCondense1", &OptimizeWideCondense1}, {"WideChopper", &OptimizeWideChopper}, {"MapJoinCore", &OptimizeMapJoinCore}, + {"GraceJoinCore", &OptimizeGraceJoinCore}, + {"GraceSelfJoinCore", &OptimizeGraceSelfJoinCore}, {"CommonJoinCore", &OptimizeCommonJoinCore}, {"BuildTablePath", &DoBuildTablePath}, {"Exists", &OptimizeExists},