Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix constraints after expanding PartitionsByKeys. #685

Merged
merged 2 commits into from
Dec 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 39 additions & 18 deletions ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2445,23 +2445,17 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext
const bool isStream = node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Flow ||
node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Stream;
TExprNode::TPtr sort;
auto keyExtractor = node->ChildPtr(1);
const auto keyExtractor = node->Child(TCoPartitionsByKeys::idx_KeySelectorLambda);
const bool isConstKey = !IsDepended(keyExtractor->Tail(), keyExtractor->Head().Head());
const bool haveSort = !node->Child(2)->IsCallable("Void");
auto idLambda = ctx.Builder(node->Pos())
.Lambda()
.Param("x")
.Arg("x")
.Seal()
.Build();
const bool haveSort = node->Child(TCoPartitionsByKeys::idx_SortKeySelectorLambda)->IsLambda();

auto sortLambda = ctx.Builder(node->Pos())
.Lambda()
.Param("x")
.Callable("Sort")
.Arg(0, "x")
.Add(1, node->ChildPtr(2))
.Add(2, node->ChildPtr(3))
.Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_SortDirections))
.Add(2, node->ChildPtr(TCoPartitionsByKeys::idx_SortKeySelectorLambda))
.Seal()
.Seal()
.Build();
Expand All @@ -2480,7 +2474,7 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext
.Callable(0, "DictPayloads")
.Arg(0, "dict")
.Seal()
.Add(1, haveSort ? sortLambda : idLambda)
.Add(1, haveSort ? sortLambda : MakeIdentityLambda(node->Pos(), ctx))
.Seal()
.Seal()
.Build();
Expand All @@ -2490,8 +2484,8 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext
sort = ctx.Builder(node->Pos())
.Callable("Sort")
.Add(0, node->HeadPtr())
.Add(1, node->ChildPtr(2))
.Add(2, node->ChildPtr(3))
.Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_SortDirections))
.Add(2, node->ChildPtr(TCoPartitionsByKeys::idx_SortKeySelectorLambda))
.Seal()
.Build();
} else {
Expand All @@ -2503,8 +2497,8 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext
.Callable("OrderedFlatMap")
.Callable(0, "SqueezeToDict")
.Add(0, node->HeadPtr())
.Add(1, std::move(keyExtractor))
.Add(2, std::move(idLambda))
.Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_KeySelectorLambda))
.Add(2, MakeIdentityLambda(node->Pos(), ctx))
.Add(3, std::move(settings))
.Seal()
.Add(1, std::move(flatten))
Expand All @@ -2516,17 +2510,44 @@ TExprNode::TPtr ExpandPartitionsByKeys(const TExprNode::TPtr& node, TExprContext
.With(0)
.Callable("ToDict")
.Add(0, node->HeadPtr())
.Add(1, std::move(keyExtractor))
.Add(2, std::move(idLambda))
.Add(1, node->ChildPtr(TCoPartitionsByKeys::idx_KeySelectorLambda))
.Add(2, MakeIdentityLambda(node->Pos(), ctx))
.Add(3, std::move(settings))
.Seal()
.Done()
.Seal()
.Build();
}

if (auto keys = GetPathsToKeys(keyExtractor->Tail(), keyExtractor->Head().Head()); !keys.empty()) {
if (const auto sortKeySelector = node->Child(TCoPartitionsByKeys::idx_SortKeySelectorLambda); sortKeySelector->IsLambda()) {
auto sortKeys = GetPathsToKeys(sortKeySelector->Tail(), sortKeySelector->Head().Head());
std::move(sortKeys.begin(), sortKeys.end(), std::back_inserter(keys));
std::sort(keys.begin(), keys.end());
}

TExprNode::TListType columns;
columns.reserve(keys.size());
for (const auto& path : keys) {
if (1U == path.size())
columns.emplace_back(ctx.NewAtom(node->Pos(), path.front()));
else {
TExprNode::TListType atoms(path.size());
std::transform(path.cbegin(), path.cend(), atoms.begin(), [&](const std::string_view& name) { return ctx.NewAtom(node->Pos(), name); });
columns.emplace_back(ctx.NewList(node->Pos(), std::move(atoms)));
}
}

sort = ctx.Builder(node->Pos())
.Callable("AssumeChopped")
.Add(0, std::move(sort))
.List(1).Add(std::move(columns)).Seal()
.Seal()
.Build();
}
}

return KeepConstraints(ctx.ReplaceNode(node->Tail().TailPtr(), node->Tail().Head().Head(), std::move(sort)), *node, ctx);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Тут ничего не может быть, кроме Chopped? Distinct/Uniq?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Distinct/Uniq должны выводиться естественным образом из Chopped на входе.

return ctx.ReplaceNode(node->Tail().TailPtr(), node->Tail().Head().Head(), std::move(sort));
}

TExprNode::TPtr ExpandIsKeySwitch(const TExprNode::TPtr& node, TExprContext& ctx) {
Expand Down
86 changes: 4 additions & 82 deletions ydb/library/yql/core/yql_expr_constraint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -753,38 +753,6 @@ class TCallableConstraintTransformer : public TCallableTransformerBase<TCallable
return std::nullopt;
}

// Tries to express `body` as a chain of member/index accesses rooted at the lambda argument `arg`.
// Returns the access path (ordered from the argument towards the accessed value)
// or std::nullopt when `body` matches none of the shapes recognized below.
// The checks are order-dependent: each one is only reached when all previous ones failed.
static std::optional<TPartOfConstraintBase::TPathType> GetPathToKey(const TExprNode& body, const TExprNode& arg) {
// The argument itself corresponds to the empty path.
if (&body == &arg)
return TPartOfConstraintBase::TPathType();

// Member/Nth over a resolvable head: extend the head's path with the accessed name or index.
// When the head cannot be resolved, control falls through to the literal struct/list checks below.
if (body.IsCallable({"Member","Nth"})) {
if (auto path = GetPathToKey(body.Head(), arg)) {
path->emplace_back(body.Tail().Content());
return path;
}
}

// These wrappers are looked through: the path is the path of their first child.
if (body.IsCallable({"CastStruct","FilterMembers","Just","Unwrap"}))
return GetPathToKey(body.Head(), arg);
// Member of a literal struct: continue with the expression stored in the selected member.
if (body.IsCallable("Member") && body.Head().IsCallable("AsStruct"))
return GetPathToKey(GetLiteralStructMember(body.Head(), body.Tail()), arg);
// Nth of a literal list: continue with the selected item.
if (body.IsCallable("Nth") && body.Head().IsList())
return GetPathToKey(*body.Head().Child(FromString<ui32>(body.Tail().Content())), arg);
// One-item list built from element "0" of a value whose (optionality-stripped) type is a one-item tuple:
// treated as that value itself.
if (body.IsList() && 1U == body.ChildrenSize() && body.Head().IsCallable("Nth") && body.Head().Tail().IsAtom("0") &&
1U == RemoveOptionality(*body.Head().Head().GetTypeAnn()).Cast<TTupleExprType>()->GetSize())
// Especially for the "Extract single item tuple from Condense1" optimizer.
return GetPathToKey(body.Head().Head(), arg);
// One-member struct rebuilt from the same-named member of a value whose (optionality-stripped) type
// is a one-member struct: treated as that value itself.
if (body.IsCallable("AsStruct") && 1U == body.ChildrenSize() && body.Head().Tail().IsCallable("Member") &&
body.Head().Head().Content() == body.Head().Tail().Tail().Content() &&
1U == RemoveOptionality(*body.Head().Tail().Head().GetTypeAnn()).Cast<TStructExprType>()->GetSize())
// Especially for the "Extract single item struct from Condense1" optimizer.
return GetPathToKey(body.Head().Tail().Head(), arg);
// NOTE(review): IsTransparentIfPresent is defined elsewhere. When it holds and the first child of `body`
// is `arg`, the lookup continues inside child 1 (its Tail().Head() relative to its Head().Head()) —
// presumably the lambda applied to the present value; confirm against the helper.
if (IsTransparentIfPresent(body) && &body.Head() == &arg)
return GetPathToKey(body.Child(1)->Tail().Head(), body.Child(1)->Head().Head());

return std::nullopt;
}

static std::vector<std::pair<TPartOfConstraintBase::TPathType, bool>>
ExtractSimpleSortTraits(const TExprNode& sortDirections, const TExprNode& keySelectorLambda) {
const auto& keySelectorBody = keySelectorLambda.Tail();
Expand Down Expand Up @@ -2684,34 +2652,6 @@ class TCallableConstraintTransformer : public TCallableTransformerBase<TCallable
.Combine(UpdateLambdaConstraints(input->ChildRef(TCoIsKeySwitch::idx_StateKeyExtractor), ctx, stateConstraints));
}

// Looks up the value expression of `member` inside the literal struct node `literal`.
// An item matches when its name node is the very same node as `member` or carries the same content.
// Throws yexception when no item matches.
static const TExprNode& GetLiteralStructMember(const TExprNode& literal, const TExprNode& member) {
    for (const auto& item : literal.Children()) {
        const TExprNode& itemName = item->Head();
        const bool isSameNode = &itemName == &member;
        if (isSameNode || itemName.Content() == member.Content()) {
            return item->Tail();
        }
    }

    ythrow yexception() << "Member '" << member.Content() << "' not found in literal struct.";
}

// Resolves `body` to an access path rooted at one of the lambda arguments listed in `args`.
// On success returns the path together with the index of the matched argument, otherwise std::nullopt.
// Recognized shapes:
//  - an argument from `args` (empty path);
//  - Member/Nth over a resolvable head (the accessed name or index is appended to the head's path);
//  - Member over a literal AsStruct, possibly wrapped in CastStruct/FilterMembers;
//  - Nth over a literal list;
//  - CastStruct/FilterMembers wrappers, which are looked through.
static std::optional<std::pair<TPartOfConstraintBase::TPathType, ui32>> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args) {
    if (body.IsArgument()) {
        // An argument that is not among `args` is not a key: fall through to std::nullopt.
        for (auto i = 0U; i < args.size(); ++i)
            if (&body == args[i].Get())
                return std::make_pair(TPartOfConstraintBase::TPathType(), i);
    } else if (body.IsCallable({"Member","Nth"})) {
        if (auto path = GetPathToKey(body.Head(), args)) {
            path->first.emplace_back(body.Tail().Content());
            return path;
        } else if (const auto& head = SkipCallables(body.Head(), {"CastStruct","FilterMembers"}); head.IsCallable("AsStruct") && body.IsCallable("Member")) {
            // Member of a literal struct: continue with the expression stored in the selected member.
            return GetPathToKey(GetLiteralStructMember(head, body.Tail()), args);
        } else if (body.IsCallable("Nth") && body.Head().IsList()) {
            // Nth of a literal list: continue with the selected item.
            return GetPathToKey(*body.Head().Child(FromString<ui32>(body.Tail().Content())), args);
        }
    } else if (body.IsCallable({"CastStruct","FilterMembers"})) {
        // This branch used to be nested inside the Member/Nth branch above, where it could never be taken.
        // It is hoisted here so that the wrappers are looked through, as in the single-argument overload.
        return GetPathToKey(body.Head(), args);
    }

    return std::nullopt;
}

template<bool Wide>
static TPartOfConstraintBase::TSetType GetSimpleKeys(const TExprNode& body, const TExprNode::TChildrenType& args, TExprContext& ctx) {
TPartOfConstraintBase::TSetType keys;
Expand Down Expand Up @@ -2749,33 +2689,15 @@ class TCallableConstraintTransformer : public TCallableTransformerBase<TCallable
return keys;
}

// Collects the key paths that `body` selects from the lambda argument `arg`.
// A list body contributes one path per resolvable item, StablePickle is looked through,
// and any other body contributes at most one path. Keys that cannot be resolved are skipped.
static TPartOfConstraintBase::TSetType GetSimpleKeys(const TExprNode& body, const TExprNode& arg) {
    TPartOfConstraintBase::TSetType result;

    if (body.IsList()) {
        const auto count = body.ChildrenSize();
        if (count) {
            result.reserve(count);
            for (auto index = 0U; index < count; ++index) {
                auto itemPath = GetPathToKey(*body.Child(index), arg);
                if (itemPath) {
                    result.insert_unique(std::move(*itemPath));
                }
            }
        }
        return result;
    }

    if (body.IsCallable("StablePickle")) {
        return GetSimpleKeys(body.Head(), arg);
    }

    if (auto singlePath = GetPathToKey(body, arg)) {
        result.insert_unique(std::move(*singlePath));
    }
    return result;
}

template<bool Wide>
static TPartOfConstraintBase::TSetType GetSimpleKeys(const TExprNode& selector, TExprContext& ctx) {
YQL_ENSURE(selector.IsLambda() && 2U == selector.ChildrenSize());
const auto& body = selector.Tail();
if constexpr (!Wide) {
if (TCoIsKeySwitch::Match(&body)) {
const TCoIsKeySwitch keySwitch(&body);
const auto& i = GetSimpleKeys(*ctx.ReplaceNode(keySwitch.ItemKeyExtractor().Body().Ptr(), keySwitch.ItemKeyExtractor().Args().Arg(0).Ref(), keySwitch.Item().Ptr()), keySwitch.Item().Ref());
const auto& s = GetSimpleKeys(*ctx.ReplaceNode(keySwitch.StateKeyExtractor().Body().Ptr(), keySwitch.StateKeyExtractor().Args().Arg(0).Ref(), keySwitch.State().Ptr()), keySwitch.Item().Ref());
const auto& i = GetPathsToKeys(*ctx.ReplaceNode(keySwitch.ItemKeyExtractor().Body().Ptr(), keySwitch.ItemKeyExtractor().Args().Arg(0).Ref(), keySwitch.Item().Ptr()), keySwitch.Item().Ref());
const auto& s = GetPathsToKeys(*ctx.ReplaceNode(keySwitch.StateKeyExtractor().Body().Ptr(), keySwitch.StateKeyExtractor().Args().Arg(0).Ref(), keySwitch.State().Ptr()), keySwitch.Item().Ref());
return i == s ? i : TPartOfConstraintBase::TSetType();
}
}
Expand Down Expand Up @@ -3005,13 +2927,13 @@ class TCallableConstraintTransformer : public TCallableTransformerBase<TCallable

TPartOfConstraintBase::TSetType keys;
if constexpr (Partitions) {
keys = GetSimpleKeys(input->Child(TCoBase::idx_KeySelectorLambda)->Tail(), input->Child(TCoBase::idx_KeySelectorLambda)->Head().Head());
keys = GetPathsToKeys(input->Child(TCoBase::idx_KeySelectorLambda)->Tail(), input->Child(TCoBase::idx_KeySelectorLambda)->Head().Head());
if (const auto sortKeySelector = input->Child(TCoBase::idx_SortKeySelectorLambda); sortKeySelector->IsLambda()) {
if (const auto status = UpdateLambdaConstraints(*sortKeySelector); status != TStatus::Ok) {
return status;
}

auto sortKeys = GetSimpleKeys(sortKeySelector->Tail(), sortKeySelector->Head().Head());
auto sortKeys = GetPathsToKeys(sortKeySelector->Tail(), sortKeySelector->Head().Head());
std::move(sortKeys.begin(), sortKeys.end(), std::back_inserter(keys));
std::sort(keys.begin(), keys.end());
}
Expand Down
79 changes: 79 additions & 0 deletions ydb/library/yql/core/yql_opt_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ TExprNodeBuilder GetterBuilder(TExprNodeBuilder parent, ui32 index, const TTypeA
return GetterBuilder(parent.Callable(index, ETypeAnnotationKind::Struct == parentType->GetKind() ? "Member" : "Nth"), 0U, type, path).Atom(1, name).Seal();
}

// Looks up the value expression of `member` inside the literal struct node `literal`.
// An item matches when its name node is the very same node as `member` or carries the same content.
// Throws yexception when no item matches.
const TExprNode& GetLiteralStructMember(const TExprNode& literal, const TExprNode& member) {
    for (const auto& item : literal.Children()) {
        const TExprNode& itemName = item->Head();
        const bool isSameNode = &itemName == &member;
        if (isSameNode || itemName.Content() == member.Content()) {
            return item->Tail();
        }
    }

    ythrow yexception() << "Member '" << member.Content() << "' not found in literal struct.";
}

}

TExprNode::TPtr MakeBoolNothing(TPositionHandle position, TExprContext& ctx) {
Expand Down Expand Up @@ -1994,4 +2001,76 @@ void OptimizeSubsetFieldsForNodeWithMultiUsage(const TExprNode::TPtr& node, cons
}
}


// Resolves `body` to an access path rooted at one of the lambda arguments listed in `args`.
// On success returns the path together with the index of the matched argument, otherwise std::nullopt.
// Recognized shapes:
//  - an argument from `args` (empty path);
//  - Member/Nth over a resolvable head (the accessed name or index is appended to the head's path);
//  - Member over a literal AsStruct, possibly wrapped in CastStruct/FilterMembers;
//  - Nth over a literal list;
//  - CastStruct/FilterMembers wrappers, which are looked through.
std::optional<std::pair<TPartOfConstraintBase::TPathType, ui32>> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args) {
    if (body.IsArgument()) {
        // An argument that is not among `args` is not a key: fall through to std::nullopt.
        for (auto i = 0U; i < args.size(); ++i)
            if (&body == args[i].Get())
                return std::make_pair(TPartOfConstraintBase::TPathType(), i);
    } else if (body.IsCallable({"Member","Nth"})) {
        if (auto path = GetPathToKey(body.Head(), args)) {
            path->first.emplace_back(body.Tail().Content());
            return path;
        } else if (const auto& head = SkipCallables(body.Head(), {"CastStruct","FilterMembers"}); head.IsCallable("AsStruct") && body.IsCallable("Member")) {
            // Member of a literal struct: continue with the expression stored in the selected member.
            return GetPathToKey(GetLiteralStructMember(head, body.Tail()), args);
        } else if (body.IsCallable("Nth") && body.Head().IsList()) {
            // Nth of a literal list: continue with the selected item.
            return GetPathToKey(*body.Head().Child(FromString<ui32>(body.Tail().Content())), args);
        }
    } else if (body.IsCallable({"CastStruct","FilterMembers"})) {
        // This branch used to be nested inside the Member/Nth branch above, where it could never be taken.
        // It is hoisted here so that the wrappers are looked through, as in the single-argument overload.
        return GetPathToKey(body.Head(), args);
    }

    return std::nullopt;
}

// Tries to express `body` as a chain of member/index accesses rooted at the lambda argument `arg`.
// Returns the access path (ordered from the argument towards the accessed value)
// or std::nullopt when `body` matches none of the shapes recognized below.
// The checks are order-dependent: each one is only reached when all previous ones failed.
std::optional<TPartOfConstraintBase::TPathType> GetPathToKey(const TExprNode& body, const TExprNode& arg) {
// The argument itself corresponds to the empty path.
if (&body == &arg)
return TPartOfConstraintBase::TPathType();

// Member/Nth over a resolvable head: extend the head's path with the accessed name or index.
// When the head cannot be resolved, control falls through to the literal struct/list checks below.
if (body.IsCallable({"Member","Nth"})) {
if (auto path = GetPathToKey(body.Head(), arg)) {
path->emplace_back(body.Tail().Content());
return path;
}
}

// These wrappers are looked through: the path is the path of their first child.
if (body.IsCallable({"CastStruct","FilterMembers","Just","Unwrap"}))
return GetPathToKey(body.Head(), arg);
// Member of a literal struct: continue with the expression stored in the selected member.
if (body.IsCallable("Member") && body.Head().IsCallable("AsStruct"))
return GetPathToKey(GetLiteralStructMember(body.Head(), body.Tail()), arg);
// Nth of a literal list: continue with the selected item.
if (body.IsCallable("Nth") && body.Head().IsList())
return GetPathToKey(*body.Head().Child(FromString<ui32>(body.Tail().Content())), arg);
// One-item list built from element "0" of a value whose (optionality-stripped) type is a one-item tuple:
// treated as that value itself.
if (body.IsList() && 1U == body.ChildrenSize() && body.Head().IsCallable("Nth") && body.Head().Tail().IsAtom("0") &&
1U == RemoveOptionality(*body.Head().Head().GetTypeAnn()).Cast<TTupleExprType>()->GetSize())
// Especially for the "Extract single item tuple from Condense1" optimizer.
return GetPathToKey(body.Head().Head(), arg);
// One-member struct rebuilt from the same-named member of a value whose (optionality-stripped) type
// is a one-member struct: treated as that value itself.
if (body.IsCallable("AsStruct") && 1U == body.ChildrenSize() && body.Head().Tail().IsCallable("Member") &&
body.Head().Head().Content() == body.Head().Tail().Tail().Content() &&
1U == RemoveOptionality(*body.Head().Tail().Head().GetTypeAnn()).Cast<TStructExprType>()->GetSize())
// Especially for the "Extract single item struct from Condense1" optimizer.
return GetPathToKey(body.Head().Tail().Head(), arg);
// NOTE(review): IsTransparentIfPresent is defined elsewhere. When it holds and the first child of `body`
// is `arg`, the lookup continues inside child 1 (its Tail().Head() relative to its Head().Head()) —
// presumably the lambda applied to the present value; confirm against the helper.
if (IsTransparentIfPresent(body) && &body.Head() == &arg)
return GetPathToKey(body.Child(1)->Tail().Head(), body.Child(1)->Head().Head());

return std::nullopt;
}

// Collects the key paths that `body` selects from the lambda argument `arg`.
// A list body contributes one path per resolvable item, StablePickle is looked through,
// and any other body contributes at most one path. Keys that cannot be resolved are skipped.
TPartOfConstraintBase::TSetType GetPathsToKeys(const TExprNode& body, const TExprNode& arg) {
    TPartOfConstraintBase::TSetType result;

    if (body.IsList()) {
        const auto count = body.ChildrenSize();
        if (count) {
            result.reserve(count);
            for (auto index = 0U; index < count; ++index) {
                auto itemPath = GetPathToKey(*body.Child(index), arg);
                if (itemPath) {
                    result.insert_unique(std::move(*itemPath));
                }
            }
        }
        return result;
    }

    if (body.IsCallable("StablePickle")) {
        return GetPathsToKeys(body.Head(), arg);
    }

    if (auto singlePath = GetPathToKey(body, arg)) {
        result.insert_unique(std::move(*singlePath));
    }
    return result;
}

}
5 changes: 5 additions & 0 deletions ydb/library/yql/core/yql_opt_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,9 @@ void OptimizeSubsetFieldsForNodeWithMultiUsage(const TExprNode::TPtr& node, cons
TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx,
std::function<TExprNode::TPtr(const TExprNode::TPtr&, const TExprNode::TPtr&, const TParentsMap&, TExprContext&)> handler);

// Returns the access path leading from the lambda argument `arg` to `body`,
// or std::nullopt when `body` is not recognized as such an access.
std::optional<TPartOfConstraintBase::TPathType> GetPathToKey(const TExprNode& body, const TExprNode& arg);
// Same lookup against a list of lambda arguments; the result also carries the index of the matched argument.
std::optional<std::pair<TPartOfConstraintBase::TPathType, ui32>> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args);

// Collects the paths of all recognized keys in `body`: each item of a list, the content of StablePickle, or a single key.
TPartOfConstraintBase::TSetType GetPathsToKeys(const TExprNode& body, const TExprNode& arg);

}
Loading