Skip to content

Commit

Permalink
support StartsWith predicates for pg types
Browse files Browse the repository at this point in the history
  • Loading branch information
nepal committed Jan 5, 2024
1 parent fdcdd6f commit 085a2e5
Show file tree
Hide file tree
Showing 10 changed files with 220 additions and 7 deletions.
10 changes: 10 additions & 0 deletions ydb/library/yql/core/common_opt/yql_co_simple1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4665,6 +4665,16 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
map["IsDistinctFrom"] = std::bind(&OptimizeDistinctFrom<false>, _1, _2);

map["StartsWith"] = map["EndsWith"] = map["StringContains"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
if (node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg || node->Tail().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg) {
TExprNodeList converted;
for (auto& child : node->ChildrenList()) {
const bool isPg = child->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg;
converted.emplace_back(ctx.WrapByCallableIf(isPg, "FromPg", std::move(child)));
}
YQL_CLOG(DEBUG, Core) << "Converting Pg strings to YQL strings in " << node->Content();
return ctx.ChangeChildren(*node, std::move(converted));
}

if (node->Tail().IsCallable("String") && node->Tail().Head().Content().empty()) {
YQL_CLOG(DEBUG, Core) << node->Content() << " with empty string in second argument";
if (node->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) {
Expand Down
25 changes: 23 additions & 2 deletions ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "extract_predicate_impl.h"

#include <ydb/library/yql/core/type_ann/type_ann_pg.h>
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
#include <ydb/library/yql/core/yql_opt_utils.h>
#include <ydb/library/yql/core/yql_expr_constraint.h>
Expand Down Expand Up @@ -781,6 +782,17 @@ TExprNode::TPtr OptimizeNodeForRangeExtraction(const TExprNode::TPtr& node, cons
}
}

if (node->IsCallable("StartsWith")) {
if (node->Head().IsCallable("FromPg")) {
YQL_CLOG(DEBUG, Core) << "Get rid of FromPg() in " << node->Content() << " first argument";
return ctx.ChangeChild(*node, 0, node->Head().HeadPtr());
}
if (node->Tail().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg) {
YQL_CLOG(DEBUG, Core) << "Convert second argument of " << node->Content() << " from PG type";
return ctx.ChangeChild(*node, 1, ctx.NewCallable(node->Tail().Pos(), "FromPg", {node->TailPtr()}));
}
}

return node;
}

Expand Down Expand Up @@ -911,13 +923,22 @@ TExprNode::TPtr BuildSingleComputeRange(const TStructExprType& rowType,

if (opNode->IsCallable("StartsWith")) {
YQL_ENSURE(keys.size() == 1);
return ctx.Builder(pos)
const bool keyIsPg = firstKeyType->GetKind() == ETypeAnnotationKind::Pg;
const TTypeAnnotationNode* rangeForType = firstKeyType;
if (keyIsPg) {
const TTypeAnnotationNode* yqlType = NTypeAnnImpl::FromPgImpl(pos, firstKeyType, ctx);
YQL_ENSURE(yqlType);
rangeForType = yqlType;
YQL_ENSURE(opNode->Tail().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Pg);
}
auto rangeForNode = ctx.Builder(pos)
.Callable("RangeFor")
.Atom(0, hasNot ? "NotStartsWith" : "StartsWith", TNodeFlags::Default)
.Add(1, opNode->TailPtr())
.Add(2, ExpandType(pos, *firstKeyType, ctx))
.Add(2, ExpandType(pos, *rangeForType, ctx))
.Seal()
.Build();
return ctx.WrapByCallableIf(keyIsPg, "RangeToPg", std::move(rangeForNode));
}

if (opNode->IsCallable("SqlIn")) {
Expand Down
1 change: 1 addition & 0 deletions ydb/library/yql/core/extract_predicate/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ SRCS(

PEERDIR(
ydb/library/yql/core/services
ydb/library/yql/core/type_ann
)

YQL_LAST_ABI_VERSION()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7602,6 +7602,7 @@ struct TPeepHoleRules {
{"RangeEmpty", &ExpandRangeEmpty},
{"AsRange", &ExpandAsRange},
{"RangeFor", &ExpandRangeFor},
{"RangeToPg", &ExpandRangeToPg},
{"ToFlow", &DropToFlowDeps},
{"CheckedAdd", &ExpandCheckedAdd},
{"CheckedSub", &ExpandCheckedSub},
Expand Down
71 changes: 66 additions & 5 deletions ydb/library/yql/core/type_ann/type_ann_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3225,14 +3225,32 @@ namespace NTypeAnnImpl {
return IGraphTransformer::TStatus::Repeat;
}

bool isOptional1, isOptional2;
if (const TDataExprType *dataTypeOne, *dataTypeTwo;
!(EnsureDataOrOptionalOfData(input->Head(), isOptional1, dataTypeOne, ctx.Expr) && EnsureDataOrOptionalOfData(input->Tail(), isOptional2, dataTypeTwo, ctx.Expr)
&& EnsureStringOrUtf8Type(input->Head().Pos(), *dataTypeOne, ctx.Expr) && EnsureStringOrUtf8Type(input->Tail().Pos(), *dataTypeTwo, ctx.Expr))) {
if (!EnsureComputable(input->Head(), ctx.Expr) || !EnsureComputable(input->Tail(), ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}

if (isOptional1 || isOptional2)
bool hasOptionals = false;
for (auto& child : input->ChildrenList()) {
const TTypeAnnotationNode* type = child->GetTypeAnn();
if (type->GetKind() == ETypeAnnotationKind::Pg) {
type = FromPgImpl(child->Pos(), type, ctx.Expr);
if (!type) {
return IGraphTransformer::TStatus::Error;
}
}
bool isOptional = false;
const TDataExprType* dataType = nullptr;
if (!IsDataOrOptionalOfData(type, isOptional, dataType) ||
!(dataType->GetSlot() == EDataSlot::String || dataType->GetSlot() == EDataSlot::Utf8))
{
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(child->Pos()), TStringBuilder()
<< "Expected (optional) string/utf8 or corresponding Pg type, but got: " << *child->GetTypeAnn()));
return IGraphTransformer::TStatus::Error;
}
hasOptionals = hasOptionals || isOptional;
}

if (hasOptionals)
input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(ctx.Expr.MakeType<TDataExprType>(EDataSlot::Bool)));
else
input->SetTypeAnn(ctx.Expr.MakeType<TDataExprType>(EDataSlot::Bool));
Expand Down Expand Up @@ -11107,6 +11125,48 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
return IGraphTransformer::TStatus::Ok;
}

// Type annotation for RangeToPg(ranges).
//
// Accepts exactly one argument, which must be a list whose item type passes
// EnsureValidRange. The first element of the range tuple is taken as the
// boundary tuple; its even-indexed components are copied as-is, while every
// odd-indexed component (an Optional) has its item type converted with
// ToPgImpl and is wrapped back into an Optional. The annotated result is a
// list of (boundary, boundary) tuples built from the converted boundary type.
//
// Returns Error if any check or the Pg conversion fails, Ok otherwise.
// `output` is not modified.
IGraphTransformer::TStatus RangeToPgWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
    Y_UNUSED(output);

    // Checks run in the same order as separate statements would: arity first, list-ness second.
    if (!EnsureArgsCount(*input, 1, ctx.Expr) || !EnsureListType(input->Head(), ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    const TExprNode& ranges = input->Head();
    const TTypeAnnotationNode* sourceRangeType = ranges.GetTypeAnn()->Cast<TListExprType>()->GetItemType();
    if (!EnsureValidRange(ranges.Pos(), sourceRangeType, ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    const TTypeAnnotationNode* sourceBoundaryType = sourceRangeType->Cast<TTupleExprType>()->GetItems().front();
    const auto& sourceComponents = sourceBoundaryType->Cast<TTupleExprType>()->GetItems();

    TTypeAnnotationNode::TListType pgComponents;
    pgComponents.reserve(sourceComponents.size());

    size_t position = 0;
    for (const TTypeAnnotationNode* component : sourceComponents) {
        const bool isKeyComponent = (position % 2 != 0);
        ++position;

        if (!isKeyComponent) {
            // Even-indexed components are kept unchanged.
            pgComponents.push_back(component);
            continue;
        }

        // Odd-indexed components are Optional<key>: convert the key, keep the Optional wrapper.
        const TTypeAnnotationNode* yqlKeyType = component->Cast<TOptionalExprType>()->GetItemType();
        const TTypeAnnotationNode* pgKeyType = ToPgImpl(ranges.Pos(), yqlKeyType, ctx.Expr);
        if (!pgKeyType) {
            return IGraphTransformer::TStatus::Error;
        }
        pgComponents.push_back(ctx.Expr.MakeType<TOptionalExprType>(pgKeyType));
    }

    const TTypeAnnotationNode* pgBoundaryType = ctx.Expr.MakeType<TTupleExprType>(pgComponents);
    const TTypeAnnotationNode* pgRangeType =
        ctx.Expr.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{pgBoundaryType, pgBoundaryType});

    input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>(pgRangeType));
    return IGraphTransformer::TStatus::Ok;
}

IGraphTransformer::TStatus RangeCreateWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
Y_UNUSED(output);

Expand Down Expand Up @@ -12164,6 +12224,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
ExtFunctions["OrderedSqlRename"] = &SqlRenameWrapper;

Functions["AsRange"] = &AsRangeWrapper;
Functions["RangeToPg"] = &RangeToPgWrapper;
Functions["RangeCreate"] = &RangeCreateWrapper;
Functions["RangeEmpty"] = &RangeEmptyWrapper;
Functions["RangeFor"] = &RangeForWrapper;
Expand Down
47 changes: 47 additions & 0 deletions ydb/library/yql/core/yql_opt_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,4 +519,51 @@ TExprNode::TPtr ExpandRangeFor(const TExprNode::TPtr& node, TExprContext& ctx) {
return result;
}

// Expands a RangeToPg callable into an explicit expression:
//
//   OrderedMap(ranges, (range) -> StaticMap(range, (boundary) -> [c0, c1, ...]))
//
// where the rebuilt boundary list has as many entries as the boundary tuple
// of the input's annotated type. Even-indexed entries are plain
// Nth(boundary, i); odd-indexed entries are Map(Nth(boundary, i), ToPg).
//
// Requires `node` to be a RangeToPg callable (enforced with YQL_ENSURE) whose
// first child is already type-annotated as a list of range tuples.
TExprNode::TPtr ExpandRangeToPg(const TExprNode::TPtr& node, TExprContext& ctx) {
    YQL_ENSURE(node->IsCallable("RangeToPg"));

    // Walk the annotated type: List<Tuple<Boundary, ...>> -> Boundary tuple -> its arity.
    const auto* rangeTupleType = node->Head().GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TTupleExprType>();
    const auto* boundaryTupleType = rangeTupleType->GetItems().front()->Cast<TTupleExprType>();
    const size_t componentCount = boundaryTupleType->GetSize();

    // Emits one list element per boundary component.
    const auto emitBoundaryComponents = [componentCount](TExprNodeBuilder& list) -> TExprNodeBuilder& {
        for (size_t idx = 0; idx != componentCount; ++idx) {
            const bool isKeyComponent = (idx % 2 != 0);
            if (isKeyComponent) {
                // Odd positions: apply ToPg through Map over the component value.
                list
                    .Callable(idx, "Map")
                        .Callable(0, "Nth")
                            .Arg(0, "boundary")
                            .Atom(1, idx)
                        .Seal()
                        .Lambda(1)
                            .Param("unwrapped")
                            .Callable("ToPg")
                                .Arg(0, "unwrapped")
                            .Seal()
                        .Seal()
                    .Seal();
            } else {
                // Even positions: pass the component through untouched.
                list
                    .Callable(idx, "Nth")
                        .Arg(0, "boundary")
                        .Atom(1, idx)
                    .Seal();
            }
        }
        return list;
    };

    return ctx.Builder(node->Pos())
        .Callable("OrderedMap")
            .Add(0, node->HeadPtr())
            .Lambda(1)
                .Param("range")
                .Callable("StaticMap")
                    .Arg(0, "range")
                    .Lambda(1)
                        .Param("boundary")
                        .List()
                            .Do(emitBoundaryComponents)
                        .Seal()
                    .Seal()
                .Seal()
            .Seal()
        .Seal()
        .Build();
}
}
1 change: 1 addition & 0 deletions ydb/library/yql/core/yql_opt_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace NYql {
TExprNode::TPtr ExpandRangeEmpty(const TExprNode::TPtr& node, TExprContext& ctx);
TExprNode::TPtr ExpandAsRange(const TExprNode::TPtr& node, TExprContext& ctx);
TExprNode::TPtr ExpandRangeFor(const TExprNode::TPtr& node, TExprContext& ctx);
TExprNode::TPtr ExpandRangeToPg(const TExprNode::TPtr& node, TExprContext& ctx);

}

14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -3940,6 +3940,13 @@
"uri": "https://{canondata_backend}/1773845/fe2146df711e0729e3c3cc1bc9b2c5b1fdfcfea1/resource.tar.gz#test_sql2yql.test_compute_range-pg_sqlin_/sql.yql"
}
],
"test_sql2yql.test[compute_range-pg_startswith]": [
{
"checksum": "f2e42e95b7b84fd210244e0c61c3f614",
"size": 4450,
"uri": "https://{canondata_backend}/1031349/96841816c51116681477e138bb81b6493013c777/resource.tar.gz#test_sql2yql.test_compute_range-pg_startswith_/sql.yql"
}
],
"test_sql2yql.test[compute_range-preserve_rest_predicates_order]": [
{
"checksum": "4915841ad83886d7f63fe939e0848687",
Expand Down Expand Up @@ -21230,6 +21237,13 @@
"uri": "https://{canondata_backend}/1773845/fe2146df711e0729e3c3cc1bc9b2c5b1fdfcfea1/resource.tar.gz#test_sql_format.test_compute_range-pg_sqlin_/formatted.sql"
}
],
"test_sql_format.test[compute_range-pg_startswith]": [
{
"checksum": "b06b88f1965f643fea24cb7e5d8d0459",
"size": 955,
"uri": "https://{canondata_backend}/1031349/96841816c51116681477e138bb81b6493013c777/resource.tar.gz#test_sql_format.test_compute_range-pg_startswith_/formatted.sql"
}
],
"test_sql_format.test[compute_range-preserve_rest_predicates_order]": [
{
"checksum": "77cd36176a336f2a79ee10f5697b124f",
Expand Down
36 changes: 36 additions & 0 deletions ydb/library/yql/tests/sql/suites/compute_range/pg_startswith.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* syntax version 1 */
/* postgres can not */
/* dq can not */
/* dqfile can not */
/* yt can not */
pragma warning("disable", "4510");
pragma warning("disable", "1108");

-- Each query below calls YQL::RangeComputeFor over Struct<a:PgInt4,b:PgText>
-- with key column "b" and a StartsWith predicate applied to FromPg($row.b).

-- prefix match: b starts with 'aaaa' (LIKE 'aaaa%'); a missing result counts as false
select YQL::RangeComputeFor(
Struct<a:PgInt4,b:PgText>,
($row) -> (StartsWith(FromPg($row.b), 'aaaa') ?? false),
AsTuple(AsAtom("b"))
);

-- negated prefix match: b does not start with 'aaaa' (NOT LIKE 'aaaa%')
select YQL::RangeComputeFor(
Struct<a:PgInt4,b:PgText>,
($row) -> (not (StartsWith(FromPg($row.b), 'aaaa') ?? true)),
AsTuple(AsAtom("b"))
);


-- prefix match where the prefix 'a\xf5' is not valid UTF-8
select YQL::RangeComputeFor(
Struct<a:PgInt4,b:PgText>,
($row) -> (StartsWith(FromPg($row.b), 'a\xf5') ?? false),
AsTuple(AsAtom("b"))
);

-- negated prefix match where the prefix 'a\xf5' is not valid UTF-8
select YQL::RangeComputeFor(
Struct<a:PgInt4,b:PgText>,
($row) -> (not (StartsWith(FromPg($row.b), 'a\xf5') ?? true)),
AsTuple(AsAtom("b"))
);
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,27 @@
"uri": "https://{canondata_backend}/1942671/812d348532a02502eb8901f04707aeea3f495e62/resource.tar.gz#test.test_compute_range-multiply_limit_with_dups-default.txt-Results_/results.txt"
}
],
"test.test[compute_range-pg_startswith-default.txt-Debug]": [
{
"checksum": "fb20b05a49ae3533e4b581ad09bc01f4",
"size": 1242,
"uri": "https://{canondata_backend}/1781765/f97b29106f835508c9465d1d8ba8cc89cdfb0bdc/resource.tar.gz#test.test_compute_range-pg_startswith-default.txt-Debug_/opt.yql"
}
],
"test.test[compute_range-pg_startswith-default.txt-Plan]": [
{
"checksum": "55515ae638f317612d048052be489bfd",
"size": 1740,
"uri": "https://{canondata_backend}/1781765/f97b29106f835508c9465d1d8ba8cc89cdfb0bdc/resource.tar.gz#test.test_compute_range-pg_startswith-default.txt-Plan_/plan.txt"
}
],
"test.test[compute_range-pg_startswith-default.txt-Results]": [
{
"checksum": "cc3057a2f21b5e8e4ef004621d352021",
"size": 19014,
"uri": "https://{canondata_backend}/1781765/f97b29106f835508c9465d1d8ba8cc89cdfb0bdc/resource.tar.gz#test.test_compute_range-pg_startswith-default.txt-Results_/results.txt"
}
],
"test.test[count-count_all-default.txt-Debug]": [
{
"checksum": "4545bbb3b7c7d6ac6fbcccdae8916f50",
Expand Down

0 comments on commit 085a2e5

Please sign in to comment.