From fb1c9feeeec09d26bc23e8f2d921789fe703b857 Mon Sep 17 00:00:00 2001 From: Vitaly Stoyan Date: Thu, 24 Oct 2024 15:54:58 +0300 Subject: [PATCH 1/4] init --- .../yql/core/type_ann/type_ann_core.cpp | 7 +- ydb/library/yql/core/yql_type_annotation.h | 1 + ydb/library/yql/core/yql_udf_index.cpp | 123 ++++++++++++++++-- ydb/library/yql/core/yql_udf_index.h | 19 ++- ydb/library/yql/core/yql_udf_resolver.h | 1 + .../udf_resolve/yql_outproc_udf_resolver.cpp | 1 + .../udf_resolve/yql_simple_udf_resolver.cpp | 1 + .../yql_udf_resolver_with_index.cpp | 39 ++++-- .../providers/config/yql_config_provider.cpp | 14 +- ydb/library/yql/tests/common/udf_test/test.py | 5 +- .../datetime2/test/canondata/result.json | 5 + .../test.test_IgnoreCaseFuncs_/results.txt | 36 +++++ .../datetime2/test/cases/IgnoreCaseFuncs.cfg | 1 + .../datetime2/test/cases/IgnoreCaseFuncs.sql | 3 + .../common/yson2/test/canondata/result.json | 13 +- .../test.test_IgnoreCaseFuncs_/results.txt | 65 +++++++++ .../yson2/test/cases/IgnoreCaseFuncs.cfg | 2 + .../yson2/test/cases/IgnoreCaseFuncs.sql | 3 + 18 files changed, 306 insertions(+), 33 deletions(-) create mode 100644 ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.cfg create mode 100644 ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.sql create mode 100644 ydb/library/yql/udfs/common/yson2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt create mode 100644 ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.cfg create mode 100644 ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.sql diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index fb7239de3edf..b7fbd068b213 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -7590,11 +7590,16 @@ template return IGraphTransformer::TStatus::Error; } + cached.NormalizedName = description.NormalizedName; cached.FunctionType = description.CallableType; cached.RunConfigType = description.RunConfigType ? description.RunConfigType : ctx.Expr.MakeType(); cached.NormalizedUserType = description.NormalizedUserType ? description.NormalizedUserType : ctx.Expr.MakeType(); cached.SupportsBlocks = description.SupportsBlocks; cached.IsStrict = description.IsStrict; + + if (name != cached.NormalizedName) { + ctx.Types.UdfTypeCache[std::make_tuple(cached.NormalizedName, TString(typeConfig), userType)] = cached; + } } TStringBuf typeConfig = ""; @@ -7623,7 +7628,7 @@ template TStringBuf fileAlias = udfInfo ? udfInfo->FileAlias : ""_sb; auto ret = ctx.Expr.Builder(input->Pos()) .Callable("Udf") - .Add(0, input->HeadPtr()) + .Atom(0, cached.NormalizedName) .Add(1, runConfigValue) .Add(2, ExpandType(input->Pos(), *cached.NormalizedUserType, ctx.Expr)) .Atom(3, typeConfig) diff --git a/ydb/library/yql/core/yql_type_annotation.h b/ydb/library/yql/core/yql_type_annotation.h index 7b5b26b05087..e92bb923d1ea 100644 --- a/ydb/library/yql/core/yql_type_annotation.h +++ b/ydb/library/yql/core/yql_type_annotation.h @@ -273,6 +273,7 @@ enum class EBlockEngineMode { }; struct TUdfCachedInfo { + TString NormalizedName; const TTypeAnnotationNode* FunctionType = nullptr; const TTypeAnnotationNode* RunConfigType = nullptr; const TTypeAnnotationNode* NormalizedUserType = nullptr; diff --git a/ydb/library/yql/core/yql_udf_index.cpp b/ydb/library/yql/core/yql_udf_index.cpp index fa4b2a668ee0..7484951945dd 100644 --- a/ydb/library/yql/core/yql_udf_index.cpp +++ b/ydb/library/yql/core/yql_udf_index.cpp @@ -80,40 +80,133 @@ void AddResolveResultToRegistry(const TResolveResult& resolveResult, const TMap< TUdfIndex::TUdfIndex() { } -TUdfIndex::TUdfIndex(const TMap& resources) +void TUdfIndex::SetCaseSentiveSearch(bool caseSensitive) { + CaseSensitive_ = caseSensitive; +} + +TUdfIndex::TUdfIndex(const TMap& resources, bool caseSensitive) : Resources_(resources) + , CaseSensitive_(caseSensitive) { - + for (const auto& x : Resources_) { + ICaseModules_[to_lower(x.first)].insert(x.first); + } } -bool TUdfIndex::ContainsModule(const TString& moduleName) const { +bool TUdfIndex::ContainsModuleStrict(const TString& moduleName) const { return Resources_.contains(moduleName); } +bool TUdfIndex::CanonizeModule(TString& moduleName) const { + if (Resources_.contains(moduleName)) { + return true; + } + + if (CaseSensitive_) { + return false; + } + + auto p = ICaseModules_.FindPtr(to_lower(moduleName)); + if (!p) { + return false; + } + + Y_ENSURE(p->size() > 0); + if (p->size() > 1) { + return false; + } + + moduleName = *p->begin(); + return true; +} + +TUdfIndex::EStatus TUdfIndex::ContainsModule(const TString& moduleName) const { + if (Resources_.contains(moduleName)) { + return EStatus::Found; + } + + if (CaseSensitive_) { + return EStatus::NotFound; + } + + auto p = ICaseModules_.FindPtr(to_lower(moduleName)); + if (!p) { + return EStatus::NotFound; + } + + Y_ENSURE(p->size() > 0); + return p->size() > 1 ? EStatus::Ambigious : EStatus::Found; +} + bool TUdfIndex::ContainsAnyModule(const TSet& modules) const { return AnyOf(modules, [this](auto& m) { - return this->ContainsModule(m); + return Resources_.contains(m); }); } -bool TUdfIndex::FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const { - auto r = FindResourceByModule(moduleName); +TUdfIndex::EStatus TUdfIndex::FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const { + auto r = Resources_.FindPtr(moduleName); if (!r) { - return false; + if (CaseSensitive_) { + return EStatus::NotFound; + } + + auto p = ICaseModules_.FindPtr(to_lower(moduleName)); + if (!p) { + return EStatus::NotFound; + } + + Y_ENSURE(p->size() > 0); + if (p->size() > 1) { + return EStatus::Ambigious; + } + + r = Resources_.FindPtr(*p->begin()); + Y_ENSURE(r); } - auto f = r->Functions.FindPtr(functionName); + auto f = (*r)->Functions.FindPtr(functionName); if (!f) { - return false; + if (CaseSensitive_) { + return EStatus::NotFound; + } + + auto p = (*r)->ICaseFuncNames.FindPtr(to_lower(functionName)); + if (!p) { + return EStatus::NotFound; + } + + Y_ENSURE(p->size() > 0); + if (p->size() > 1) { + return EStatus::Ambigious; + } + + f = (*r)->Functions.FindPtr(*p->begin()); + Y_ENSURE(f); } function = *f; - return true; + return EStatus::Found; } TResourceInfo::TPtr TUdfIndex::FindResourceByModule(const TString& moduleName) const { auto p = Resources_.FindPtr(moduleName); - return p ? *p : nullptr; + if (!p) { + if (CaseSensitive_) { + return nullptr; + } + + auto n = ICaseModules_.FindPtr(to_lower(moduleName)); + Y_ENSURE(n->size() > 0); + if (n->size() > 1) { + return nullptr; + } + + p = Resources_.FindPtr(*n->begin()); + Y_ENSURE(p); + } + + return *p; } TSet TUdfIndex::FindResourcesByModules(const TSet& modules) const { @@ -130,6 +223,11 @@ TSet TUdfIndex::FindResourcesByModules(const TSet& void TUdfIndex::UnregisterResource(TResourceInfo::TPtr resource) { for (auto& m : resource->Modules) { Resources_.erase(m); + auto& names = ICaseModules_[to_lower(m)]; + names.erase(m); + if (names.empty()) { + ICaseModules_.erase(to_lower(m)); + } } // resource pointer should be alive here to avoid problems with erase } @@ -170,11 +268,12 @@ void TUdfIndex::RegisterResource(const TResourceInfo::TPtr& resource, EOverrideM for (auto& m : resource->Modules) { Resources_.emplace(m, resource); + ICaseModules_[to_lower(m)].insert(m); } } TIntrusivePtr TUdfIndex::Clone() const { - return new TUdfIndex(Resources_); + return new TUdfIndex(Resources_, CaseSensitive_); } void TUdfIndex::RegisterResources(const TVector& resources, EOverrideMode mode) { diff --git a/ydb/library/yql/core/yql_udf_index.h b/ydb/library/yql/core/yql_udf_index.h index 41896d191da1..1ac882bd81eb 100644 --- a/ydb/library/yql/core/yql_udf_index.h +++ b/ydb/library/yql/core/yql_udf_index.h @@ -72,10 +72,12 @@ struct TResourceInfo : public TThrRefBase { TDownloadLink Link; TSet Modules; TMap Functions; + TMap> ICaseFuncNames; void SetFunctions(const TVector& functions) { for (auto& f : functions) { Functions.emplace(f.Name, f); + ICaseFuncNames[to_lower(f.Name)].insert(f.Name); } } }; @@ -96,12 +98,21 @@ class TUdfIndex : public TThrRefBase { RaiseError }; + enum class EStatus { + Found, + NotFound, + Ambigious + }; + public: TUdfIndex(); - bool ContainsModule(const TString& moduleName) const; - bool FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const; + void SetCaseSentiveSearch(bool caseSensitive); + bool CanonizeModule(TString& moduleName) const; + EStatus ContainsModule(const TString& moduleName) const; + EStatus FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const; TResourceInfo::TPtr FindResourceByModule(const TString& moduleName) const; + bool ContainsModuleStrict(const TString& moduleName) const; /* New resource can contain already registered module. In this case 'mode' will be used to resolve conflicts. @@ -114,7 +125,7 @@ class TUdfIndex : public TThrRefBase { TIntrusivePtr Clone() const; private: - explicit TUdfIndex(const TMap& resources); + explicit TUdfIndex(const TMap& resources, bool caseSensitive); bool ContainsAnyModule(const TSet& modules) const; TSet FindResourcesByModules(const TSet& modules) const; @@ -123,6 +134,8 @@ class TUdfIndex : public TThrRefBase { private: // module => Resource TMap Resources_; + bool CaseSensitive_ = true; + TMap> ICaseModules_; }; void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TVector& paths, bool isTrusted, TUdfIndex::EOverrideMode mode, TUdfIndex& registry); diff --git a/ydb/library/yql/core/yql_udf_resolver.h b/ydb/library/yql/core/yql_udf_resolver.h index 68c0ba0525a9..593032c70ecb 100644 --- a/ydb/library/yql/core/yql_udf_resolver.h +++ b/ydb/library/yql/core/yql_udf_resolver.h @@ -42,6 +42,7 @@ class IUdfResolver : public TThrRefBase { THashMap SecureParams; // output + TString NormalizedName; const TTypeAnnotationNode* NormalizedUserType = nullptr; const TTypeAnnotationNode* RunConfigType = nullptr; const TTypeAnnotationNode* CallableType = nullptr; diff --git a/ydb/library/yql/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp b/ydb/library/yql/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp index 42df4d8753f0..f22f86374a25 100644 --- a/ydb/library/yql/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp +++ b/ydb/library/yql/providers/common/udf_resolve/yql_outproc_udf_resolver.cpp @@ -346,6 +346,7 @@ class TOutProcUdfResolver : public IUdfResolver { ctx.AddError(TIssue(udf->Pos, udfRes.GetError())); hasErrors = true; } else { + udf->NormalizedName = udf->Name; udf->CallableType = ParseTypeFromYson(TStringBuf{udfRes.GetCallableType()}, ctx, udf->Pos); if (!udf->CallableType) { hasErrors = true; diff --git a/ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.cpp b/ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.cpp index fa221333ac49..c2a41231f1ca 100644 --- a/ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.cpp +++ b/ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.cpp @@ -200,6 +200,7 @@ bool LoadFunctionsMetadata(const TVector& functions, continue; } + udf.NormalizedName = udf.Name; udf.CallableType = ConvertMiniKQLType(udf.Pos, funcInfo.FunctionType, ctx); YQL_ENSURE(udf.CallableType); if (funcInfo.RunConfigType) { diff --git a/ydb/library/yql/providers/common/udf_resolve/yql_udf_resolver_with_index.cpp b/ydb/library/yql/providers/common/udf_resolve/yql_udf_resolver_with_index.cpp index 20bf53786d33..395d0d81a12c 100644 --- a/ydb/library/yql/providers/common/udf_resolve/yql_udf_resolver_with_index.cpp +++ b/ydb/library/yql/providers/common/udf_resolve/yql_udf_resolver_with_index.cpp @@ -69,7 +69,7 @@ class TUdfResolverWithIndex : public IUdfResolver { TMaybe GetSystemModulePath(const TStringBuf& moduleName) const override { with_lock(Lock_) { TString moduleNameStr(moduleName); - if (!UdfIndex_->ContainsModule(moduleNameStr)) { + if (!UdfIndex_->ContainsModuleStrict(moduleNameStr)) { return Nothing(); } @@ -115,7 +115,7 @@ class TUdfResolverWithIndex : public IUdfResolver { bool ContainsModule(const TStringBuf& moduleName) const override { TString moduleNameStr = TString(moduleName); - if (UdfIndex_->ContainsModule(moduleNameStr)) { + if (UdfIndex_->ContainsModuleStrict(moduleNameStr)) { return true; } @@ -142,17 +142,29 @@ class TUdfResolverWithIndex : public IUdfResolver { */ TString moduleNameStr = TString(moduleName); - if (!UdfIndex_->ContainsModule(moduleNameStr)) { + auto moduleStatus = UdfIndex_->ContainsModule(moduleNameStr); + if (moduleStatus == TUdfIndex::EStatus::NotFound) { fallbackFunction = &function; return true; } + if (moduleStatus == TUdfIndex::EStatus::Ambigious) { + ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Ambigious module name: " << moduleName)); + return false; + } + TFunctionInfo info; - if (!UdfIndex_->FindFunction(moduleNameStr, function.Name, info)) { + auto functionStatus = UdfIndex_->FindFunction(moduleNameStr, function.Name, info); + if (functionStatus == TUdfIndex::EStatus::NotFound) { ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Function not found: " << function.Name)); return false; } + if (functionStatus == TUdfIndex::EStatus::Ambigious) { + ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Ambigious function: " << function.Name)); + return false; + } + TResourceFile::TPtr file = DownloadFileWithModule(moduleName, function.Pos, ctx); if (!file) { return false; @@ -161,6 +173,7 @@ class TUdfResolverWithIndex : public IUdfResolver { additionalImport = &file->Import_; if (info.IsTypeAwareness) { + function.Name = info.Name; fallbackFunction = &function; return true; } @@ -170,6 +183,7 @@ class TUdfResolverWithIndex : public IUdfResolver { return false; } + function.NormalizedName = info.Name; function.CallableType = ParseTypeFromYson(TStringBuf{info.CallableType}, ctx, function.Pos); if (!function.CallableType) { ctx.AddError(TIssue(function.Pos, TStringBuilder() << "Failed to build callable type from YSON for function " << function.Name)); @@ -205,26 +219,29 @@ class TUdfResolverWithIndex : public IUdfResolver { TResourceFile::TPtr DownloadFileWithModule(const TStringBuf& module) const { TString moduleName(module); - const auto it = DownloadedFiles_.find(module); - if (it != DownloadedFiles_.end()) { - return it->second; - } - auto resource = UdfIndex_->FindResourceByModule(moduleName); if (!resource) { ythrow yexception() << "No resource has been found for registered module " << moduleName; } + auto canonizedModuleName = moduleName; + Y_ENSURE(UdfIndex_->CanonizeModule(canonizedModuleName)); + + const auto it = DownloadedFiles_.find(canonizedModuleName); + if (it != DownloadedFiles_.end()) { + return it->second; + } + // token is empty for urls for now // assumption: file path is frozen already, no need to put into file storage const TDownloadLink& downloadLink = resource->Link; TFileLinkPtr link = downloadLink.IsUrl ? FileStorage_->PutUrl(downloadLink.Path, {}) : CreateFakeFileLink(downloadLink.Path, downloadLink.Md5); - TResourceFile::TPtr file = TResourceFile::Create(moduleName, resource->Modules, link); + TResourceFile::TPtr file = TResourceFile::Create(canonizedModuleName, resource->Modules, link); for (auto& d : resource->Modules) { auto p = DownloadedFiles_.emplace(d, file); if (!p.second) { // should not happen because UdfIndex handles conflicts - ythrow yexception() << "file already downloaded for module " << moduleName << ", conflicting path " << downloadLink.Path << ", existing local file " << p.first->second->Link_->GetPath(); + ythrow yexception() << "file already downloaded for module " << canonizedModuleName << ", conflicting path " << downloadLink.Path << ", existing local file " << p.first->second->Link_->GetPath(); } } diff --git a/ydb/library/yql/providers/config/yql_config_provider.cpp b/ydb/library/yql/providers/config/yql_config_provider.cpp index 4346a072b228..b3cab30b179c 100644 --- a/ydb/library/yql/providers/config/yql_config_provider.cpp +++ b/ydb/library/yql/providers/config/yql_config_provider.cpp @@ -749,7 +749,19 @@ namespace { return false; } } - else if (name == "DqEngine") { + else if (name == "UdfIgnoreCase" || name == "UdfStrictCase") { + if (args.size() != 0) { + ctx.AddError(TIssue(pos, TStringBuilder() << "Expected no arguments, but got " << args.size())); + return false; + } + + if (!Types.UdfIndex) { + ctx.AddError(TIssue(pos, "UdfIndex is not available")); + return false; + } + + Types.UdfIndex->SetCaseSentiveSearch(name == "UdfStrictCase"); + } else if (name == "DqEngine") { if (args.size() != 1) { ctx.AddError(TIssue(pos, TStringBuilder() << "Expected at most 1 argument, but got " << args.size())); return false; diff --git a/ydb/library/yql/tests/common/udf_test/test.py b/ydb/library/yql/tests/common/udf_test/test.py index b709daa61a89..9affa81e9006 100644 --- a/ydb/library/yql/tests/common/udf_test/test.py +++ b/ydb/library/yql/tests/common/udf_test/test.py @@ -56,11 +56,14 @@ def test(case): cfg = yql_utils.get_program_cfg(None, case, DATA_PATH) files = {} diff_tool = None + scan_udfs = False for item in cfg: if item[0] == 'file': files[item[1]] = item[2] if item[0] == 'diff_tool': diff_tool = item[1:] + if item[0] == 'scan_udfs': + scan_udfs = True in_tables = yql_utils.get_input_tables(None, cfg, DATA_PATH, def_attr=yql_utils.KSV_ATTR) @@ -90,7 +93,7 @@ def test(case): check_error=not xfail, extra_env=extra_env, require_udf_resolver=True, - scan_udfs=False + scan_udfs=scan_udfs ) if xfail: diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json b/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json index 6e475365ea6f..0ab7651bdcb8 100644 --- a/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json +++ b/ydb/library/yql/udfs/common/datetime2/test/canondata/result.json @@ -54,6 +54,11 @@ "uri": "file://test.test_Get_/results.txt" } ], + "test.test[IgnoreCaseFuncs]": [ + { + "uri": "file://test.test_IgnoreCaseFuncs_/results.txt" + } + ], "test.test[ImplicitSplit]": [ { "uri": "file://test.test_ImplicitSplit_/results.txt" diff --git a/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt new file mode 100644 index 000000000000..2da9f03e5c9a --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt @@ -0,0 +1,36 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Uint16" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Uint8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "2001"; + "1" + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.cfg b/ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.cfg new file mode 100644 index 000000000000..9a686c75c4f7 --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.cfg @@ -0,0 +1 @@ +scan_udfs \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.sql b/ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.sql new file mode 100644 index 000000000000..4378b0a6a628 --- /dev/null +++ b/ydb/library/yql/udfs/common/datetime2/test/cases/IgnoreCaseFuncs.sql @@ -0,0 +1,3 @@ +pragma config.flags("UdfIgnoreCase"); +select + DATETIME::GETYEAR(Date('2001-01-01')),datetime::getmonth(Date('2001-01-01')) diff --git a/ydb/library/yql/udfs/common/yson2/test/canondata/result.json b/ydb/library/yql/udfs/common/yson2/test/canondata/result.json index e8db385e4542..962904ec60a3 100644 --- a/ydb/library/yql/udfs/common/yson2/test/canondata/result.json +++ b/ydb/library/yql/udfs/common/yson2/test/canondata/result.json @@ -94,6 +94,11 @@ "uri": "file://test.test_GoodForYsonBadForJson_/results.txt" } ], + "test.test[IgnoreCaseFuncs]": [ + { + "uri": "file://test.test_IgnoreCaseFuncs_/results.txt" + } + ], "test.test[ImplicitFromRes]": [ { "uri": "file://test.test_ImplicitFromRes_/results.txt" @@ -109,14 +114,14 @@ "uri": "file://test.test_JsonSerializeSkipMapEntity_/results.txt" } ], - "test.test[JsonWithUtf8]": [ + "test.test[JsonWithNanAsString]": [ { - "uri": "file://test.test_JsonWithUtf8_/results.txt" + "uri": "file://test.test_JsonWithNanAsString_/results.txt" } ], - "test.test[JsonWithNanAsString]": [ + "test.test[JsonWithUtf8]": [ { - "uri": "file://test.test_JsonWithNanAsString_/results.txt" + "uri": "file://test.test_JsonWithUtf8_/results.txt" } ], "test.test[Lists]": [ diff --git a/ydb/library/yql/udfs/common/yson2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt b/ydb/library/yql/udfs/common/yson2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt new file mode 100644 index 000000000000..b6d6e7d1db10 --- /dev/null +++ b/ydb/library/yql/udfs/common/yson2/test/canondata/test.test_IgnoreCaseFuncs_/results.txt @@ -0,0 +1,65 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column1"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [] + ]; + {}; + { + "$type" = "int64"; + "$value" = "1" + }; + [ + "3" + ] + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.cfg b/ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.cfg new file mode 100644 index 000000000000..135ef7371d89 --- /dev/null +++ b/ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.cfg @@ -0,0 +1,2 @@ +scan_udfs + diff --git a/ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.sql b/ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.sql new file mode 100644 index 000000000000..d57d650984e4 --- /dev/null +++ b/ydb/library/yql/udfs/common/yson2/test/cases/IgnoreCaseFuncs.sql @@ -0,0 +1,3 @@ +pragma config.flags("UdfIgnoreCase"); +select + YSON::PARSE('[]'),yson::parse('{}'y),yson::from(1),yson::getlength('[1;2;3]'y); From 3baf09e91a26f27622112d510a8bfbdfc8815809 Mon Sep 17 00:00:00 2001 From: Vitaly Stoyan Date: Thu, 24 Oct 2024 16:54:24 +0300 Subject: [PATCH 2/4] test --- ydb/library/yql/core/ut/yql_udf_index_ut.cpp | 109 ++++++++++++++----- ydb/library/yql/core/yql_udf_index.cpp | 4 + 2 files changed, 85 insertions(+), 28 deletions(-) diff --git a/ydb/library/yql/core/ut/yql_udf_index_ut.cpp b/ydb/library/yql/core/ut/yql_udf_index_ut.cpp index c0c6fc939753..66a8f957ec98 100644 --- a/ydb/library/yql/core/ut/yql_udf_index_ut.cpp +++ b/ydb/library/yql/core/ut/yql_udf_index_ut.cpp @@ -43,7 +43,7 @@ void EnsureLinksEqual(const TDownloadLink& link1, const TDownloadLink& link2) { void EnsureContainsFunction(TUdfIndex::TPtr index, TString module, const TFunctionInfo& f) { TFunctionInfo existingFunc; - UNIT_ASSERT(index->FindFunction(module, f.Name, existingFunc)); + UNIT_ASSERT(index->FindFunction(module, f.Name, existingFunc) == TUdfIndex::EStatus::Found); EnsureFunctionsEqual(f, existingFunc); } } @@ -52,15 +52,15 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { Y_UNIT_TEST(Empty) { auto index1 = MakeIntrusive(); - UNIT_ASSERT(!index1->ContainsModule("M1")); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::NotFound); UNIT_ASSERT(index1->FindResourceByModule("M1") == nullptr); TFunctionInfo f1; - UNIT_ASSERT(!index1->FindFunction("M1", "M1.F1", f1)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F1", f1), TUdfIndex::EStatus::NotFound); auto index2 = index1->Clone(); - UNIT_ASSERT(!index2->ContainsModule("M1")); + UNIT_ASSERT_EQUAL(index2->ContainsModule("M1"), TUdfIndex::EStatus::NotFound); UNIT_ASSERT(index2->FindResourceByModule("M1") == nullptr); - UNIT_ASSERT(!index2->FindFunction("M1", "M1.F1", f1)); + UNIT_ASSERT_EQUAL(index2->FindFunction("M1", "M1.F1", f1), TUdfIndex::EStatus::NotFound); } Y_UNIT_TEST(SingleModuleAndFunction) { @@ -72,8 +72,8 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { b.AddFunction(func1); index1->RegisterResource(b.Build(), TUdfIndex::EOverrideMode::RaiseError); - UNIT_ASSERT(index1->ContainsModule("M1")); - UNIT_ASSERT(!index1->ContainsModule("M2")); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M2"), TUdfIndex::EStatus::NotFound); UNIT_ASSERT(index1->FindResourceByModule("M2") == nullptr); auto resource1 = index1->FindResourceByModule("M1"); @@ -81,19 +81,19 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { EnsureLinksEqual(resource1->Link, link1); TFunctionInfo f1; - UNIT_ASSERT(!index1->FindFunction("M2", "M2.F1", f1)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M2.F1", f1), TUdfIndex::EStatus::NotFound); - UNIT_ASSERT(index1->FindFunction("M1", "M1.F1", f1)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F1", f1), TUdfIndex::EStatus::Found); EnsureFunctionsEqual(f1, func1); // ensure both indexes contain the same info auto index2 = index1->Clone(); - UNIT_ASSERT(index1->ContainsModule("M1")); - UNIT_ASSERT(index2->ContainsModule("M1")); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index2->ContainsModule("M1"), TUdfIndex::EStatus::Found); TFunctionInfo f2; - UNIT_ASSERT(index2->FindFunction("M1", "M1.F1", f2)); + UNIT_ASSERT_EQUAL(index2->FindFunction("M1", "M1.F1", f2), TUdfIndex::EStatus::Found); EnsureFunctionsEqual(f1, f2); auto resource2 = index2->FindResourceByModule("M1"); @@ -140,11 +140,11 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { EnsureLinksEqual(r22->Link, link2); // check modules - UNIT_ASSERT(index1->ContainsModule("M1")); - UNIT_ASSERT(index1->ContainsModule("M2")); - UNIT_ASSERT(index1->ContainsModule("M3")); - UNIT_ASSERT(index1->ContainsModule("M4")); - UNIT_ASSERT(!index1->ContainsModule("M5")); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M2"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M3"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M4"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M5"), TUdfIndex::EStatus::NotFound); EnsureContainsFunction(index1, "M1", func11); EnsureContainsFunction(index1, "M1", func12); @@ -157,8 +157,8 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { TFunctionInfo f; // known func, but non-existent module - UNIT_ASSERT(!index1->FindFunction("M5", "M1.F1", f)); - UNIT_ASSERT(!index1->FindFunction("M2", "M3.F1", f)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M5", "M1.F1", f), TUdfIndex::EStatus::NotFound); + UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M3.F1", f), TUdfIndex::EStatus::NotFound); } Y_UNIT_TEST(ConflictRaiseError) { @@ -199,7 +199,7 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { EnsureContainsFunction(index1, "M2", func13); TFunctionInfo f; - UNIT_ASSERT(!index1->FindFunction("M3", "M3.F1", f)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M3", "M3.F1", f), TUdfIndex::EStatus::NotFound); } Y_UNIT_TEST(ConflictPreserveExisting) { @@ -240,7 +240,7 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { EnsureContainsFunction(index1, "M2", func13); TFunctionInfo f; - UNIT_ASSERT(!index1->FindFunction("M3", "M3.F1", f)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M3", "M3.F1", f), TUdfIndex::EStatus::NotFound); } Y_UNIT_TEST(ConflictReplace1WithNew) { @@ -299,9 +299,9 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { // not here anymore TFunctionInfo f; - UNIT_ASSERT(!index1->FindFunction("M1", "M1.F1", f)); - UNIT_ASSERT(!index1->FindFunction("M1", "M1.F2", f)); - UNIT_ASSERT(!index1->FindFunction("M2", "M2.F1", f)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F1", f), TUdfIndex::EStatus::NotFound); + UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F2", f), TUdfIndex::EStatus::NotFound); + UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M2.F1", f), TUdfIndex::EStatus::NotFound); } Y_UNIT_TEST(ConflictReplace2WithNew) { @@ -359,10 +359,63 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { // not here anymore TFunctionInfo f; - UNIT_ASSERT(!index1->FindFunction("M1", "M1.F2", f)); - UNIT_ASSERT(!index1->FindFunction("M2", "M2.F1", f)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F2", f), TUdfIndex::EStatus::NotFound); + UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M2.F1", f), TUdfIndex::EStatus::NotFound); - UNIT_ASSERT(!index1->FindFunction("M3", "M3.F3", f)); - UNIT_ASSERT(!index1->FindFunction("M4", "M4.F4", f)); + UNIT_ASSERT_EQUAL(index1->FindFunction("M3", "M3.F3", f), TUdfIndex::EStatus::NotFound); + UNIT_ASSERT_EQUAL(index1->FindFunction("M4", "M4.F4", f), TUdfIndex::EStatus::NotFound); + } + + Y_UNIT_TEST(SetInsensitiveSearch) { + auto index1 = MakeIntrusive(); + index1->SetCaseSentiveSearch(false); + auto func1 = BuildFunctionInfo("M1.FA", 1); + auto func2 = BuildFunctionInfo("M1.fa", 1); + auto func3 = BuildFunctionInfo("M1.g", 1); + auto func4 = BuildFunctionInfo("mx.h", 1); + auto func5 = BuildFunctionInfo("MX.g", 1); + auto link1 = TDownloadLink::File("file1"); + + TResourceBuilder b(link1); + b.AddFunction(func1); + b.AddFunction(func2); + b.AddFunction(func3); + b.AddFunction(func4); + b.AddFunction(func5); + + index1->RegisterResource(b.Build(), TUdfIndex::EOverrideMode::RaiseError); + + auto checkIndex = [&](auto index) { + UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("m1"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("mx"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("MX"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index1->ContainsModule("mX"), TUdfIndex::EStatus::Ambigious); + UNIT_ASSERT_EQUAL(index1->ContainsModule("M3"), TUdfIndex::EStatus::NotFound); + + UNIT_ASSERT(index->FindResourceByModule("M3") == nullptr); + auto resource1 = index->FindResourceByModule("m1"); + UNIT_ASSERT(resource1 != nullptr); + EnsureLinksEqual(resource1->Link, link1); + + TFunctionInfo f; + UNIT_ASSERT_EQUAL(index->FindFunction("m1", "M1.FA", f), TUdfIndex::EStatus::Found); + EnsureFunctionsEqual(f, func1); + UNIT_ASSERT_EQUAL(index->FindFunction("m1", "m1.Fa", f), TUdfIndex::EStatus::Ambigious); + UNIT_ASSERT_EQUAL(index->FindFunction("m1", "M1.fa", f), TUdfIndex::EStatus::Found); + EnsureFunctionsEqual(f, func2); + UNIT_ASSERT_EQUAL(index->FindFunction("m1", "m1.g", f), TUdfIndex::EStatus::Found); + EnsureFunctionsEqual(f, func3); + UNIT_ASSERT_EQUAL(index->FindFunction("Mx", "mx.h", f), TUdfIndex::EStatus::Ambigious); + UNIT_ASSERT_EQUAL(index->FindFunction("mx", "mx.H", f), TUdfIndex::EStatus::Found); + EnsureFunctionsEqual(f, func4); + UNIT_ASSERT_EQUAL(index->FindFunction("MX", "mx.g", f), TUdfIndex::EStatus::Found); + EnsureFunctionsEqual(f, func5); + }; + + checkIndex(index1); + // ensure both indexes contain the same info + auto index2 = index1->Clone(); + checkIndex(index2); } } diff --git a/ydb/library/yql/core/yql_udf_index.cpp b/ydb/library/yql/core/yql_udf_index.cpp index 7484951945dd..d0e565506d64 100644 --- a/ydb/library/yql/core/yql_udf_index.cpp +++ b/ydb/library/yql/core/yql_udf_index.cpp @@ -197,6 +197,10 @@ TResourceInfo::TPtr TUdfIndex::FindResourceByModule(const TString& moduleName) c } auto n = ICaseModules_.FindPtr(to_lower(moduleName)); + if (!n) { + return nullptr; + } + Y_ENSURE(n->size() > 0); if (n->size() > 1) { return nullptr; From 431d9281333d3272e9732d4509d162f6183ebc47 Mon Sep 17 00:00:00 2001 From: Vitaly Stoyan Date: Thu, 24 Oct 2024 16:57:38 +0300 Subject: [PATCH 3/4] fix --- ydb/library/yql/core/ut/yql_udf_index_ut.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ydb/library/yql/core/ut/yql_udf_index_ut.cpp b/ydb/library/yql/core/ut/yql_udf_index_ut.cpp index 66a8f957ec98..5b0082452df9 100644 --- a/ydb/library/yql/core/ut/yql_udf_index_ut.cpp +++ b/ydb/library/yql/core/ut/yql_udf_index_ut.cpp @@ -386,12 +386,12 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) { index1->RegisterResource(b.Build(), TUdfIndex::EOverrideMode::RaiseError); auto checkIndex = [&](auto index) { - UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found); - UNIT_ASSERT_EQUAL(index1->ContainsModule("m1"), TUdfIndex::EStatus::Found); - UNIT_ASSERT_EQUAL(index1->ContainsModule("mx"), TUdfIndex::EStatus::Found); - UNIT_ASSERT_EQUAL(index1->ContainsModule("MX"), TUdfIndex::EStatus::Found); - UNIT_ASSERT_EQUAL(index1->ContainsModule("mX"), TUdfIndex::EStatus::Ambigious); - UNIT_ASSERT_EQUAL(index1->ContainsModule("M3"), TUdfIndex::EStatus::NotFound); + UNIT_ASSERT_EQUAL(index->ContainsModule("M1"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index->ContainsModule("m1"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index->ContainsModule("mx"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index->ContainsModule("MX"), TUdfIndex::EStatus::Found); + UNIT_ASSERT_EQUAL(index->ContainsModule("mX"), TUdfIndex::EStatus::Ambigious); + UNIT_ASSERT_EQUAL(index->ContainsModule("M3"), TUdfIndex::EStatus::NotFound); UNIT_ASSERT(index->FindResourceByModule("M3") == nullptr); auto resource1 = index->FindResourceByModule("m1"); From 11296fd428fd1abdcf5e957ab43912117670307a Mon Sep 17 00:00:00 2001 From: Vitaly Stoyan Date: Thu, 24 Oct 2024 18:42:46 +0300 Subject: [PATCH 4/4] qplayer --- .../core/qplayer/udf_resolver/yql_qplayer_udf_resolver.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ydb/library/yql/core/qplayer/udf_resolver/yql_qplayer_udf_resolver.cpp b/ydb/library/yql/core/qplayer/udf_resolver/yql_qplayer_udf_resolver.cpp index 44a104fc0418..c14e7e8f7cee 100644 --- a/ydb/library/yql/core/qplayer/udf_resolver/yql_qplayer_udf_resolver.cpp +++ b/ydb/library/yql/core/qplayer/udf_resolver/yql_qplayer_udf_resolver.cpp @@ -109,6 +109,7 @@ class TResolver : public IUdfResolver { TString SaveValue(const TFunction* f) const { auto node = NYT::TNode() + ("NormalizedName", f->NormalizedName) ("CallableType", TypeToYsonNode(f->CallableType)); if (f->NormalizedUserType && f->NormalizedUserType->GetKind() != ETypeAnnotationKind::Void) { node("NormalizedUserType", TypeToYsonNode(f->NormalizedUserType)); @@ -131,6 +132,12 @@ class TResolver : public IUdfResolver { void LoadValue(TFunction* f, const TString& value, TExprContext& ctx) const { auto node = NYT::NodeFromYsonString(value); + if (node.HasKey("NormalizedName")) { + f->NormalizedName = node["NormalizedName"].AsString(); + } else { + f->NormalizedName = f->Name; + } + f->CallableType = ParseTypeFromYson(node["CallableType"], ctx); if (node.HasKey("NormalizedUserType")) { f->NormalizedUserType = ParseTypeFromYson(node["NormalizedUserType"], ctx);