From 5bc86593641f8da1cd06fd9020812b6fe95fceab Mon Sep 17 00:00:00 2001 From: Vitaly Stoyan Date: Tue, 27 Feb 2024 14:11:45 +0000 Subject: [PATCH] init --- .../public/purecalc/common/no_llvm/ya.make | 2 + .../yql/public/purecalc/common/worker.cpp | 3 +- .../yql/public/purecalc/common/ya.make | 2 + ydb/library/yql/tools/purebench/purebench.cpp | 146 ++++++++++++++++++ ydb/library/yql/tools/purebench/ya.make | 30 ++++ ydb/library/yql/tools/ya.make | 1 + 6 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 ydb/library/yql/tools/purebench/purebench.cpp create mode 100644 ydb/library/yql/tools/purebench/ya.make diff --git a/ydb/library/yql/public/purecalc/common/no_llvm/ya.make b/ydb/library/yql/public/purecalc/common/no_llvm/ya.make index 3a90e236e1fa..929cbd3b774c 100644 --- a/ydb/library/yql/public/purecalc/common/no_llvm/ya.make +++ b/ydb/library/yql/public/purecalc/common/no_llvm/ya.make @@ -8,6 +8,8 @@ PEERDIR( ydb/library/yql/minikql/invoke_builtins/no_llvm ydb/library/yql/minikql/comp_nodes/no_llvm ydb/library/yql/minikql/codegen/no_llvm + ydb/library/yql/parser/pg_wrapper + ydb/library/yql/parser/pg_wrapper/interface ) END() diff --git a/ydb/library/yql/public/purecalc/common/worker.cpp b/ydb/library/yql/public/purecalc/common/worker.cpp index b32560f420ae..38a2af43b945 100644 --- a/ydb/library/yql/public/purecalc/common/worker.cpp +++ b/ydb/library/yql/public/purecalc/common/worker.cpp @@ -25,6 +25,7 @@ #include #include +#include using namespace NYql; using namespace NYql::NPureCalc; @@ -116,7 +117,7 @@ TWorkerGraph::TWorkerGraph( explorer.Walk(rootNode.GetNode(), Env_); auto compositeNodeFactory = NKikimr::NMiniKQL::GetCompositeWithBuiltinFactory( - {NKikimr::NMiniKQL::GetYqlFactory()} + {NKikimr::NMiniKQL::GetYqlFactory(), NYql::GetPgFactory()} ); auto nodeFactory = [&]( diff --git a/ydb/library/yql/public/purecalc/common/ya.make b/ydb/library/yql/public/purecalc/common/ya.make index 8baa2cfd504a..855f3daae433 100644 --- 
a/ydb/library/yql/public/purecalc/common/ya.make
+++ b/ydb/library/yql/public/purecalc/common/ya.make
@@ -7,6 +7,8 @@ PEERDIR(
     ydb/library/yql/minikql/computation/llvm14
     ydb/library/yql/minikql/invoke_builtins/llvm14
     ydb/library/yql/minikql/comp_nodes/llvm14
+    ydb/library/yql/parser/pg_wrapper
+    ydb/library/yql/parser/pg_wrapper/interface
 )
 
 END()
diff --git a/ydb/library/yql/tools/purebench/purebench.cpp b/ydb/library/yql/tools/purebench/purebench.cpp
new file mode 100644
index 000000000000..e7979a48e1f7
--- /dev/null
+++ b/ydb/library/yql/tools/purebench/purebench.cpp
@@ -0,0 +1,165 @@
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+using namespace NYql;
+using namespace NYql::NPureCalc;
+
+// Benchmark driver. Builds a Skiff input with a single Uint64 column `index`
+// holding 0..count-1, runs the "gen" SQL over it to produce the test data, then
+// runs the "test" SQL over that data `repeats` times and prints the total
+// elapsed time plus a score: generated data size divided by the geometric mean
+// of the per-iteration times (in microseconds) after dropping the slowest third.
+// Returns 0; failures are reported via exceptions handled in main().
+int Main(int argc, const char *argv[])
+{
+    Y_UNUSED(NUdf::GetStaticSymbols());
+    using namespace NLastGetopt;
+    TOpts opts = TOpts::Default();
+    ui64 count;
+    ui32 repeats;
+    TString genSql, testSql;
+    bool showResults;
+    TString udfsDir;
+    TString LLVMSettings;
+    opts.AddHelpOption();
+    opts.AddLongOption("ndebug", "should be at first argument, do not show debug info in error output").NoArgument();
+    opts.AddLongOption('c', "count", "count of input rows").StoreResult(&count).DefaultValue(1000000);
+    opts.AddLongOption('g', "gen-sql", "SQL query to generate data").StoreResult(&genSql).DefaultValue("select index from Input");
+    opts.AddLongOption('t', "test-sql", "SQL query to test").StoreResult(&testSql).DefaultValue("select count(*) as count from Input");
+    opts.AddLongOption('r', "repeats", "number of iterations").StoreResult(&repeats).DefaultValue(10);
+    opts.AddLongOption('w', "show-results", "show results of test SQL").StoreResult(&showResults).DefaultValue(true);
+    opts.AddLongOption("udfs-dir", "directory with UDFs").StoreResult(&udfsDir).DefaultValue("");
+    opts.AddLongOption("llvm-settings", "LLVM settings").StoreResult(&LLVMSettings).DefaultValue("");
+    opts.SetFreeArgsMax(0);
+    TOptsParseResult res(&opts, argc, argv);
+
+    auto factoryOptions = TProgramFactoryOptions();
+    factoryOptions.SetUDFsDir(udfsDir);
+    factoryOptions.SetLLVMSettings(LLVMSettings);
+    auto factory = MakeProgramFactory(factoryOptions);
+
+    // Input schema: a struct with one member, `index`, of data type Uint64.
+    NYT::TNode members{NYT::TNode::CreateList()};
+    auto typeNode = NYT::TNode::CreateList()
+        .Add("DataType")
+        .Add("Uint64");
+
+    members.Add(NYT::TNode::CreateList()
+        .Add("index")
+        .Add(typeNode));
+    NYT::TNode schema = NYT::TNode::CreateList()
+        .Add("StructType")
+        .Add(members);
+
+    auto inputSpec1 = TSkiffInputSpec(TVector<NYT::TNode>{schema});
+    auto outputSpec1 = TSkiffOutputSpec({NYT::TNode::CreateEntity()});
+    auto genProgram = factory->MakePullListProgram(
+        inputSpec1,
+        outputSpec1,
+        genSql,
+        ETranslationMode::SQL);
+
+    // Serialize `count` rows: a variant16 tag of 0 followed by the row index.
+    TStringStream stream;
+    NSkiff::TUncheckedSkiffWriter writer{&stream};
+    for (ui64 i = 0; i < count; ++i) {
+        writer.WriteVariant16Tag(0);
+        writer.WriteUint64(i);
+    }
+    writer.Finish();
+    auto input1 = TStringStream(stream);
+    Cerr << "Input data size: " << input1.Size() << "\n";
+    auto handle1 = genProgram->Apply(&input1);
+    TStringStream output1;
+    handle1->Run(&output1);
+    Cerr << "Generated data size: " << output1.Size() << "\n";
+
+    Cerr << "Dry run of test sql...\n";
+    auto inputSpec2 = TSkiffInputSpec(genProgram->MakeOutputSchema());
+    auto outputSpec2 = TYsonOutputSpec({NYT::TNode::CreateEntity()});
+    auto testProgram = factory->MakePullListProgram(
+        inputSpec2,
+        outputSpec2,
+        testSql,
+        ETranslationMode::SQL);
+    auto input2 = TStringStream(output1);
+    auto handle2 = testProgram->Apply(&input2);
+    TStringStream output2;
+    handle2->Run(&output2);
+    if (showResults) {
+        TStringInput in(output2.Str());
+        NYson::ReformatYsonStream(&in, &Cerr, NYson::EYsonFormat::Pretty, NYson::EYsonType::ListFragment);
+    }
+
+    Cerr << "Run benchmark...\n";
+    TVector<TDuration> times;
+    TSimpleTimer allTimer;
+    for (ui32 i = 0; i < repeats; ++i) {
+        TSimpleTimer timer;
+        auto input2 = TStringStream(output1);
+        auto handle2 = testProgram->Apply(&input2);
+        TNullOutput output2;
+        handle2->Run(&output2);
+        times.push_back(timer.Get());
+    }
+
+    Cout << "Elapsed: " << allTimer.Get() << "\n";
+    if (times.empty()) {
+        // --repeats=0 leaves no samples; the mean below would be 0/0 (NaN).
+        Cerr << "No iterations were run, bench score is not available\n";
+    } else {
+        // Drop the slowest third, then take the geometric mean of the rest.
+        Sort(times);
+        times.erase(times.end() - times.size() / 3, times.end());
+        double s = 0;
+        for (auto t : times) {
+            s += std::log(t.MicroSeconds());
+        }
+
+        double score = output1.Size() / std::exp(s / times.size());
+        Cout << "Bench score: " << Prec(score, 4) << "\n";
+    }
+
+    NLog::CleanupLogger();
+    return 0;
+}
+
+// Entry point: prints the ABI version when at least one argument is given and
+// the first one is not --ndebug, calls the NBacktrace setup functions and runs
+// Main(). Any exception is reported to stderr and turned into exit code 1.
+int main(int argc, const char *argv[]) {
+    if (argc > 1 && TString(argv[1]) != TStringBuf("--ndebug")) {
+        Cerr << "purebench ABI version: " << NKikimr::NUdf::CurrentAbiVersionStr() << Endl;
+    }
+
+    NYql::NBacktrace::RegisterKikimrFatalActions();
+    NYql::NBacktrace::EnableKikimrSymbolize();
+
+    try {
+        return Main(argc, argv);
+    } catch (const TCompileError& e) {
+        Cerr << e.GetIssues();
+        // A failed compilation is an error too: do not fall through to exit code 0.
+        return 1;
+    } catch (...) {
+        Cerr << CurrentExceptionMessage() << Endl;
+        return 1;
+    }
+}
diff --git a/ydb/library/yql/tools/purebench/ya.make b/ydb/library/yql/tools/purebench/ya.make
new file mode 100644
index 000000000000..d8492fad4f17
--- /dev/null
+++ b/ydb/library/yql/tools/purebench/ya.make
@@ -0,0 +1,30 @@
+PROGRAM(purebench)
+
+ALLOCATOR(J)
+
+SRCS(
+    purebench.cpp
+)
+
+IF (OS_LINUX)
+    # prevent external python extensions to lookup protobuf symbols (and maybe
+    # other common stuff) in main binary
+    EXPORTS_SCRIPT(${ARCADIA_ROOT}/ydb/library/yql/tools/exports.symlist)
+ENDIF()
+
+PEERDIR(
+    library/cpp/getopt
+    library/cpp/svnversion
+    ydb/library/yql/utils/backtrace
+    ydb/library/yql/utils/log
+    ydb/library/yql/public/udf
+    ydb/library/yql/public/udf/service/exception_policy
+    library/cpp/skiff
+    library/cpp/yson
+    ydb/library/yql/public/purecalc/io_specs/mkql
+    ydb/library/yql/public/purecalc
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/library/yql/tools/ya.make b/ydb/library/yql/tools/ya.make
index 147e26ae74b8..eb2ffbbe82a9 100644
--- a/ydb/library/yql/tools/ya.make
+++ 
b/ydb/library/yql/tools/ya.make @@ -6,6 +6,7 @@ RECURSE( mrrun pgrun pg-make-test + purebench sql2yql sql_formatter udf_dep_stub