Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

purebench tool #2287

Merged
merged 1 commit into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ydb/library/yql/public/purecalc/common/no_llvm/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ PEERDIR(
ydb/library/yql/minikql/invoke_builtins/no_llvm
ydb/library/yql/minikql/comp_nodes/no_llvm
ydb/library/yql/minikql/codegen/no_llvm
ydb/library/yql/parser/pg_wrapper
ydb/library/yql/parser/pg_wrapper/interface
)

END()
Expand Down
3 changes: 2 additions & 1 deletion ydb/library/yql/public/purecalc/common/worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <util/stream/file.h>
#include <ydb/library/yql/minikql/computation/mkql_custom_list.h>
#include <ydb/library/yql/parser/pg_wrapper/interface/comp_factory.h>

using namespace NYql;
using namespace NYql::NPureCalc;
Expand Down Expand Up @@ -116,7 +117,7 @@ TWorkerGraph::TWorkerGraph(
explorer.Walk(rootNode.GetNode(), Env_);

auto compositeNodeFactory = NKikimr::NMiniKQL::GetCompositeWithBuiltinFactory(
{NKikimr::NMiniKQL::GetYqlFactory()}
{NKikimr::NMiniKQL::GetYqlFactory(), NYql::GetPgFactory()}
);

auto nodeFactory = [&](
Expand Down
2 changes: 2 additions & 0 deletions ydb/library/yql/public/purecalc/common/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PEERDIR(
ydb/library/yql/minikql/computation/llvm14
ydb/library/yql/minikql/invoke_builtins/llvm14
ydb/library/yql/minikql/comp_nodes/llvm14
ydb/library/yql/parser/pg_wrapper
ydb/library/yql/parser/pg_wrapper/interface
)

END()
Expand Down
146 changes: 146 additions & 0 deletions ydb/library/yql/tools/purebench/purebench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#include <library/cpp/svnversion/svnversion.h>
#include <library/cpp/getopt/last_getopt.h>

#include <ydb/library/yql/public/purecalc/purecalc.h>
#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>

#include <ydb/library/yql/utils/log/log.h>
#include <ydb/library/yql/utils/backtrace/backtrace.h>
#include <ydb/library/yql/public/udf/udf_registrator.h>
#include <ydb/library/yql/public/udf/udf_version.h>

#include <library/cpp/skiff/skiff.h>
#include <library/cpp/yson/writer.h>

#include <util/datetime/cputimer.h>
#include <util/stream/format.h>
#include <util/stream/null.h>

#include <cmath>

using namespace NYql;
using namespace NYql::NPureCalc;

// Runs the purecalc micro-benchmark.
//
// Steps:
//   1. Parse command-line options (row count, generator SQL, test SQL,
//      number of repeats, UDF directory, LLVM settings).
//   2. Serialize `count` rows of a single Uint64 column "index" in skiff
//      format and feed them through the generator query (`--gen-sql`).
//   3. Run the test query (`--test-sql`) once over the generated data with
//      YSON output and, if requested, pretty-print the result to stderr.
//   4. Run the test query `repeats` more times with the output discarded,
//      timing every iteration.
//   5. Drop the slowest third of the timings and print a score equal to the
//      generated data size in bytes divided by the geometric mean of the
//      remaining timings in microseconds.
//
// Returns 0 on success. Errors are reported via exceptions, which the caller
// is expected to handle.
int Main(int argc, const char *argv[])
{
    Y_UNUSED(NUdf::GetStaticSymbols());
    using namespace NLastGetopt;
    TOpts opts = TOpts::Default();
    ui64 count;
    ui32 repeats;
    TString genSql, testSql;
    bool showResults;
    TString udfsDir;
    TString LLVMSettings;
    opts.AddHelpOption();
    // "--ndebug" is only registered here so that option parsing accepts it;
    // the flag itself is inspected in main() before Main() is called.
    opts.AddLongOption("ndebug", "should be at first argument, do not show debug info in error output").NoArgument();
    opts.AddLongOption('c', "count", "count of input rows").StoreResult(&count).DefaultValue(1000000);
    opts.AddLongOption('g', "gen-sql", "SQL query to generate data").StoreResult(&genSql).DefaultValue("select index from Input");
    opts.AddLongOption('t', "test-sql", "SQL query to test").StoreResult(&testSql).DefaultValue("select count(*) as count from Input");
    opts.AddLongOption('r', "repeats", "number of iterations").StoreResult(&repeats).DefaultValue(10);
    opts.AddLongOption('w', "show-results", "show results of test SQL").StoreResult(&showResults).DefaultValue(true);
    opts.AddLongOption("udfs-dir", "directory with UDFs").StoreResult(&udfsDir).DefaultValue("");
    opts.AddLongOption("llvm-settings", "LLVM settings").StoreResult(&LLVMSettings).DefaultValue("");
    opts.SetFreeArgsMax(0);
    TOptsParseResult res(&opts, argc, argv);

    auto factoryOptions = TProgramFactoryOptions();
    factoryOptions.SetUDFsDir(udfsDir);
    factoryOptions.SetLLVMSettings(LLVMSettings);
    auto factory = MakeProgramFactory(factoryOptions);

    // Input schema of the generator query: a struct with one member,
    // "index" of type Uint64.
    NYT::TNode members{NYT::TNode::CreateList()};
    auto typeNode = NYT::TNode::CreateList()
        .Add("DataType")
        .Add("Uint64");

    members.Add(NYT::TNode::CreateList()
        .Add("index")
        .Add(typeNode));
    NYT::TNode schema = NYT::TNode::CreateList()
        .Add("StructType")
        .Add(members);

    auto inputSpec1 = TSkiffInputSpec(TVector<NYT::TNode>{schema});
    auto outputSpec1 = TSkiffOutputSpec({NYT::TNode::CreateEntity()});
    auto genProgram = factory->MakePullListProgram(
        inputSpec1,
        outputSpec1,
        genSql,
        ETranslationMode::SQL);

    // Each row is written as a 16-bit variant tag of 0 followed by the
    // Uint64 row index.
    TStringStream stream;
    NSkiff::TUncheckedSkiffWriter writer{&stream};
    for (ui64 i = 0; i < count; ++i) {
        writer.WriteVariant16Tag(0);
        writer.WriteUint64(i);
    }
    writer.Finish();
    auto input1 = TStringStream(stream);
    Cerr << "Input data size: " << input1.Size() << "\n";
    auto handle1 = genProgram->Apply(&input1);
    TStringStream output1;
    handle1->Run(&output1);
    Cerr << "Generated data size: " << output1.Size() << "\n";

    Cerr << "Dry run of test sql...\n";
    // The test query consumes the generator's output, so its input schema is
    // taken from the generator program.
    auto inputSpec2 = TSkiffInputSpec(genProgram->MakeOutputSchema());
    auto outputSpec2 = TYsonOutputSpec({NYT::TNode::CreateEntity()});
    auto testProgram = factory->MakePullListProgram(
        inputSpec2,
        outputSpec2,
        testSql,
        ETranslationMode::SQL);
    auto input2 = TStringStream(output1);
    auto handle2 = testProgram->Apply(&input2);
    TStringStream output2;
    handle2->Run(&output2);
    if (showResults) {
        TStringInput in(output2.Str());
        NYson::ReformatYsonStream(&in, &Cerr, NYson::EYsonFormat::Pretty, NYson::EYsonType::ListFragment);
    }

    Cerr << "Run benchmark...\n";
    TVector<TDuration> times;
    TSimpleTimer allTimer;
    for (ui32 i = 0; i < repeats; ++i) {
        TSimpleTimer timer;
        // A fresh copy of the generated data is made on every iteration;
        // the copy is made after the timer starts and is therefore part of
        // the measured time.
        auto iterationInput = TStringStream(output1);
        auto iterationHandle = testProgram->Apply(&iterationInput);
        TNullOutput discardedOutput;
        iterationHandle->Run(&discardedOutput);
        times.push_back(timer.Get());
    }

    Cout << "Elapsed: " << allTimer.Get() << "\n";

    if (times.empty()) {
        // With --repeats 0 there are no samples; computing the geometric
        // mean would divide by zero and print a meaningless NaN score.
        Cerr << "No benchmark iterations were run, bench score is not available\n";
    } else {
        // Sort ascending and drop the slowest third of the samples. Integer
        // division guarantees at least one sample remains.
        Sort(times);
        times.erase(times.end() - times.size() / 3, times.end());

        // Geometric mean of the remaining timings, computed via logarithms.
        double s = 0;
        for (auto t : times) {
            s += std::log(t.MicroSeconds());
        }

        // Score: generated bytes per microsecond of geometric-mean run time.
        double score = output1.Size() / std::exp(s / times.size());
        Cout << "Bench score: " << Prec(score, 4) << "\n";
    }

    NLog::CleanupLogger();
    return 0;
}

// Process entry point.
//
// Prints the UDF ABI version banner, installs the backtrace handlers and
// delegates to Main(). Any exception escaping Main() is reported to stderr
// and results in exit code 1.
int main(int argc, const char *argv[]) {
    // The banner is printed only when at least one argument is given and the
    // first one is not "--ndebug".
    if (argc > 1 && TString(argv[1]) != TStringBuf("--ndebug")) {
        Cerr << "purebench ABI version: " << NKikimr::NUdf::CurrentAbiVersionStr() << Endl;
    }

    NYql::NBacktrace::RegisterKikimrFatalActions();
    NYql::NBacktrace::EnableKikimrSymbolize();

    try {
        return Main(argc, argv);
    } catch (const TCompileError& e) {
        // Compilation failures carry structured issues; print them as is.
        Cerr << e.GetIssues();
    } catch (...) {
        Cerr << CurrentExceptionMessage() << Endl;
    }

    // Reached only after an exception. Returning explicitly matters: falling
    // off the end of main() would report success (exit code 0) to the caller.
    return 1;
}
30 changes: 30 additions & 0 deletions ydb/library/yql/tools/purebench/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Build description for the purebench command-line tool
# (single source file: purebench.cpp).
PROGRAM(purebench)

# NOTE(review): "J" presumably selects jemalloc as the allocator - confirm
# against the ya.make ALLOCATOR documentation.
ALLOCATOR(J)

SRCS(
purebench.cpp
)

IF (OS_LINUX)
# prevent external python extensions to lookup protobuf symbols (and maybe
# other common stuff) in main binary
EXPORTS_SCRIPT(${ARCADIA_ROOT}/ydb/library/yql/tools/exports.symlist)
ENDIF()

# Libraries the tool depends on: option parsing, logging/backtrace helpers,
# the UDF public API, skiff/yson serialization and the purecalc library
# together with its mkql I/O specs.
PEERDIR(
library/cpp/getopt
library/cpp/svnversion
ydb/library/yql/utils/backtrace
ydb/library/yql/utils/log
ydb/library/yql/public/udf
ydb/library/yql/public/udf/service/exception_policy
library/cpp/skiff
library/cpp/yson
ydb/library/yql/public/purecalc/io_specs/mkql
ydb/library/yql/public/purecalc
)

# NOTE(review): presumably pins the binary to the latest UDF ABI version -
# confirm against the macro definition.
YQL_LAST_ABI_VERSION()

END()
1 change: 1 addition & 0 deletions ydb/library/yql/tools/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ RECURSE(
mrrun
pgrun
pg-make-test
purebench
sql2yql
sql_formatter
udf_dep_stub
Expand Down
Loading