Skip to content

Commit

Permalink
Merge 5bc8659 into be94821
Browse files Browse the repository at this point in the history
  • Loading branch information
vitstn authored Feb 27, 2024
2 parents be94821 + 5bc8659 commit a0a1643
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 1 deletion.
2 changes: 2 additions & 0 deletions ydb/library/yql/public/purecalc/common/no_llvm/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ PEERDIR(
ydb/library/yql/minikql/invoke_builtins/no_llvm
ydb/library/yql/minikql/comp_nodes/no_llvm
ydb/library/yql/minikql/codegen/no_llvm
ydb/library/yql/parser/pg_wrapper
ydb/library/yql/parser/pg_wrapper/interface
)

END()
Expand Down
3 changes: 2 additions & 1 deletion ydb/library/yql/public/purecalc/common/worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <util/stream/file.h>
#include <ydb/library/yql/minikql/computation/mkql_custom_list.h>
#include <ydb/library/yql/parser/pg_wrapper/interface/comp_factory.h>

using namespace NYql;
using namespace NYql::NPureCalc;
Expand Down Expand Up @@ -116,7 +117,7 @@ TWorkerGraph::TWorkerGraph(
explorer.Walk(rootNode.GetNode(), Env_);

auto compositeNodeFactory = NKikimr::NMiniKQL::GetCompositeWithBuiltinFactory(
{NKikimr::NMiniKQL::GetYqlFactory()}
{NKikimr::NMiniKQL::GetYqlFactory(), NYql::GetPgFactory()}
);

auto nodeFactory = [&](
Expand Down
2 changes: 2 additions & 0 deletions ydb/library/yql/public/purecalc/common/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PEERDIR(
ydb/library/yql/minikql/computation/llvm14
ydb/library/yql/minikql/invoke_builtins/llvm14
ydb/library/yql/minikql/comp_nodes/llvm14
ydb/library/yql/parser/pg_wrapper
ydb/library/yql/parser/pg_wrapper/interface
)

END()
Expand Down
146 changes: 146 additions & 0 deletions ydb/library/yql/tools/purebench/purebench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#include <library/cpp/svnversion/svnversion.h>
#include <library/cpp/getopt/last_getopt.h>

#include <ydb/library/yql/public/purecalc/purecalc.h>
#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>

#include <ydb/library/yql/utils/log/log.h>
#include <ydb/library/yql/utils/backtrace/backtrace.h>
#include <ydb/library/yql/public/udf/udf_registrator.h>
#include <ydb/library/yql/public/udf/udf_version.h>

#include <library/cpp/skiff/skiff.h>
#include <library/cpp/yson/writer.h>

#include <util/datetime/cputimer.h>
#include <util/stream/format.h>
#include <util/stream/null.h>

#include <cmath>

using namespace NYql;
using namespace NYql::NPureCalc;

// Runs the purecalc benchmark.
//
// Steps:
//   1. Parse command-line options (row count, generator SQL, test SQL,
//      number of repeats, UDF directory, LLVM settings).
//   2. Write `count` Skiff rows, each holding a single Uint64 `index` column,
//      and run the generator SQL over them to produce the benchmark input.
//   3. Run the test SQL once ("dry run") and, if requested, pretty-print its
//      YSON output to stderr.
//   4. Run the test SQL `repeats` times, timing each iteration, drop the
//      slowest third of the timings and print a score equal to the generated
//      data size divided by the geometric mean of the remaining timings
//      (in microseconds).
//
// Returns 0 on success. Exceptions are not caught here; they propagate to the
// caller.
int Main(int argc, const char *argv[])
{
    Y_UNUSED(NUdf::GetStaticSymbols());
    using namespace NLastGetopt;
    TOpts opts = TOpts::Default();
    ui64 count;
    ui32 repeats;
    TString genSql, testSql;
    bool showResults;
    TString udfsDir;
    TString LLVMSettings;
    opts.AddHelpOption();
    // "--ndebug" is inspected by main() before parsing; it is registered here
    // so that the option parser accepts it.
    opts.AddLongOption("ndebug", "should be at first argument, do not show debug info in error output").NoArgument();
    opts.AddLongOption('c', "count", "count of input rows").StoreResult(&count).DefaultValue(1000000);
    opts.AddLongOption('g', "gen-sql", "SQL query to generate data").StoreResult(&genSql).DefaultValue("select index from Input");
    opts.AddLongOption('t', "test-sql", "SQL query to test").StoreResult(&testSql).DefaultValue("select count(*) as count from Input");
    opts.AddLongOption('r', "repeats", "number of iterations").StoreResult(&repeats).DefaultValue(10);
    opts.AddLongOption('w', "show-results", "show results of test SQL").StoreResult(&showResults).DefaultValue(true);
    opts.AddLongOption("udfs-dir", "directory with UDFs").StoreResult(&udfsDir).DefaultValue("");
    opts.AddLongOption("llvm-settings", "LLVM settings").StoreResult(&LLVMSettings).DefaultValue("");
    opts.SetFreeArgsMax(0);
    TOptsParseResult res(&opts, argc, argv);

    auto factoryOptions = TProgramFactoryOptions();
    factoryOptions.SetUDFsDir(udfsDir);
    factoryOptions.SetLLVMSettings(LLVMSettings);
    auto factory = MakeProgramFactory(factoryOptions);

    // Input schema of the generator program: a struct with one Uint64 member
    // named "index".
    NYT::TNode members{NYT::TNode::CreateList()};
    auto typeNode = NYT::TNode::CreateList()
        .Add("DataType")
        .Add("Uint64");

    members.Add(NYT::TNode::CreateList()
        .Add("index")
        .Add(typeNode));
    NYT::TNode schema = NYT::TNode::CreateList()
        .Add("StructType")
        .Add(members);

    auto inputSpec1 = TSkiffInputSpec(TVector<NYT::TNode>{schema});
    auto outputSpec1 = TSkiffOutputSpec({NYT::TNode::CreateEntity()});
    auto genProgram = factory->MakePullListProgram(
        inputSpec1,
        outputSpec1,
        genSql,
        ETranslationMode::SQL);

    // Serialize rows 0..count-1; every row is a variant tag 0 followed by the
    // Uint64 index value.
    TStringStream stream;
    NSkiff::TUncheckedSkiffWriter writer{&stream};
    for (ui64 i = 0; i < count; ++i) {
        writer.WriteVariant16Tag(0);
        writer.WriteUint64(i);
    }
    writer.Finish();
    auto input1 = TStringStream(stream);
    Cerr << "Input data size: " << input1.Size() << "\n";
    auto handle1 = genProgram->Apply(&input1);
    TStringStream output1;
    handle1->Run(&output1);
    Cerr << "Generated data size: " << output1.Size() << "\n";

    Cerr << "Dry run of test sql...\n";
    // The test program consumes exactly what the generator program produced.
    auto inputSpec2 = TSkiffInputSpec(genProgram->MakeOutputSchema());
    auto outputSpec2 = TYsonOutputSpec({NYT::TNode::CreateEntity()});
    auto testProgram = factory->MakePullListProgram(
        inputSpec2,
        outputSpec2,
        testSql,
        ETranslationMode::SQL);
    auto input2 = TStringStream(output1);
    auto handle2 = testProgram->Apply(&input2);
    TStringStream output2;
    handle2->Run(&output2);
    if (showResults) {
        TStringInput in(output2.Str());
        NYson::ReformatYsonStream(&in, &Cerr, NYson::EYsonFormat::Pretty, NYson::EYsonType::ListFragment);
    }

    Cerr << "Run benchmark...\n";
    TVector<TDuration> times;
    TSimpleTimer allTimer;
    for (ui32 i = 0; i < repeats; ++i) {
        TSimpleTimer timer;
        // Each iteration gets a fresh copy of the generated data; the copy is
        // made inside the timed region, as is the discarded output.
        auto iterInput = TStringStream(output1);
        auto iterHandle = testProgram->Apply(&iterInput);
        TNullOutput iterOutput;
        iterHandle->Run(&iterOutput);
        times.push_back(timer.Get());
    }

    Cout << "Elapsed: " << allTimer.Get() << "\n";

    // With zero repeats there are no timings: the geometric mean below would
    // be computed as 0.0 / 0 and the score would be printed as NaN.
    if (times.empty()) {
        Cerr << "No benchmark iterations were run, bench score is not available\n";
        NLog::CleanupLogger();
        return 0;
    }

    // Drop the slowest third of the iterations to reduce the impact of outliers.
    Sort(times);
    times.erase(times.end() - times.size() / 3, times.end());

    // Geometric mean of the remaining timings: exp(mean(log(t))).
    double s = 0;
    for (auto t : times) {
        s += std::log(t.MicroSeconds());
    }

    // Score: bytes of generated data per microsecond of (geometric mean) run time.
    double score = output1.Size() / std::exp(s / times.size());
    Cout << "Bench score: " << Prec(score, 4) << "\n";

    NLog::CleanupLogger();
    return 0;
}

// Program entry point: prints the UDF ABI version banner (unless "--ndebug"
// is the first argument), installs backtrace handlers and runs Main(),
// turning any escaped exception into a non-zero exit status.
int main(int argc, const char *argv[]) {
    // The banner is printed only when at least one argument is given and the
    // first one is not "--ndebug".
    if (argc > 1 && TString(argv[1]) != TStringBuf("--ndebug")) {
        Cerr << "purebench ABI version: " << NKikimr::NUdf::CurrentAbiVersionStr() << Endl;
    }

    NYql::NBacktrace::RegisterKikimrFatalActions();
    NYql::NBacktrace::EnableKikimrSymbolize();

    try {
        return Main(argc, argv);
    } catch (const TCompileError& e) {
        Cerr << e.GetIssues();
        // Without an explicit return, control would flow off the end of
        // main(), which yields exit status 0 and hides the compile failure.
        return 1;
    } catch (...) {
        Cerr << CurrentExceptionMessage() << Endl;
        return 1;
    }
}
30 changes: 30 additions & 0 deletions ydb/library/yql/tools/purebench/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Build description of the `purebench` executable.
PROGRAM(purebench)

# NOTE(review): presumably selects the jemalloc allocator — confirm against the
# ya.make ALLOCATOR documentation.
ALLOCATOR(J)

# The program consists of a single translation unit.
SRCS(
purebench.cpp
)

IF (OS_LINUX)
# prevent external python extensions to lookup protobuf symbols (and maybe
# other common stuff) in main binary
EXPORTS_SCRIPT(${ARCADIA_ROOT}/ydb/library/yql/tools/exports.symlist)
ENDIF()

# Library dependencies of the program.
PEERDIR(
library/cpp/getopt
library/cpp/svnversion
ydb/library/yql/utils/backtrace
ydb/library/yql/utils/log
ydb/library/yql/public/udf
ydb/library/yql/public/udf/service/exception_policy
library/cpp/skiff
library/cpp/yson
ydb/library/yql/public/purecalc/io_specs/mkql
ydb/library/yql/public/purecalc
)

# NOTE(review): presumably builds against the latest YQL UDF ABI version —
# confirm against the macro definition.
YQL_LAST_ABI_VERSION()

END()
1 change: 1 addition & 0 deletions ydb/library/yql/tools/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ RECURSE(
mrrun
pgrun
pg-make-test
purebench
sql2yql
sql_formatter
udf_dep_stub
Expand Down

0 comments on commit a0a1643

Please sign in to comment.