From a240aa7ad24628094d458f50c62853a7937099a4 Mon Sep 17 00:00:00 2001 From: Igor Munkin Date: Mon, 15 Jul 2024 22:42:03 +0500 Subject: [PATCH] Introduce CLI option to purebench to print compiled and optimized AST (#6693) --- .../yql/public/purecalc/common/interface.cpp | 6 ++++++ .../yql/public/purecalc/common/interface.h | 10 ++++++++++ .../public/purecalc/common/program_factory.cpp | 4 ++++ .../public/purecalc/common/program_factory.h | 1 + .../public/purecalc/common/worker_factory.cpp | 17 ++++++++++++++--- .../yql/public/purecalc/common/worker_factory.h | 4 ++++ ydb/library/yql/tools/purebench/purebench.cpp | 15 +++++++++++++++ 7 files changed, 54 insertions(+), 3 deletions(-) diff --git a/ydb/library/yql/public/purecalc/common/interface.cpp b/ydb/library/yql/public/purecalc/common/interface.cpp index 32e7f088705c..3697f68bbb60 100644 --- a/ydb/library/yql/public/purecalc/common/interface.cpp +++ b/ydb/library/yql/public/purecalc/common/interface.cpp @@ -28,6 +28,7 @@ TProgramFactoryOptions::TProgramFactoryOptions() , UserData_() , LLVMSettings("OFF") , BlockEngineSettings("disable") + , ExprOutputStream(nullptr) , CountersProvider(nullptr) , NativeYtTypeFlags(0) , UseSystemColumns(false) @@ -83,6 +84,11 @@ TProgramFactoryOptions& TProgramFactoryOptions::SetBlockEngineSettings(TStringBu return *this; } +TProgramFactoryOptions& TProgramFactoryOptions::SetExprOutputStream(IOutputStream* exprOutputStream) { + ExprOutputStream = exprOutputStream; + return *this; +} + TProgramFactoryOptions& TProgramFactoryOptions::SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider) { CountersProvider = countersProvider; return *this; diff --git a/ydb/library/yql/public/purecalc/common/interface.h b/ydb/library/yql/public/purecalc/common/interface.h index b5300f1a9c9b..13d50be32aea 100644 --- a/ydb/library/yql/public/purecalc/common/interface.h +++ b/ydb/library/yql/public/purecalc/common/interface.h @@ -250,6 +250,9 @@ namespace NYql { /// decision to the platform heuristics. TString BlockEngineSettings; + /// Output stream to dump the compiled and optimized expressions. + IOutputStream* ExprOutputStream; + /// Provider for generic counters which can be used to export statistics from UDFs. NKikimr::NUdf::ICountersProvider* CountersProvider; @@ -323,6 +326,13 @@ namespace NYql { */ TProgramFactoryOptions& SetBlockEngineSettings(TStringBuf blockEngineSettings); + /** + * Set the stream to dump the compiled and optimized expressions. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetExprOutputStream(IOutputStream* exprOutputStream); + /** * Set new counters provider. Passed pointer should stay alive for as long as the processor factory * stays alive. diff --git a/ydb/library/yql/public/purecalc/common/program_factory.cpp b/ydb/library/yql/public/purecalc/common/program_factory.cpp index 266aab1c3778..07b207fa90c4 100644 --- a/ydb/library/yql/public/purecalc/common/program_factory.cpp +++ b/ydb/library/yql/public/purecalc/common/program_factory.cpp @@ -10,6 +10,7 @@ using namespace NYql::NPureCalc; TProgramFactory::TProgramFactory(const TProgramFactoryOptions& options) : Options_(options) + , ExprOutputStream_(Options_.ExprOutputStream) , CountersProvider_(nullptr) { EnsureLoggingInitialized(); @@ -83,6 +84,7 @@ IPullStreamWorkerFactoryPtr TProgramFactory::MakePullStreamWorkerFactory( Modules_, Options_.LLVMSettings, BlockEngineMode_, + ExprOutputStream_, CountersProvider_, mode, syntaxVersion, @@ -111,6 +113,7 @@ IPullListWorkerFactoryPtr TProgramFactory::MakePullListWorkerFactory( Modules_, Options_.LLVMSettings, BlockEngineMode_, + ExprOutputStream_, CountersProvider_, mode, syntaxVersion, @@ -143,6 +146,7 @@ IPushStreamWorkerFactoryPtr TProgramFactory::MakePushStreamWorkerFactory( Modules_, Options_.LLVMSettings, BlockEngineMode_, + ExprOutputStream_, CountersProvider_, mode, syntaxVersion, diff --git a/ydb/library/yql/public/purecalc/common/program_factory.h b/ydb/library/yql/public/purecalc/common/program_factory.h index 9fe7d432b824..44378a3768c4 100644 --- a/ydb/library/yql/public/purecalc/common/program_factory.h +++ b/ydb/library/yql/public/purecalc/common/program_factory.h @@ -24,6 +24,7 @@ namespace NYql { IModuleResolver::TPtr ModuleResolver_; TUserDataTable UserData_; EBlockEngineMode BlockEngineMode_; + IOutputStream* ExprOutputStream_; THashMap Modules_; NKikimr::NUdf::ICountersProvider* CountersProvider_; diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.cpp b/ydb/library/yql/public/purecalc/common/worker_factory.cpp index ee2000c0037a..d3221bd9d477 100644 --- a/ydb/library/yql/public/purecalc/common/worker_factory.cpp +++ b/ydb/library/yql/public/purecalc/common/worker_factory.cpp @@ -39,6 +39,7 @@ TWorkerFactory::TWorkerFactory(TWorkerFactoryOptions options, EProcessorM , UserData_(std::move(options.UserData)) , LLVMSettings_(std::move(options.LLVMSettings)) , BlockEngineMode_(options.BlockEngineMode) + , ExprOutputStream_(options.ExprOutputStream) , CountersProvider_(options.CountersProvider_) , NativeYtTypeFlags_(options.NativeYtTypeFlags_) , DeterministicTimeProviderSeed_(options.DeterministicTimeProviderSeed_) @@ -304,9 +305,19 @@ TExprNode::TPtr TWorkerFactory::Compile( ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "Failed to optimize"; } - if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) { - Cdbg << "After optimization:" << Endl; - ConvertToAst(*exprRoot, ExprContext_, 0, true).Root->PrettyPrintTo(Cdbg, TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote | TAstPrintFlags::AdaptArbitraryContent); + IOutputStream* exprOut = nullptr; + if (ExprOutputStream_) { + exprOut = ExprOutputStream_; + } else if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) { + exprOut = &Cdbg; + } + + if (exprOut) { + *exprOut << "After optimization:" << Endl; + ConvertToAst(*exprRoot, ExprContext_, 0, true).Root + ->PrettyPrintTo(*exprOut, TAstPrintFlags::PerLine + | TAstPrintFlags::ShortQuote + | TAstPrintFlags::AdaptArbitraryContent); } return exprRoot; } diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.h b/ydb/library/yql/public/purecalc/common/worker_factory.h index eab658c21355..f241f5a5635d 100644 --- a/ydb/library/yql/public/purecalc/common/worker_factory.h +++ b/ydb/library/yql/public/purecalc/common/worker_factory.h @@ -24,6 +24,7 @@ namespace NYql { const THashMap& Modules; TString LLVMSettings; EBlockEngineMode BlockEngineMode; + IOutputStream* ExprOutputStream; NKikimr::NUdf::ICountersProvider* CountersProvider_; ETranslationMode TranslationMode_; ui16 SyntaxVersion_; @@ -43,6 +44,7 @@ namespace NYql { const THashMap& Modules, TString LLVMSettings, EBlockEngineMode BlockEngineMode, + IOutputStream* ExprOutputStream, NKikimr::NUdf::ICountersProvider* CountersProvider, ETranslationMode translationMode, ui16 syntaxVersion, @@ -61,6 +63,7 @@ namespace NYql { , Modules(Modules) , LLVMSettings(std::move(LLVMSettings)) , BlockEngineMode(BlockEngineMode) + , ExprOutputStream(ExprOutputStream) , CountersProvider_(CountersProvider) , TranslationMode_(translationMode) , SyntaxVersion_(syntaxVersion) @@ -90,6 +93,7 @@ namespace NYql { TVector> UsedColumns_; TString LLVMSettings_; EBlockEngineMode BlockEngineMode_; + IOutputStream* ExprOutputStream_; NKikimr::NUdf::ICountersProvider* CountersProvider_; ui64 NativeYtTypeFlags_; TMaybe DeterministicTimeProviderSeed_; diff --git a/ydb/library/yql/tools/purebench/purebench.cpp b/ydb/library/yql/tools/purebench/purebench.cpp index 00c2305b65a8..c09141e72f96 100644 --- a/ydb/library/yql/tools/purebench/purebench.cpp +++ b/ydb/library/yql/tools/purebench/purebench.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -33,6 +34,7 @@ int Main(int argc, const char *argv[]) TString udfsDir; TString LLVMSettings; TString blockEngineSettings; + TString exprFile; opts.AddHelpOption(); opts.AddLongOption("ndebug", "should be at first argument, do not show debug info in error output").NoArgument(); opts.AddLongOption('b', "blocks-engine", "Block engine settings").StoreResult(&blockEngineSettings).DefaultValue("disable"); @@ -45,6 +47,8 @@ int Main(int argc, const char *argv[]) opts.AddLongOption("pt", "use PG syntax for test query").NoArgument(); opts.AddLongOption("udfs-dir", "directory with UDFs").StoreResult(&udfsDir).DefaultValue(""); opts.AddLongOption("llvm-settings", "LLVM settings").StoreResult(&LLVMSettings).DefaultValue(""); + opts.AddLongOption("print-expr", "print rebuild AST before execution").NoArgument(); + opts.AddLongOption("expr-file", "print AST to that file instead of stdout").StoreResult(&exprFile); opts.SetFreeArgsMax(0); TOptsParseResult res(&opts, argc, argv); @@ -52,6 +56,17 @@ int Main(int argc, const char *argv[]) factoryOptions.SetUDFsDir(udfsDir); factoryOptions.SetLLVMSettings(LLVMSettings); factoryOptions.SetBlockEngineSettings(blockEngineSettings); + + IOutputStream* exprOut = nullptr; + THolder exprFileHolder; + if (res.Has("print-expr")) { + exprOut = &Cout; + } else if (!exprFile.empty()) { + exprFileHolder.Reset(new TFixedBufferFileOutput(exprFile)); + exprOut = exprFileHolder.Get(); + } + factoryOptions.SetExprOutputStream(exprOut); + auto factory = MakeProgramFactory(factoryOptions); NYT::TNode members{NYT::TNode::CreateList()};