From f68868e639a36dea99187977308fe333011fd61c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 14 Apr 2023 16:25:37 +0200 Subject: [PATCH] add profiler support to benchmarks --- benchmark/blas/blas_common.hpp | 49 +++++++++++++++++++-- benchmark/conversions/conversions.cpp | 34 +++++++++++--- benchmark/preconditioner/preconditioner.cpp | 28 +++++++++++- benchmark/solver/solver_common.hpp | 32 ++++++++++++-- benchmark/sparse_blas/sparse_blas.cpp | 31 +++++++++++-- benchmark/spmv/spmv_common.hpp | 29 ++++++++++-- benchmark/utils/general.hpp | 41 +++++++++++++++++ benchmark/utils/generator.hpp | 29 ++++++++++++ core/log/profiler_hook.cpp | 18 +++++++- include/ginkgo/core/log/profiler_hook.hpp | 5 ++- 10 files changed, 272 insertions(+), 24 deletions(-) diff --git a/benchmark/blas/blas_common.hpp b/benchmark/blas/blas_common.hpp index 9c54257e922..082c6edcff1 100644 --- a/benchmark/blas/blas_common.hpp +++ b/benchmark/blas/blas_common.hpp @@ -434,6 +434,28 @@ dimensions parse_dims(rapidjson::Value& test_case) } +std::string describe(rapidjson::Value& test_case) +{ + std::stringstream ss; + auto optional_output = [&](const char* name) { + if (test_case.HasMember(name) && test_case[name].IsInt64()) { + ss << name << " = " << test_case[name].GetInt64() << " "; + } + }; + optional_output("n"); + optional_output("k"); + optional_output("m"); + optional_output("r"); + optional_output("stride"); + optional_output("stride_x"); + optional_output("stride_y"); + optional_output("stride_A"); + optional_output("stride_B"); + optional_output("stride_C"); + return ss.str(); +} + + template void apply_blas(const char* operation_name, std::shared_ptr exec, std::shared_ptr timer, const OpMap& operation_map, @@ -501,6 +523,17 @@ void run_blas_benchmarks(std::shared_ptr exec, { auto operations = split(FLAGS_operations, ','); auto& allocator = test_cases.GetAllocator(); + auto profiler_hook = create_profiler_hook(exec); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; for (auto& test_case : test_cases.GetArray()) { try { @@ -521,10 +554,17 @@ void run_blas_benchmarks(std::shared_ptr exec, if (do_print) { std::clog << "Running test case: " << test_case << std::endl; } - + // annotate the test case + // This string needs to outlive `test_case_range` to make sure we + // don't use its const char* c_str() after it was freed. + auto test_case_str = describe(test_case); + auto test_case_range = annotate(test_case_str.c_str()); for (const auto& operation_name : operations) { - apply_blas(operation_name.c_str(), exec, timer, operation_map, - test_case, allocator); + { + auto operation_range = annotate(operation_name.c_str()); + apply_blas(operation_name.c_str(), exec, timer, + operation_map, test_case, allocator); + } if (do_print) { std::clog << "Current state:" << std::endl @@ -538,4 +578,7 @@ void run_blas_benchmarks(std::shared_ptr exec, << std::endl; } } + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } } diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index 403f416d375..e0277093c8c 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -131,6 +131,19 @@ int main(int argc, char* argv[]) } auto& allocator = test_cases.GetAllocator(); + auto profiler_hook = create_profiler_hook(exec); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; + + DefaultSystemGenerator<> generator{}; for (auto& test_case : test_cases.GetArray()) { std::clog << "Benchmarking conversions. " << std::endl; @@ -146,7 +159,7 @@ int main(int argc, char* argv[]) std::clog << "Running test case: " << test_case << std::endl; gko::matrix_data data; try { - data = DefaultSystemGenerator<>::generate_matrix_data(test_case); + data = generator.generate_matrix_data(test_case); } catch (std::exception& e) { std::cerr << "Error setting up matrix data, what(): " << e.what() << std::endl; @@ -160,6 +173,11 @@ int main(int argc, char* argv[]) std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << ")" << std::endl; add_or_set_member(test_case, "size", data.size[0], allocator); + // annotate the test case + // This string needs to outlive `test_case_range` to make sure we + // don't use its const char* c_str() after it was freed. + auto test_case_str = generator.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); for (const auto& format_from : formats) { try { auto matrix_from = @@ -175,10 +193,13 @@ int main(int argc, char* argv[]) conversion_case.HasMember(conversion_name.c_str())) { continue; } - - convert_matrix(matrix_from.get(), format_to.c_str(), - conversion_name.c_str(), exec, test_case, - allocator); + { + auto conversion_range = + annotate(conversion_name.c_str()); + convert_matrix(matrix_from.get(), format_to.c_str(), + conversion_name.c_str(), exec, test_case, + allocator); + } std::clog << "Current state:" << std::endl << test_cases << std::endl; } @@ -202,6 +223,9 @@ int main(int argc, char* argv[]) } } } + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } std::cout << test_cases << std::endl; } diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 46370657a45..c1a2a4ab003 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -286,6 +286,17 @@ int main(int argc, char* argv[]) } auto& allocator = test_cases.GetAllocator(); + auto profiler_hook = create_profiler_hook(exec); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; DefaultSystemGenerator<> generator{}; @@ -308,6 +319,12 @@ int main(int argc, char* argv[]) } std::clog << "Running test case: " << test_case << std::endl; + // annotate the test case + // This string needs to outlive `test_case_range` to make sure we + // don't use its const char* c_str() after it was freed. + auto test_case_str = generator.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); + auto data = generator.generate_matrix_data(test_case); auto system_matrix = @@ -322,8 +339,12 @@ int main(int argc, char* argv[]) << std::endl; add_or_set_member(test_case, "size", data.size[0], allocator); for (const auto& precond_name : preconditioners) { - run_preconditioner(precond_name.c_str(), exec, system_matrix, - b.get(), x.get(), test_case, allocator); + { + auto precond_range = annotate(precond_name.c_str()); + run_preconditioner(precond_name.c_str(), exec, + system_matrix, b.get(), x.get(), + test_case, allocator); + } std::clog << "Current state:" << std::endl << test_cases << std::endl; backup_results(test_cases); @@ -333,6 +354,9 @@ int main(int argc, char* argv[]) << std::endl; } } + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } std::cout << test_cases << std::endl; } diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index fc10d50050a..b83d04f368f 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -610,6 +610,17 @@ void run_solver_benchmarks(std::shared_ptr exec, } auto& allocator = test_cases.GetAllocator(); + auto profiler_hook = create_profiler_hook(exec); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; for (auto& test_case : test_cases.GetArray()) { try { @@ -628,6 +639,12 @@ void run_solver_benchmarks(std::shared_ptr exec, })) { continue; } + // annotate the test case + // This string needs to outlive `test_case_range` to make sure we + // don't use its const char* c_str() after it was freed. + auto test_case_str = system_generator.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); + if (do_print) { std::clog << "Running test case: " << test_case << std::endl; } @@ -660,16 +677,20 @@ void run_solver_benchmarks(std::shared_ptr exec, allocator); auto precond_solver_name = begin(precond_solvers); for (const auto& solver_name : solvers) { + auto solver_range = annotate(solver_name.c_str()); for (const auto& precond_name : preconds) { if (do_print) { std::clog << "\tRunning solver: " << *precond_solver_name << std::endl; } - solve_system(solver_name, precond_name, - precond_solver_name->c_str(), exec, timer, - system_matrix, b.get(), x.get(), test_case, - allocator); + { + auto precond_range = annotate(precond_name.c_str()); + solve_system(solver_name, precond_name, + precond_solver_name->c_str(), exec, timer, + system_matrix, b.get(), x.get(), test_case, + allocator); + } if (do_print) { backup_results(test_cases); } @@ -686,6 +707,9 @@ void run_solver_benchmarks(std::shared_ptr exec, } } } + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } } diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 2cb1a1f63ea..249d5d77427 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -166,9 +166,22 @@ int main(int argc, char* argv[]) print_general_information(extra_information); auto& allocator = test_cases.GetAllocator(); + auto profiler_hook = create_profiler_hook(exec); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; auto operations = split(FLAGS_operations, ','); + DefaultSystemGenerator<> generator{}; + for (auto& test_case : test_cases.GetArray()) { try { // set up benchmark @@ -180,8 +193,12 @@ int main(int argc, char* argv[]) } auto& sp_blas_case = test_case[benchmark_name]; std::clog << "Running test case: " << test_case << std::endl; - auto data = - DefaultSystemGenerator<>::generate_matrix_data(test_case); + // annotate the test case + // This string needs to outlive `test_case_range` to make sure we + // don't use its const char* c_str() after it was freed. + auto test_case_str = generator.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); + auto data = generator.generate_matrix_data(test_case); data.ensure_row_major_order(); std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << "), " << data.nonzeros.size() @@ -196,8 +213,11 @@ int main(int argc, char* argv[]) for (const auto& operation_name : operations) { if (FLAGS_overwrite || !sp_blas_case.HasMember(operation_name.c_str())) { - apply_sparse_blas(operation_name.c_str(), exec, mtx.get(), - sp_blas_case, allocator); + { + auto operation_range = annotate(operation_name.c_str()); + apply_sparse_blas(operation_name.c_str(), exec, + mtx.get(), sp_blas_case, allocator); + } std::clog << "Current state:" << std::endl << test_cases << std::endl; backup_results(test_cases); @@ -215,6 +235,9 @@ int main(int argc, char* argv[]) } } } + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } std::cout << test_cases << std::endl; } diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index 7fe1aad436d..e7aaa683e55 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -160,6 +160,17 @@ void run_spmv_benchmark(std::shared_ptr exec, std::shared_ptr timer, bool do_print) { auto& allocator = test_cases.GetAllocator(); + auto profiler_hook = create_profiler_hook(exec); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; for (auto& test_case : test_cases.GetArray()) { try { @@ -181,6 +192,12 @@ void run_spmv_benchmark(std::shared_ptr exec, if (do_print) { std::clog << "Running test case: " << test_case << std::endl; } + // annotate the test case + // This string needs to outlive `test_case_range` to make sure we + // don't use its const char* c_str() after it was freed. + auto test_case_str = system_generator.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); + auto data = system_generator.generate_matrix_data(test_case); auto nrhs = FLAGS_nrhs; @@ -213,9 +230,12 @@ void run_spmv_benchmark(std::shared_ptr exec, exec->synchronize(); } for (const auto& format_name : formats) { - apply_spmv(format_name.c_str(), exec, system_generator, timer, - data, b.get(), x.get(), answer.get(), test_case, - allocator); + { + auto format_range = annotate(format_name.c_str()); + apply_spmv(format_name.c_str(), exec, system_generator, + timer, data, b.get(), x.get(), answer.get(), + test_case, allocator); + } if (do_print) { std::clog << "Current state:" << std::endl << test_cases << std::endl; @@ -246,6 +266,9 @@ void run_spmv_benchmark(std::shared_ptr exec, } } } + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } } #endif // GINKGO_BENCHMARK_SPMV_SPMV_COMMON_HPP diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index dcdb5552ed1..b37999a0e70 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -93,6 +93,15 @@ DEFINE_bool(keep_errors, true, DEFINE_bool(nested_names, false, "If set, separately logs nested operations"); +DEFINE_bool(profile, false, + "If set, enables profiler mode: 1 repetition, 0 warmup " + "repetitions, profiler_hook=auto (if it is not otherwise set)"); + +DEFINE_string( + profiler_hook, "none", + "Which profiler annotation mode to use, if any. Options are nvtx, roctx, " + "vtune, tau, debug, auto (choose based on executor)."); + DEFINE_uint32(seed, 42, "Seed used for the random number generator"); DEFINE_uint32(warmup, 2, "Warm-up repetitions"); @@ -153,6 +162,13 @@ void initialize_argument_parsing(int* argc, char** argv[], std::string& header, ver << gko::version_info::get(); gflags::SetVersionString(ver.str()); gflags::ParseCommandLineFlags(argc, argv, true); + if (FLAGS_profile) { + FLAGS_repetitions = "1"; + FLAGS_warmup = 0; + if (FLAGS_profiler_hook == "none") { + FLAGS_profiler_hook = "auto"; + } + } } /** @@ -181,6 +197,31 @@ void print_general_information(const std::string& extra) } +std::shared_ptr create_profiler_hook( + std::shared_ptr exec) +{ + using gko::log::ProfilerHook; + std::map()>> + hook_map{ + {"none", [] { return std::shared_ptr{}; }}, + {"auto", [&] { return ProfilerHook::create_for_executor(exec); }}, + {"nvtx", [] { return ProfilerHook::create_nvtx(); }}, + {"roctx", [] { return ProfilerHook::create_roctx(); }}, + {"tau", [] { return ProfilerHook::create_tau(); }}, + {"vtune", [] { return ProfilerHook::create_vtune(); }}, + {"debug", [] { + return ProfilerHook::create_custom( + [](const char* name, gko::log::profile_event_category) { + std::clog << "DEBUG: begin " << name << '\n'; + }, + [](const char* name, gko::log::profile_event_category) { + std::clog << "DEBUG: end " << name << '\n'; + }); + }}}; + return hook_map.at(FLAGS_profiler_hook)(); +} + + // Returns a random number engine std::default_random_engine& get_engine() { diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index f55c51fbf56..5524ad19744 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -67,6 +67,20 @@ struct DefaultSystemGenerator { } } + static std::string describe_config(rapidjson::Value& config) + { + if (config.HasMember("filename")) { + return config.GetString(); + } else if (config.HasMember("stencil")) { + std::stringstream ss; + ss << "stencil(" << config["size"].GetInt64() << "," + << config["stencil"].GetString() << ")"; + return ss.str(); + } else { + throw std::runtime_error("No known way to describe config."); + } + } + static std::shared_ptr generate_matrix_with_optimal_format( std::shared_ptr exec, rapidjson::Value& config) { @@ -175,6 +189,21 @@ struct DistributedDefaultSystemGenerator { } } + std::string describe_config(rapidjson::Value& config) const + { + if (config.HasMember("filename")) { + return config.GetString(); + } else if (config.HasMember("stencil")) { + std::stringstream ss; + ss << "stencil(" << config["size"].GetInt64() << "," + << config["stencil"].GetString() << "," + << config["comm_pattern"].GetString() << ")"; + return ss.str(); + } else { + throw std::runtime_error("No known way to describe config."); + } + } + std::shared_ptr generate_matrix_with_optimal_format( std::shared_ptr exec, rapidjson::Value& config) const { diff --git a/core/log/profiler_hook.cpp b/core/log/profiler_hook.cpp index 8149045af33..1c3f2582a13 100644 --- a/core/log/profiler_hook.cpp +++ b/core/log/profiler_hook.cpp @@ -438,6 +438,11 @@ std::shared_ptr ProfilerHook::create_custom(hook_function begin, } +profiling_scope_guard::profiling_scope_guard() + : empty_{true}, name_{}, category_{profile_event_category::internal} +{} + + /** * Scope guard that annotates its scope with the provided profiler hooks. */ @@ -461,10 +466,19 @@ profiling_scope_guard::profiling_scope_guard(profiling_scope_guard&& other) : empty_{std::exchange(other.empty_, true)}, name_{std::exchange(other.name_, nullptr)}, category_{other.category_}, - end_{ - std::exchange(other.end_, [](const char*, profile_event_category) {})} + end_{std::move(other.end_)} {} +profiling_scope_guard& profiling_scope_guard::operator=( + profiling_scope_guard&& other) +{ + empty_ = std::exchange(other.empty_, true); + name_ = std::exchange(other.name_, nullptr); + category_ = other.category_; + end_ = std::move(other.end_); + return *this; +} + } // namespace log } // namespace gko diff --git a/include/ginkgo/core/log/profiler_hook.hpp b/include/ginkgo/core/log/profiler_hook.hpp index 9d4ff4aee7a..1aa92ecd451 100644 --- a/include/ginkgo/core/log/profiler_hook.hpp +++ b/include/ginkgo/core/log/profiler_hook.hpp @@ -427,6 +427,9 @@ class ProfilerHook : public Logger { */ class profiling_scope_guard { public: + /** Creates an empty (moved-from) scope guard. */ + profiling_scope_guard(); + /** * Creates the scope guard * @@ -450,7 +453,7 @@ class profiling_scope_guard { profiling_scope_guard& operator=(const profiling_scope_guard&) = delete; - profiling_scope_guard& operator=(profiling_scope_guard&&) = delete; + profiling_scope_guard& operator=(profiling_scope_guard&&); private: bool empty_;