Skip to content

Commit

Permalink
add work estimates to profiler_hook logger
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed Feb 16, 2025
1 parent 5642f5e commit 10f9a0a
Show file tree
Hide file tree
Showing 5 changed files with 456 additions and 94 deletions.
89 changes: 60 additions & 29 deletions core/log/profiler_hook.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand All @@ -9,6 +9,7 @@
#include <sstream>

#include <ginkgo/core/base/name_demangling.hpp>
#include <ginkgo/core/base/work_estimate.hpp>
#include <ginkgo/core/log/logger.hpp>
#include <ginkgo/core/solver/solver_base.hpp>
#include <ginkgo/core/stop/criterion.hpp>
Expand All @@ -33,7 +34,7 @@ void ProfilerHook::on_allocation_completed(const gko::Executor* exec,
const gko::uintptr&) const
{
this->maybe_synchronize(exec);
this->end_hook_("allocate", profile_event_category::memory);
this->end_hook_("allocate", profile_event_category::memory, {});
}


Expand All @@ -49,7 +50,7 @@ void ProfilerHook::on_free_completed(const gko::Executor* exec,
const gko::uintptr&) const
{
this->maybe_synchronize(exec);
this->end_hook_("free", profile_event_category::memory);
this->end_hook_("free", profile_event_category::memory, {});
}


Expand All @@ -67,11 +68,12 @@ void ProfilerHook::on_copy_started(const gko::Executor* from,
void ProfilerHook::on_copy_completed(const gko::Executor* from,
const gko::Executor* to,
const gko::uintptr&, const gko::uintptr&,
const gko::size_type&) const
const gko::size_type& size) const
{
this->maybe_synchronize(from);
this->maybe_synchronize(to);
this->end_hook_("copy", profile_event_category::operation);
this->end_hook_("copy", profile_event_category::operation,
memory_bound_work_estimate{size, size});
}


Expand All @@ -87,7 +89,8 @@ void ProfilerHook::on_operation_completed(const Executor* exec,
const Operation* operation) const
{
this->maybe_synchronize(exec);
this->end_hook_(operation->get_name(), profile_event_category::operation);
this->end_hook_(operation->get_name(), profile_event_category::operation,
operation->get_work_estimate());
}


Expand All @@ -109,7 +112,7 @@ void ProfilerHook::on_polymorphic_object_copy_completed(
std::stringstream ss;
ss << "copy(" << stringify_object(from) << "," << stringify_object(to)
<< ")";
this->end_hook_(ss.str().c_str(), profile_event_category::object);
this->end_hook_(ss.str().c_str(), profile_event_category::object, {});
}


Expand All @@ -131,7 +134,7 @@ void ProfilerHook::on_polymorphic_object_move_completed(
std::stringstream ss;
ss << "move(" << stringify_object(from) << "," << stringify_object(to)
<< ")";
this->end_hook_(ss.str().c_str(), profile_event_category::object);
this->end_hook_(ss.str().c_str(), profile_event_category::object, {});
}


Expand All @@ -153,9 +156,9 @@ void ProfilerHook::on_linop_apply_completed(const LinOp* A, const LinOp* b,
std::stringstream ss;
ss << "apply(" << stringify_object(A) << ")";
if (dynamic_cast<const solver::IterativeBase*>(A)) {
this->end_hook_("iteration", profile_event_category::solver);
this->end_hook_("iteration", profile_event_category::solver, {});
}
this->end_hook_(ss.str().c_str(), profile_event_category::linop);
this->end_hook_(ss.str().c_str(), profile_event_category::linop, {});
}


Expand Down Expand Up @@ -183,9 +186,9 @@ void ProfilerHook::on_linop_advanced_apply_completed(const LinOp* A,
std::stringstream ss;
ss << "advanced_apply(" << stringify_object(A) << ")";
if (dynamic_cast<const solver::IterativeBase*>(A)) {
this->end_hook_("iteration", profile_event_category::solver);
this->end_hook_("iteration", profile_event_category::solver, {});
}
this->end_hook_(ss.str().c_str(), profile_event_category::linop);
this->end_hook_(ss.str().c_str(), profile_event_category::linop, {});
}


Expand All @@ -203,7 +206,7 @@ void ProfilerHook::on_linop_factory_generate_completed(
{
std::stringstream ss;
ss << "generate(" << stringify_object(factory) << ")";
this->end_hook_(ss.str().c_str(), profile_event_category::factory);
this->end_hook_(ss.str().c_str(), profile_event_category::factory, {});
}


Expand Down Expand Up @@ -240,7 +243,7 @@ void ProfilerHook::on_criterion_check_completed(
{
std::stringstream ss;
ss << "check(" << stringify_object(criterion) << ")";
this->end_hook_(ss.str().c_str(), profile_event_category::criterion);
this->end_hook_(ss.str().c_str(), profile_event_category::criterion, {});
}


Expand All @@ -252,7 +255,7 @@ void ProfilerHook::on_iteration_complete(
{
if (num_iterations > 0 &&
dynamic_cast<const solver::IterativeBase*>(solver) && !stopped) {
this->end_hook_("iteration", profile_event_category::solver);
this->end_hook_("iteration", profile_event_category::solver, {});
this->begin_hook_("iteration", profile_event_category::solver);
}
}
Expand Down Expand Up @@ -292,8 +295,12 @@ void ProfilerHook::set_object_name(ptr_param<const PolymorphicObject> obj,

profiling_scope_guard ProfilerHook::user_range(const char* name) const
{
return profiling_scope_guard{name, profile_event_category::user,
begin_hook_, end_hook_};
return profiling_scope_guard{
name, profile_event_category::user, begin_hook_,
[end_hook = end_hook_](const char* name,
profile_event_category category) {
end_hook(name, category, {});
}};
}


Expand All @@ -303,12 +310,31 @@ void ProfilerHook::set_synchronization(bool synchronize)
}


static ProfilerHook::hook_function wrap_work_estimate_hook_function(
ProfilerHook::work_estimate_hook_function end_hook)
{
return [end_hook](const char* name, profile_event_category category) {
end_hook(name, category, {});
};
}


static ProfilerHook::work_estimate_hook_function wrap_hook_function(
ProfilerHook::hook_function end_hook)
{
return [end_hook](const char* name, profile_event_category category,
std::optional<kernel_work_estimate>) {
end_hook(name, category);
};
}


void ProfilerHook::maybe_synchronize(const Executor* exec) const
{
if (synchronize_) {
profiling_scope_guard sync_guard{"synchronize",
profile_event_category::internal,
begin_hook_, end_hook_};
profiling_scope_guard sync_guard{
"synchronize", profile_event_category::internal, begin_hook_,
wrap_work_estimate_hook_function(end_hook_)};
exec->synchronize();
}
}
Expand All @@ -327,7 +353,7 @@ std::string ProfilerHook::stringify_object(const PolymorphicObject* obj) const
}


ProfilerHook::ProfilerHook(hook_function begin, hook_function end)
ProfilerHook::ProfilerHook(hook_function begin, work_estimate_hook_function end)
: synchronize_{false}, begin_hook_{begin}, end_hook_{end}
{}

Expand Down Expand Up @@ -355,30 +381,27 @@ std::shared_ptr<ProfilerHook> ProfilerHook::create_tau(bool initialize)
new int, tau_finalize_deleter{}};
}
}
return std::shared_ptr<ProfilerHook>{new ProfilerHook{begin_tau, end_tau}};
return create_custom(begin_tau, end_tau);
}


std::shared_ptr<ProfilerHook> ProfilerHook::create_vtune()
{
auto fns = create_vtune_fns();
return std::shared_ptr<ProfilerHook>{
new ProfilerHook{std::move(fns.first), std::move(fns.second)}};
return create_custom(std::move(fns.first), std::move(fns.second));
}


std::shared_ptr<ProfilerHook> ProfilerHook::create_nvtx(uint32 color_rgb)
{
init_nvtx();
return std::shared_ptr<ProfilerHook>{
new ProfilerHook{begin_nvtx_fn(color_rgb), end_nvtx}};
return create_custom(begin_nvtx_fn(color_rgb), end_nvtx);
}


std::shared_ptr<ProfilerHook> ProfilerHook::create_roctx()
{
return std::shared_ptr<ProfilerHook>{
new ProfilerHook{begin_roctx, end_roctx}};
return create_custom(begin_roctx, end_roctx);
}


Expand All @@ -400,10 +423,18 @@ std::shared_ptr<ProfilerHook> ProfilerHook::create_for_executor(
}


std::shared_ptr<ProfilerHook> ProfilerHook::create_custom(
hook_function begin, work_estimate_hook_function end)
{
return std::shared_ptr<ProfilerHook>{
new ProfilerHook{begin, std::move(end)}};
}


std::shared_ptr<ProfilerHook> ProfilerHook::create_custom(hook_function begin,
hook_function end)
{
return std::shared_ptr<ProfilerHook>{new ProfilerHook{begin, end}};
return create_custom(begin, wrap_hook_function(std::move(end)));
}


Expand Down
55 changes: 49 additions & 6 deletions core/log/profiler_hook_summary.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand All @@ -8,6 +8,8 @@
#include <unordered_map>
#include <vector>

#include <ginkgo/core/base/work_estimate.hpp>

#include "core/log/profiler_hook.hpp"


Expand Down Expand Up @@ -95,8 +97,36 @@ void pop_all(Summary& s)
<< "\"\n";
#endif
}
s.pop(s.get_top_name().c_str(), true);
s.pop(s.get_top_name().c_str(), {}, true);
}
}


std::optional<kernel_work_estimate> accumulate_work_estimate(
const char* name, std::optional<kernel_work_estimate> existing,
std::optional<kernel_work_estimate> work, bool first)
{
if (first) {
GKO_ASSERT(!existing.has_value());
return work;
}
if (work.has_value() != existing.has_value()) {
#if GKO_VERBOSE_LEVEL >= 1
// this can happen if some parts use GKO_REGISTER_OPERATION
// and some use GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE
std::cerr << "WARNING: Operation " << name
<< " has inconsistent work estimate - some kernel calls "
"provide an estimate, but others don't.\n";
#endif
}
if (work.has_value() && existing.has_value()) {
// if they have different types, this throws an exception
return *existing + *work;
}
if (existing.has_value()) {
return existing;
}
return {};
}


Expand Down Expand Up @@ -167,7 +197,8 @@ struct summary : summary_base {
overhead += cpu_clock::now() - cpu_now;
}

void pop(const char* name, bool allow_pop_root = false)
void pop(const char* name, std::optional<kernel_work_estimate> work,
bool allow_pop_root = false)
{
const auto cpu_now = cpu_clock::now();
std::lock_guard<std::mutex> guard{mutex};
Expand All @@ -187,6 +218,8 @@ struct summary : summary_base {
const auto elapsed = timer->difference_async(partial_entry.second, now);
release_time_point(std::move(partial_entry.second));
release_time_point(std::move(now));
entry.work_estimate = accumulate_work_estimate(
name, entry.work_estimate, work, entry.count == 0);
entry.count++;
entry.inclusive += elapsed;
entry.exclusive += elapsed;
Expand All @@ -211,6 +244,7 @@ struct nested_summary : summary_base {
int64 parent_id;
std::chrono::nanoseconds elapsed{};
int64 count{};
std::optional<kernel_work_estimate> work_estimate{};

entry(int64 name_id, int64 node_id, int64 parent_id)
: name_id{name_id}, node_id{node_id}, parent_id{parent_id}
Expand Down Expand Up @@ -291,7 +325,8 @@ struct nested_summary : summary_base {
overhead += cpu_clock::now() - cpu_now;
}

void pop(const char* name, bool allow_pop_root = false)
void pop(const char* name, std::optional<kernel_work_estimate> work,
bool allow_pop_root = false)
{
const auto cpu_now = cpu_clock::now();
std::lock_guard<std::mutex> guard{mutex};
Expand All @@ -311,6 +346,8 @@ struct nested_summary : summary_base {
const auto elapsed = timer->difference_async(partial_entry.start, now);
release_time_point(std::move(partial_entry.start));
release_time_point(std::move(now));
node.work_estimate = accumulate_work_estimate(name, node.work_estimate,
work, node.count == 0);
node.count++;
node.elapsed += elapsed;
const auto cpu_now4 = cpu_clock::now();
Expand Down Expand Up @@ -387,7 +424,10 @@ std::shared_ptr<ProfilerHook> ProfilerHook::create_summary(
data->check_nesting = debug_check_nesting;
return std::shared_ptr<ProfilerHook>{new ProfilerHook{
[data](const char* name, profile_event_category) { data->push(name); },
[data](const char* name, profile_event_category) { data->pop(name); }}};
[data](const char* name, profile_event_category,
std::optional<kernel_work_estimate> work) {
data->pop(name, work);
}}};
}


Expand All @@ -409,7 +449,10 @@ std::shared_ptr<ProfilerHook> ProfilerHook::create_nested_summary(
data->check_nesting = debug_check_nesting;
return std::shared_ptr<ProfilerHook>{new ProfilerHook{
[data](const char* name, profile_event_category) { data->push(name); },
[data](const char* name, profile_event_category) { data->pop(name); }}};
[data](const char* name, profile_event_category,
std::optional<kernel_work_estimate> work) {
data->pop(name, work);
}}};
}


Expand Down
Loading

0 comments on commit 10f9a0a

Please sign in to comment.