From 97aa3076245715b6274596d84442e3e4778d6638 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 16 Feb 2025 20:34:10 +0100 Subject: [PATCH] add work estimate framework --- core/CMakeLists.txt | 1 + core/base/work_estimate.cpp | 68 ++++++++ core/components/prefix_sum_kernels.hpp | 16 +- core/matrix/csr.cpp | 6 +- core/matrix/dense.cpp | 11 +- core/matrix/dense_kernels.hpp | 50 +++++- core/matrix/ell.cpp | 6 +- core/matrix/hybrid.cpp | 6 +- core/matrix/sellp.cpp | 6 +- include/ginkgo/core/base/executor.hpp | 179 ++++++++++++++------- include/ginkgo/core/base/work_estimate.hpp | 63 ++++++++ 11 files changed, 337 insertions(+), 75 deletions(-) create mode 100644 core/base/work_estimate.cpp create mode 100644 include/ginkgo/core/base/work_estimate.hpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 54ce8fb59e6..abe7c8877a8 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -42,6 +42,7 @@ target_sources(${ginkgo_core} base/segmented_array.cpp base/timer.cpp base/version.cpp + base/work_estimate.cpp components/range_minimum_query.cpp config/config.cpp config/config_helper.cpp diff --git a/core/base/work_estimate.cpp b/core/base/work_estimate.cpp new file mode 100644 index 00000000000..29a80d1dc47 --- /dev/null +++ b/core/base/work_estimate.cpp @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include + + +namespace gko { + + +compute_bound_work_estimate operator+(compute_bound_work_estimate a, + compute_bound_work_estimate b) +{ + return {a.flops + b.flops}; +} + + +compute_bound_work_estimate& compute_bound_work_estimate::operator+=( + compute_bound_work_estimate other) +{ + *this = *this + other; + return *this; +} + + +memory_bound_work_estimate operator+(memory_bound_work_estimate a, + memory_bound_work_estimate b) +{ + return {a.bytes_read + b.bytes_read, a.bytes_written + b.bytes_written}; +} + + +memory_bound_work_estimate& memory_bound_work_estimate::operator+=( + memory_bound_work_estimate other) +{ + *this = *this + other; + return *this; +} + + +custom_work_estimate operator+(custom_work_estimate a, custom_work_estimate b) +{ + GKO_ASSERT(a.operation_count_name == b.operation_count_name); + return {a.operation_count_name, a.operations + b.operations}; +} + + +custom_work_estimate& custom_work_estimate::operator+=( + custom_work_estimate other) +{ + *this = *this + other; + return *this; +} + + +kernel_work_estimate operator+(kernel_work_estimate a, kernel_work_estimate b) +{ + // this fails with std::bad_variant_access if the two estimates are of + // different types + return std::visit( + [b](auto a) -> kernel_work_estimate { + return a + std::get(b); + }, + a); +} + + +} // namespace gko diff --git a/core/components/prefix_sum_kernels.hpp b/core/components/prefix_sum_kernels.hpp index 8b68b54e29f..c04b92e5bfb 100644 --- a/core/components/prefix_sum_kernels.hpp +++ b/core/components/prefix_sum_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -10,6 +10,7 @@ #include #include +#include #include "core/base/kernel_declaration.hpp" @@ -53,6 +54,19 @@ GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(components, #undef GKO_DECLARE_ALL_AS_TEMPLATES +namespace work_estimate::components { + + +template +kernel_work_estimate prefix_sum_nonnegative(IndexType* counts, + size_type num_entries) +{ + return memory_bound_work_estimate{num_entries * sizeof(IndexType), + num_entries * sizeof(IndexType)}; +} + + +} // namespace work_estimate::components } // namespace kernels } // namespace gko diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index ca418241bf9..83ab9573c56 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -83,8 +83,8 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index, csr::is_sorted_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(prefix_sum_nonnegative, - components::prefix_sum_nonnegative); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(prefix_sum_nonnegative, + components::prefix_sum_nonnegative); GKO_REGISTER_OPERATION(inplace_absolute_array, components::inplace_absolute_array); GKO_REGISTER_OPERATION(outplace_absolute_array, diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 308b5e8f11e..b0d10e153bc 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -40,17 +40,18 @@ namespace dense { namespace { -GKO_REGISTER_OPERATION(simple_apply, dense::simple_apply); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(simple_apply, dense::simple_apply); GKO_REGISTER_OPERATION(apply, dense::apply); -GKO_REGISTER_OPERATION(copy, dense::copy); -GKO_REGISTER_OPERATION(fill, dense::fill); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(copy, dense::copy); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(fill, dense::fill); GKO_REGISTER_OPERATION(scale, dense::scale); GKO_REGISTER_OPERATION(inv_scale, dense::inv_scale); GKO_REGISTER_OPERATION(add_scaled, dense::add_scaled); GKO_REGISTER_OPERATION(sub_scaled, dense::sub_scaled); GKO_REGISTER_OPERATION(add_scaled_diag, dense::add_scaled_diag); GKO_REGISTER_OPERATION(sub_scaled_diag, dense::sub_scaled_diag); -GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot_dispatch); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(compute_dot, + dense::compute_dot_dispatch); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot_dispatch); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2_dispatch); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 7422b431aa0..62de611433a 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -476,6 +477,53 @@ GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(dense, GKO_DECLARE_ALL_AS_TEMPLATES); #undef GKO_DECLARE_ALL_AS_TEMPLATES +namespace work_estimate { +namespace dense { + + +template +kernel_work_estimate simple_apply(const matrix::Dense* a, + const matrix::Dense* b, + matrix::Dense* c) +{ + const auto a_rows = a->get_size()[0]; + const auto a_cols = a->get_size()[1]; + const auto b_cols = b->get_size()[1]; + return compute_bound_work_estimate{2 * a_rows * a_cols * b_cols}; +} + + +template +kernel_work_estimate copy(const matrix::Dense* input, + matrix::Dense* output) +{ + const auto memsize = input->get_size()[0] * input->get_size()[1]; + return memory_bound_work_estimate{memsize * sizeof(InValueType), + memsize * sizeof(OutValueType)}; +} + + +template +kernel_work_estimate fill(matrix::Dense* mat, ValueType value) +{ + return memory_bound_work_estimate{ + 0, mat->get_size()[0] * mat->get_size()[1] * sizeof(ValueType)}; +} + + +template +kernel_work_estimate compute_dot_dispatch(const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result, + array& tmp) +{ + const auto num_elements = x->get_size()[0] * x->get_size()[1]; + return memory_bound_work_estimate{2 * num_elements * sizeof(ValueType), 0}; +} + + +} // namespace dense +} // namespace work_estimate } // namespace kernels } // namespace gko diff --git a/core/matrix/ell.cpp b/core/matrix/ell.cpp index da166ba541e..b6951088fe7 100644 --- a/core/matrix/ell.cpp +++ b/core/matrix/ell.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -42,8 +42,8 @@ GKO_REGISTER_OPERATION(convert_to_csr, ell::convert_to_csr); GKO_REGISTER_OPERATION(count_nonzeros_per_row, ell::count_nonzeros_per_row); GKO_REGISTER_OPERATION(extract_diagonal, ell::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(prefix_sum_nonnegative, - components::prefix_sum_nonnegative); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(prefix_sum_nonnegative, + components::prefix_sum_nonnegative); GKO_REGISTER_OPERATION(inplace_absolute_array, components::inplace_absolute_array); GKO_REGISTER_OPERATION(outplace_absolute_array, diff --git a/core/matrix/hybrid.cpp b/core/matrix/hybrid.cpp index 4b36b7115ac..39e6aff8dd6 100644 --- a/core/matrix/hybrid.cpp +++ b/core/matrix/hybrid.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -42,8 +42,8 @@ GKO_REGISTER_OPERATION(compute_coo_row_ptrs, hybrid::compute_coo_row_ptrs); GKO_REGISTER_OPERATION(convert_idxs_to_ptrs, components::convert_idxs_to_ptrs); GKO_REGISTER_OPERATION(convert_to_csr, hybrid::convert_to_csr); GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(prefix_sum_nonnegative, - components::prefix_sum_nonnegative); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(prefix_sum_nonnegative, + components::prefix_sum_nonnegative); GKO_REGISTER_OPERATION(inplace_absolute_array, components::inplace_absolute_array); GKO_REGISTER_OPERATION(outplace_absolute_array, diff --git a/core/matrix/sellp.cpp b/core/matrix/sellp.cpp index d4cff180295..cde12bfb71b 100644 --- a/core/matrix/sellp.cpp +++ b/core/matrix/sellp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -31,8 +31,8 @@ namespace { GKO_REGISTER_OPERATION(spmv, sellp::spmv); GKO_REGISTER_OPERATION(advanced_spmv, sellp::advanced_spmv); GKO_REGISTER_OPERATION(convert_idxs_to_ptrs, components::convert_idxs_to_ptrs); -GKO_REGISTER_OPERATION(prefix_sum_nonnegative, - components::prefix_sum_nonnegative); +GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(prefix_sum_nonnegative, + components::prefix_sum_nonnegative); GKO_REGISTER_OPERATION(compute_slice_sets, sellp::compute_slice_sets); GKO_REGISTER_OPERATION(fill_in_matrix_data, sellp::fill_in_matrix_data); GKO_REGISTER_OPERATION(fill_in_dense, sellp::fill_in_dense); diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 224860b72b7..0fc12523cdd 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -273,6 +274,16 @@ class Operation { * @return the operation's name */ virtual const char* get_name() const noexcept; + + /** + * Returns a work estimate for this operation. + * + * @return a work estimate for this operation, if available + */ + virtual std::optional get_work_estimate() const + { + return {}; + } }; @@ -295,7 +306,8 @@ class RegisteredOperation : public Operation { * Creates a RegisteredOperation object from a functor and a name. * * @param name the name to be used for this operation - * @param op a functor object which will be called with the executor. + * @param num_params the number of parameters of the operation + * @param op a functor object which will be called with the executor */ RegisteredOperation(const char* name, Closure op) : name_(name), op_(std::move(op)) @@ -334,6 +346,34 @@ class RegisteredOperation : public Operation { }; +template +class RegisteredOperationWithWorkEstimate + : public RegisteredOperation { +public: + /** + * Creates a RegisteredOperationWithWorkEstimate object from an operation + * and estimate functor and a name. + * + * @param name the name to be used for this operation + * @param op a functor object which will be called with the executor + * @param estimate a functor object which will provide the work estimate + */ + RegisteredOperationWithWorkEstimate(const char* name, OperationClosure op, + EstimateClosure estimate) + : RegisteredOperation{name, std::move(op)}, + estimate_{std::move(estimate)} + {} + + std::optional get_work_estimate() const override + { + return estimate_(); + } + +private: + EstimateClosure estimate_; +}; + + template RegisteredOperation make_register_operation(const char* name, Closure op) @@ -342,9 +382,64 @@ RegisteredOperation make_register_operation(const char* name, } +template +RegisteredOperationWithWorkEstimate +make_register_operation_with_estimate(const char* name, int num_params, + OperationClosure op, + EstimateClosure estimate) +{ + return RegisteredOperationWithWorkEstimate{ + name, std::move(op), std::move(estimate)}; +} + + } // namespace detail +/** + * Internal helper macro that provides the generic lambda to dispatch a kernel. + */ +#define GKO_REGISTER_OPERATION_GENERIC_LAMBDA(_name, _kernel, _args) \ + [&_args...](auto exec) { \ + using exec_type = decltype(exec); \ + if (std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ + ::gko::kernels::reference::_kernel( \ + std::dynamic_pointer_cast( \ + exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ + ::gko::kernels::omp::_kernel( \ + std::dynamic_pointer_cast(exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ + ::gko::kernels::cuda::_kernel( \ + std::dynamic_pointer_cast(exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ + ::gko::kernels::hip::_kernel( \ + std::dynamic_pointer_cast(exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>::value) { \ + ::gko::kernels::dpcpp::_kernel( \ + std::dynamic_pointer_cast(exec), \ + std::forward(args)...); \ + } else { \ + GKO_NOT_IMPLEMENTED; \ + } \ + } + + /** * Binds a set of device-specific kernels to an Operation. * @@ -416,60 +511,16 @@ RegisteredOperation make_register_operation(const char* name, * * @ingroup Executor */ -#define GKO_REGISTER_OPERATION(_name, _kernel) \ - template \ - auto make_##_name(Args&&... args) \ - { \ - return ::gko::detail::make_register_operation( \ - #_kernel, [&args...](auto exec) { \ - using exec_type = decltype(exec); \ - if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::reference::_kernel( \ - std::dynamic_pointer_cast< \ - const ::gko::ReferenceExecutor>(exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::omp::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::cuda::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::hip::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::dpcpp::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else { \ - GKO_NOT_IMPLEMENTED; \ - } \ - }); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ +#define GKO_REGISTER_OPERATION(_name, _kernel) \ + template \ + auto make_##_name(Args&&... args) \ + { \ + return ::gko::detail::make_register_operation( \ + #_kernel, \ + GKO_REGISTER_OPERATION_GENERIC_LAMBDA(_name, _kernel, args)); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -523,6 +574,22 @@ RegisteredOperation make_register_operation(const char* name, "semi-colon warnings") +#define GKO_REGISTER_OPERATION_WITH_WORK_ESTIMATE(_name, _kernel) \ + template \ + auto make_##_name(Args&&... args) \ + { \ + return ::gko::detail::make_register_operation_with_estimate( \ + #_kernel, sizeof...(Args), \ + GKO_REGISTER_OPERATION_GENERIC_LAMBDA(_name, _kernel, args), [&] { \ + return ::gko::kernels::work_estimate::_kernel( \ + std::forward(args)...); \ + }); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + #define GKO_DECLARE_EXECUTOR_FRIEND(_type, ...) friend class _type /** diff --git a/include/ginkgo/core/base/work_estimate.hpp b/include/ginkgo/core/base/work_estimate.hpp new file mode 100644 index 00000000000..b97d2d76c30 --- /dev/null +++ b/include/ginkgo/core/base/work_estimate.hpp @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef GKO_PUBLIC_CORE_BASE_WORK_ESTIMATE_HPP_ +#define GKO_PUBLIC_CORE_BASE_WORK_ESTIMATE_HPP_ + + +#include + +#include + + +namespace gko { + + +/** Work estimate for a kernel that is likely compute-bound. */ +struct compute_bound_work_estimate { + size_type flops; + + friend compute_bound_work_estimate operator+(compute_bound_work_estimate a, + compute_bound_work_estimate b); + + compute_bound_work_estimate& operator+=(compute_bound_work_estimate other); +}; + + +/** Work estimate for a kernel that is likely memory-bound. */ +struct memory_bound_work_estimate { + size_type bytes_read; + size_type bytes_written; + + friend memory_bound_work_estimate operator+(memory_bound_work_estimate a, + memory_bound_work_estimate b); + + memory_bound_work_estimate& operator+=(memory_bound_work_estimate other); +}; + + +/** Work estimate based on a custom operation count. */ +struct custom_work_estimate { + std::string operation_count_name; + size_type operations; + + friend custom_work_estimate operator+(custom_work_estimate a, + custom_work_estimate b); + + custom_work_estimate& operator+=(custom_work_estimate other); +}; + + +using kernel_work_estimate = + std::variant; + + +kernel_work_estimate operator+(kernel_work_estimate a, kernel_work_estimate b); + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_WORK_ESTIMATE_HPP_