From 39125dd8e7801deb88d945ab801ceed0d625c642 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 20:34:04 +0200 Subject: [PATCH 1/3] allow specifying allocator for benchmarks --- benchmark/utils/general.hpp | 54 +++++++++++++++++++++++++++++--- core/device_hooks/cuda_hooks.cpp | 4 +++ core/device_hooks/hip_hooks.cpp | 3 ++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 5c6d849fe36..335ed687002 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -58,6 +59,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include "benchmark/utils/json.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" @@ -69,6 +73,10 @@ DEFINE_string(executor, "reference", "The executor used to run the benchmarks, one of: reference, " "omp, cuda, hip"); +DEFINE_string(allocator, "default", + "The allocator used in the executor. Only relevant for CUDA and " + "HIP executors, one of: default, async, host, unified"); + DEFINE_uint32(device_id, 0, "ID of the device where to run the code"); DEFINE_bool(overwrite, false, @@ -329,6 +337,40 @@ void backup_results(rapidjson::Document& results) } +inline std::shared_ptr create_cuda_allocator() +{ + std::string flag{FLAGS_allocator}; + if (flag == "default") { + return std::make_shared(); + } else if (flag == "async") { + return std::make_shared(nullptr); + } else if (flag == "unified") { + return std::make_shared(FLAGS_device_id); + } else if (flag == "host") { + return std::make_shared(FLAGS_device_id); + } else { + throw std::runtime_error{"Unknown allocator type " + flag}; + } +} + + +inline std::shared_ptr create_hip_allocator() +{ + std::string flag{FLAGS_allocator}; + if (flag == "default") { + return std::make_shared(); + } else if (flag == "async") { + return std::make_shared(nullptr); + } else if (flag == "unified") { + return std::make_shared(FLAGS_device_id); + } else if (flag == "host") { + return std::make_shared(FLAGS_device_id); + } else { + throw std::runtime_error{"Unknown allocator type " + flag}; + } +} + + // executor mapping const std::map(bool)>> executor_factory{ @@ -337,12 +379,14 @@ const std::map(bool)>> {"cuda", [](bool) { return gko::CudaExecutor::create(FLAGS_device_id, - gko::OmpExecutor::create()); + gko::OmpExecutor::create(), + create_cuda_allocator()); }}, {"hip", [](bool) { return gko::HipExecutor::create(FLAGS_device_id, - gko::OmpExecutor::create()); + gko::OmpExecutor::create(), + create_hip_allocator()); }}, {"dpcpp", [](bool use_gpu_timer) { auto property = dpcpp_queue_property::in_order; @@ -369,14 +413,16 @@ const std::map Date: Thu, 10 Aug 2023 21:03:25 +0200 Subject: [PATCH 2/3] fix CUDA_VERSION availability --- cuda/base/memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp index f605d9135ea..b5bfb14ac74 100644 --- a/cuda/base/memory.cpp +++ b/cuda/base/memory.cpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include From 21b4cd602f27cec0a1541db59c2ff2e9bbf85c08 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 21:46:26 +0200 Subject: [PATCH 3/3] increase repetitions for sparse_blas --- benchmark/sparse_blas/sparse_blas.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index cfa56ef81fe..d906e9f9e12 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -127,9 +127,12 @@ void apply_sparse_blas(const char* operation_name, allocator); auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - test_case[operation_name]["components"], allocator, 1); + test_case[operation_name]["components"], allocator, + repetitions); exec->add_logger(gen_logger); - op->run(); + for (unsigned i = 0; i < repetitions; i++) { + op->run(); + } exec->remove_logger(gen_logger); } op->write_stats(test_case[operation_name], allocator);