From 5c0330efeb65380c87269d519e1dcfe11e9abfc0 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 7 Dec 2022 10:34:55 +0100 Subject: [PATCH 1/4] Remove GPU to PU binding for HIP and CUDA --- include/ginkgo/core/base/executor.hpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 7623411d657..c535a7b3f00 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1526,10 +1526,13 @@ class CudaExecutor : public detail::ExecutorBase, this->get_exec_info().num_pu_per_cu = 0; this->CudaExecutor::populate_exec_info( machine_topology::get_instance()); - if (this->get_exec_info().closest_pu_ids.size()) { - machine_topology::get_instance()->bind_to_pus( - this->get_closest_pus()); - } + // FIXME: Binding GPU to the closest pus seems to have significant + // slowdowns on some systems + // if (this->get_exec_info().closest_pu_ids.size()) { + // machine_topology::get_instance()->bind_to_pus( + // this->get_closest_pus()); + // } + // it only gets attribute from device, so it should not be affected by // DeviceReset. this->set_gpu_property(); @@ -1732,10 +1735,13 @@ class HipExecutor : public detail::ExecutorBase, this->get_exec_info().num_computing_units = 0; this->get_exec_info().num_pu_per_cu = 0; this->HipExecutor::populate_exec_info(machine_topology::get_instance()); - if (this->get_exec_info().closest_pu_ids.size()) { - machine_topology::get_instance()->bind_to_pus( - this->get_closest_pus()); - } + // FIXME: Binding GPU to the closest pus seems to have significant + // slowdowns on some systems + // if (this->get_exec_info().closest_pu_ids.size()) { + // machine_topology::get_instance()->bind_to_pus( + // this->get_closest_pus()); + // } + // it only gets attribute from device, so it should not be affected by // DeviceReset. 
this->set_gpu_property(); From 635fec4d00be6377786a2856194a4c6542a0c9a1 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 7 Dec 2022 10:35:19 +0100 Subject: [PATCH 2/4] Add itercount param for dist example --- examples/distributed-solver/distributed-solver.cpp | 2 +- include/ginkgo/core/base/executor.hpp | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/examples/distributed-solver/distributed-solver.cpp b/examples/distributed-solver/distributed-solver.cpp index d9a1050f32d..527dd189f07 100644 --- a/examples/distributed-solver/distributed-solver.cpp +++ b/examples/distributed-solver/distributed-solver.cpp @@ -216,7 +216,7 @@ int main(int argc, char* argv[]) auto Ainv = solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on(exec), + gko::stop::Iteration::build().with_max_iters(num_rows).on(exec), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::absolute) .with_reduction_factor(1e-4) diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index c535a7b3f00..7f9689bda97 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1526,12 +1526,6 @@ class CudaExecutor : public detail::ExecutorBase, this->get_exec_info().num_pu_per_cu = 0; this->CudaExecutor::populate_exec_info( machine_topology::get_instance()); - // FIXME: Binding GPU to the closest pus seems to have significant - // slowdowns on some systems - // if (this->get_exec_info().closest_pu_ids.size()) { - // machine_topology::get_instance()->bind_to_pus( - // this->get_closest_pus()); - // } // it only gets attribute from device, so it should not be affected by // DeviceReset. 
@@ -1735,12 +1729,6 @@ class HipExecutor : public detail::ExecutorBase, this->get_exec_info().num_computing_units = 0; this->get_exec_info().num_pu_per_cu = 0; this->HipExecutor::populate_exec_info(machine_topology::get_instance()); - // FIXME: Binding GPU to the closest pus seems to have significant - // slowdowns on some systems - // if (this->get_exec_info().closest_pu_ids.size()) { - // machine_topology::get_instance()->bind_to_pus( - // this->get_closest_pus()); - // } // it only gets attribute from device, so it should not be affected by // DeviceReset. From b50f2d56f357f62e5f046b9b8d23b2bcbefff901 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 8 Dec 2022 07:45:09 +0100 Subject: [PATCH 3/4] Remove binding to closest PU altogether. --- examples/distributed-solver/distributed-solver.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/distributed-solver/distributed-solver.cpp b/examples/distributed-solver/distributed-solver.cpp index 527dd189f07..e579689f9e5 100644 --- a/examples/distributed-solver/distributed-solver.cpp +++ b/examples/distributed-solver/distributed-solver.cpp @@ -104,6 +104,8 @@ int main(int argc, char* argv[]) const auto executor_string = argc >= 2 ? argv[1] : "reference"; const auto grid_dim = static_cast<gko::size_type>(argc >= 3 ? std::atoi(argv[2]) : 100); + const auto num_iters = + static_cast<gko::size_type>(argc >= 4 ? std::atoi(argv[3]) : 1000); // Pick the requested executor.
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>> @@ -216,7 +218,8 @@ int main(int argc, char* argv[]) auto Ainv = solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(num_rows).on(exec), + gko::stop::Iteration::build().with_max_iters(num_iters).on( + exec), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::absolute) .with_reduction_factor(1e-4) From 1cfd83cd0ef89a8e8faa6267749e34b77d906086 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 8 Dec 2022 20:22:49 +0100 Subject: [PATCH 4/4] Fix doc for additional param --- examples/distributed-solver/distributed-solver.cpp | 3 ++- examples/distributed-solver/doc/intro.dox | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/distributed-solver/distributed-solver.cpp b/examples/distributed-solver/distributed-solver.cpp index e579689f9e5..809f47a7159 100644 --- a/examples/distributed-solver/distributed-solver.cpp +++ b/examples/distributed-solver/distributed-solver.cpp @@ -91,7 +91,8 @@ int main(int argc, char* argv[]) if (argc == 2 && (std::string(argv[1]) == "--help")) { if (rank == 0) { std::cerr << "Usage: " << argv[0] - << " [executor] [num_grid_points] " << std::endl; + << " [executor] [num_grid_points] [num_iterations] " + << std::endl; } std::exit(-1); } diff --git a/examples/distributed-solver/doc/intro.dox b/examples/distributed-solver/doc/intro.dox index 4f5e6532b6f..da8f7cb13aa 100644 --- a/examples/distributed-solver/doc/intro.dox +++ b/examples/distributed-solver/doc/intro.dox @@ -2,7 +2,7 @@

<h1>Introduction</h1>

This distributed solver example should help you understand the basics of using Ginkgo in a distributed setting. The example will solve a simple 1D Laplace equation where the system can be distributed row-wise to multiple processes. -To run the solver with multiple processes, use `mpirun -n NUM_PROCS ./distributed-solver [executor] [num_grid_points]`. +To run the solver with multiple processes, use `mpirun -n NUM_PROCS ./distributed-solver [executor] [num_grid_points] [num_iterations]`. If you are using GPU devices, please make sure that you run this example with at most as many processes as you have GPU devices available.