From 56fbc58470ccddb3861e807e9f801985f2682028 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 15 Jan 2025 13:41:33 +0100 Subject: [PATCH 1/5] add distributed mg example --- examples/CMakeLists.txt | 2 +- .../CMakeLists.txt | 10 + ...ibuted-multigrid-preconditioned-solver.cpp | 246 ++++++++++++++++++ .../doc/builds-on | 1 + .../doc/intro.dox | 9 + .../doc/kind | 1 + .../doc/results.dox | 18 ++ .../doc/short-intro | 1 + .../doc/tooltip | 1 + 9 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt create mode 100644 examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp create mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/builds-on create mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/intro.dox create mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/kind create mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/results.dox create mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/short-intro create mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/tooltip diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index d547ffe83cc..d58b9a59356 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -60,7 +60,7 @@ if(GINKGO_HAVE_PAPI_SDE) endif() if(GINKGO_BUILD_MPI) - list(APPEND EXAMPLES_LIST distributed-solver) + list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver) endif() find_package(Kokkos 4.1.00 QUIET) diff --git a/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt b/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt new file mode 100644 index 00000000000..0de5e4cd44a --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.16) +project(distributed-multigrid-preconditioned-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.10.0 REQUIRED) +endif() + +add_executable(distributed-multigrid-preconditioned-solver distributed-multigrid-preconditioned-solver.cpp) +target_link_libraries(distributed-multigrid-preconditioned-solver Ginkgo::ginkgo) diff --git a/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp b/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp new file mode 100644 index 00000000000..66f05ca911a --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp @@ -0,0 +1,246 @@ +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +// @sect3{Include files} + +// This is the main ginkgo header file. +#include + +// Add the C++ iostream header to output information to the console. +#include +// Add the STL map header for the executor selection +#include +// Add the string manipulation header to handle strings. +#include + + +int main(int argc, char* argv[]) +{ + // @sect3{Initialize the MPI environment} + // Since this is an MPI program, we need to initialize and finalize + // MPI at the begin and end respectively of our program. This can be easily + // done with the following helper construct that uses RAII to automate the + // initialization and finalization. 
+ const gko::experimental::mpi::environment env(argc, argv); + // @sect3{Type Definitions} + // Define the needed types. In a parallel program we need to differentiate + // between global and local indices, thus we have two index types. + using GlobalIndexType = gko::int64; + using LocalIndexType = gko::int32; + // The underlying value type. + using ValueType = double; + // As vector type we use the following, which implements a subset of @ref + // gko::matrix::Dense. + using dist_vec = gko::experimental::distributed::Vector; + // As matrix type we simply use the following type, which can read + // distributed data and be applied to a distributed vector. + using dist_mtx = + gko::experimental::distributed::Matrix; + // We still need a localized vector type to be used as scalars in the + // advanced apply operations. + using vec = gko::matrix::Dense; + // The partition type describes how the rows of the matrices are + // distributed. + using part_type = + gko::experimental::distributed::Partition; + // We can use here the same solver type as you would use in a + // non-distributed program. Please note that not all solvers support + // distributed systems at the moment. + using solver = gko::solver::Cg; + using schwarz = gko::experimental::distributed::preconditioner::Schwarz< + ValueType, LocalIndexType, GlobalIndexType>; + using bj = gko::preconditioner::Jacobi; + using mg = gko::solver::Multigrid; + using pgm = gko::multigrid::Pgm; + + // Create an MPI communicator get the rank of the calling process. + const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); + const auto rank = comm.rank(); + + // @sect3{User Input Handling} + // User input settings: + // - The executor, defaults to reference. + // - The number of grid points, defaults to 100. + // - The number of iterations, defaults to 1000. + if (argc == 2 && (std::string(argv[1]) == "--help")) { + if (rank == 0) { + std::cerr << "Usage: " << argv[0] + << " [executor] [num_grid_points] [num_iterations] " + << std::endl; + } + std::exit(-1); + } + + ValueType t_init = gko::experimental::mpi::get_walltime(); + + const auto executor_string = argc >= 2 ? argv[1] : "reference"; + const auto grid_dim = + static_cast(argc >= 3 ? std::atoi(argv[2]) : 100); + const auto num_iters = + static_cast(argc >= 4 ? 
std::atoi(argv[3]) : 1000); + + const std::map(MPI_Comm)>> + executor_factory_mpi{ + {"reference", + [](MPI_Comm) { return gko::ReferenceExecutor::create(); }}, + {"omp", [](MPI_Comm) { return gko::OmpExecutor::create(); }}, + {"cuda", + [](MPI_Comm comm) { + int device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::CudaExecutor::get_num_devices()); + return gko::CudaExecutor::create( + device_id, gko::ReferenceExecutor::create()); + }}, + {"hip", + [](MPI_Comm comm) { + int device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::HipExecutor::get_num_devices()); + return gko::HipExecutor::create( + device_id, gko::ReferenceExecutor::create()); + }}, + {"dpcpp", [](MPI_Comm comm) { + int device_id = 0; + if (gko::DpcppExecutor::get_num_devices("gpu")) { + device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::DpcppExecutor::get_num_devices("gpu")); + } else if (gko::DpcppExecutor::get_num_devices("cpu")) { + device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::DpcppExecutor::get_num_devices("cpu")); + } else { + throw std::runtime_error("No suitable DPC++ devices"); + } + return gko::DpcppExecutor::create( + device_id, gko::ReferenceExecutor::create()); + }}}; + + auto exec = executor_factory_mpi.at(executor_string)(MPI_COMM_WORLD); + + // @sect3{Creating the Distributed Matrix and Vectors} + // As a first step, we create a partition of the rows. The partition + // consists of ranges of consecutive rows which are assigned a part-id. + // These part-ids will be used for the distributed data structures to + // determine which rows will be stored locally. In this example each rank + // has (nearly) the same number of rows, so we can use the following + // specialized constructor. See @ref gko::distributed::Partition for other + // modes of creating a partition. + const auto num_rows = grid_dim; + auto partition = gko::share(part_type::build_from_global_size_uniform( + exec->get_master(), comm.size(), + static_cast(num_rows))); + + // Assemble the matrix using a 3-pt stencil and fill the right-hand-side + // with a sine value. The distributed matrix supports only constructing an + // empty matrix of zero size and filling in the values with + // gko::experimental::distributed::Matrix::read_distributed. Only the data + // that belongs to the rows by this rank will be assembled. + gko::matrix_data A_data; + gko::matrix_data b_data; + gko::matrix_data x_data; + A_data.size = {num_rows, num_rows}; + b_data.size = {num_rows, 1}; + x_data.size = {num_rows, 1}; + const auto range_start = partition->get_range_bounds()[rank]; + const auto range_end = partition->get_range_bounds()[rank + 1]; + for (int i = range_start; i < range_end; i++) { + if (i > 0) { + A_data.nonzeros.emplace_back(i, i - 1, -1); + } + A_data.nonzeros.emplace_back(i, i, 2); + if (i < grid_dim - 1) { + A_data.nonzeros.emplace_back(i, i + 1, -1); + } + b_data.nonzeros.emplace_back(i, 0, std::sin(i * 0.01)); + x_data.nonzeros.emplace_back(i, 0, gko::zero()); + } + + // Take timings. + comm.synchronize(); + ValueType t_init_end = gko::experimental::mpi::get_walltime(); + + // Read the matrix data, currently this is only supported on CPU executors. + // This will also set up the communication pattern needed for the + // distributed matrix-vector multiplication. 
+ auto A_host = gko::share(dist_mtx::create(exec->get_master(), comm)); + auto x_host = dist_vec::create(exec->get_master(), comm); + auto b_host = dist_vec::create(exec->get_master(), comm); + A_host->read_distributed(A_data, partition); + b_host->read_distributed(b_data, partition); + x_host->read_distributed(x_data, partition); + // After reading, the matrix and vector can be moved to the chosen executor, + // since the distributed matrix supports SpMV also on devices. + auto A = gko::share(dist_mtx::create(exec, comm)); + auto x = dist_vec::create(exec, comm); + auto b = dist_vec::create(exec, comm); + A->copy_from(A_host); + b->copy_from(b_host); + x->copy_from(x_host); + + // Take timings. + comm.synchronize(); + ValueType t_read_setup_end = gko::experimental::mpi::get_walltime(); + + + // @sect3{Solve the Distributed System} + // Generate the solver + + // Setup the multigrid factory with default setting + // It uses Schwarz Jacobi as smoother and GMRES as coarse solver + auto mg_factory = gko::share( + mg::build() + .with_mg_level(pgm::build().with_deterministic(true)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) + .on(exec)); + + // Setup the stopping criterion and logger + const gko::remove_complex reduction_factor{1e-8}; + std::shared_ptr> logger = + gko::log::Convergence::create(); + auto Ainv = solver::build() + .with_preconditioner(mg_factory) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(num_iters), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) + .on(exec) + ->generate(A); + // Add logger to the generated solver to log the iteration count and + // residual norm + Ainv->add_logger(logger); + + // Take timings. + comm.synchronize(); + ValueType t_solver_generate_end = gko::experimental::mpi::get_walltime(); + + // Apply the distributed solver, this is the same as in the non-distributed + // case. + Ainv->apply(b, x); + + // Take timings. + comm.synchronize(); + ValueType t_end = gko::experimental::mpi::get_walltime(); + + // Get the residual. + auto res_norm = gko::clone(exec->get_master(), + gko::as(logger->get_residual_norm())); + + // @sect3{Printing Results} + // Print the achieved residual norm and timings on rank 0. + if (comm.rank() == 0) { + // clang-format off + std::cout << "\nNum rows in matrix: " << num_rows + << "\nNum ranks: " << comm.size() + << "\nFinal Res norm: " << res_norm->at(0, 0) + << "\nIteration count: " << logger->get_num_iterations() + << "\nInit time: " << t_init_end - t_init + << "\nRead time: " << t_read_setup_end - t_init + << "\nSolver generate time: " << t_solver_generate_end - t_read_setup_end + << "\nSolver apply time: " << t_end - t_solver_generate_end + << "\nTotal time: " << t_end - t_init + << std::endl; + // clang-format on + } +} diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/builds-on b/examples/distributed-multigrid-preconditioned-solver/doc/builds-on new file mode 100644 index 00000000000..f70ab1608ec --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/doc/builds-on @@ -0,0 +1 @@ +distributed-solver diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox b/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox new file mode 100644 index 00000000000..79cfef82425 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox @@ -0,0 +1,9 @@ + +

<h1>Introduction</h1>
+This distributed multigrid preconditioned solver example should help you understand using Ginkgo multigrid in a distributed setting. +The example will solve a simple 1D Laplace equation where the system can be distributed row-wise to multiple processes. +Note. Because the stencil is configured equal weighted, the coarsening method does not perform well on this kind of problem. +To run the solver with multiple processes, use `mpirun -n NUM_PROCS ./distributed-solver [executor] [num_grid_points] [num_iterations]`. + +If you are using GPU devices, please make sure that you run this example with at most as many processes as you have GPU +devices available. diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/kind b/examples/distributed-multigrid-preconditioned-solver/doc/kind new file mode 100644 index 00000000000..196aa616342 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/doc/kind @@ -0,0 +1 @@ +distributed diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/results.dox b/examples/distributed-multigrid-preconditioned-solver/doc/results.dox new file mode 100644 index 00000000000..2a263e8396b --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/doc/results.dox @@ -0,0 +1,18 @@ +

<h1>Results</h1>
+This is the expected output for `mpirun -n 4 ./distributed-multigrid-preconditioned-solver`: + +@code{.cpp} + +Num rows in matrix: 100 +Num ranks: 4 +Final Res norm: 1.87487e-08 +Iteration count: 23 +Init time: 0.000153159 +Read time: 0.000563957 +Solver generate time: 0.000484838 +Solver apply time: 0.073985 +Total time: 0.0750338 + +@endcode + +The timings may vary depending on the machine. diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/short-intro b/examples/distributed-multigrid-preconditioned-solver/doc/short-intro new file mode 100644 index 00000000000..bf52c346411 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/doc/short-intro @@ -0,0 +1 @@ +The distributed multigrid preconditioned solver example. diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/tooltip b/examples/distributed-multigrid-preconditioned-solver/doc/tooltip new file mode 100644 index 00000000000..3e6cc291852 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver/doc/tooltip @@ -0,0 +1 @@ +Solves a distributed linear system. From ef51441d9ad74fbc1b07696fd377dd9913aedf23 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 15 Jan 2025 13:42:29 +0100 Subject: [PATCH 2/5] add the customized distributed mg --- examples/CMakeLists.txt | 2 +- .../CMakeLists.txt | 10 + ...igrid-preconditioned-solver-customized.cpp | 260 ++++++++++++++++++ .../doc/builds-on | 1 + .../doc/intro.dox | 9 + .../doc/kind | 1 + .../doc/results.dox | 18 ++ .../doc/short-intro | 1 + .../doc/tooltip | 1 + 9 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/CMakeLists.txt create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/doc/intro.dox create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/doc/kind create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/doc/results.dox create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/doc/short-intro create mode 100644 examples/distributed-multigrid-preconditioned-solver-customized/doc/tooltip diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index d58b9a59356..f891426cbf7 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -60,7 +60,7 @@ if(GINKGO_HAVE_PAPI_SDE) endif() if(GINKGO_BUILD_MPI) - list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver) + list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver distributed-multigrid-preconditioned-solver-customized) endif() find_package(Kokkos 4.1.00 QUIET) diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/CMakeLists.txt b/examples/distributed-multigrid-preconditioned-solver-customized/CMakeLists.txt new file mode 100644 index 00000000000..b4b06f119a4 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.16) +project(distributed-multigrid-preconditioned-solver-customized) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.10.0 REQUIRED) +endif() + 
+add_executable(distributed-multigrid-preconditioned-solver-customized distributed-multigrid-preconditioned-solver-customized.cpp) +target_link_libraries(distributed-multigrid-preconditioned-solver-customized Ginkgo::ginkgo) diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp b/examples/distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp new file mode 100644 index 00000000000..757e99adde0 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp @@ -0,0 +1,260 @@ +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +// @sect3{Include files} + +// This is the main ginkgo header file. +#include + +// Add the C++ iostream header to output information to the console. +#include +// Add the STL map header for the executor selection +#include +// Add the string manipulation header to handle strings. +#include + + +int main(int argc, char* argv[]) +{ + // @sect3{Initialize the MPI environment} + // Since this is an MPI program, we need to initialize and finalize + // MPI at the begin and end respectively of our program. This can be easily + // done with the following helper construct that uses RAII to automate the + // initialization and finalization. + const gko::experimental::mpi::environment env(argc, argv); + // @sect3{Type Definitions} + // Define the needed types. In a parallel program we need to differentiate + // between global and local indices, thus we have two index types. + using GlobalIndexType = gko::int64; + using LocalIndexType = gko::int32; + // The underlying value type. + using ValueType = double; + // As vector type we use the following, which implements a subset of @ref + // gko::matrix::Dense. + using dist_vec = gko::experimental::distributed::Vector; + // As matrix type we simply use the following type, which can read + // distributed data and be applied to a distributed vector. + using dist_mtx = + gko::experimental::distributed::Matrix; + // We still need a localized vector type to be used as scalars in the + // advanced apply operations. + using vec = gko::matrix::Dense; + // The partition type describes how the rows of the matrices are + // distributed. + using part_type = + gko::experimental::distributed::Partition; + // We can use here the same solver type as you would use in a + // non-distributed program. Please note that not all solvers support + // distributed systems at the moment. + using solver = gko::solver::Cg; + using schwarz = gko::experimental::distributed::preconditioner::Schwarz< + ValueType, LocalIndexType, GlobalIndexType>; + using bj = gko::preconditioner::Jacobi; + using mg = gko::solver::Multigrid; + using pgm = gko::multigrid::Pgm; + + // Create an MPI communicator get the rank of the calling process. + const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); + const auto rank = comm.rank(); + + // @sect3{User Input Handling} + // User input settings: + // - The executor, defaults to reference. + // - The number of grid points, defaults to 100. + // - The number of iterations, defaults to 1000. 
+ if (argc == 2 && (std::string(argv[1]) == "--help")) { + if (rank == 0) { + std::cerr << "Usage: " << argv[0] + << " [executor] [num_grid_points] [num_iterations] " + << std::endl; + } + std::exit(-1); + } + + ValueType t_init = gko::experimental::mpi::get_walltime(); + + const auto executor_string = argc >= 2 ? argv[1] : "reference"; + const auto grid_dim = + static_cast(argc >= 3 ? std::atoi(argv[2]) : 100); + const auto num_iters = + static_cast(argc >= 4 ? std::atoi(argv[3]) : 1000); + + const std::map(MPI_Comm)>> + executor_factory_mpi{ + {"reference", + [](MPI_Comm) { return gko::ReferenceExecutor::create(); }}, + {"omp", [](MPI_Comm) { return gko::OmpExecutor::create(); }}, + {"cuda", + [](MPI_Comm comm) { + int device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::CudaExecutor::get_num_devices()); + return gko::CudaExecutor::create( + device_id, gko::ReferenceExecutor::create()); + }}, + {"hip", + [](MPI_Comm comm) { + int device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::HipExecutor::get_num_devices()); + return gko::HipExecutor::create( + device_id, gko::ReferenceExecutor::create()); + }}, + {"dpcpp", [](MPI_Comm comm) { + int device_id = 0; + if (gko::DpcppExecutor::get_num_devices("gpu")) { + device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::DpcppExecutor::get_num_devices("gpu")); + } else if (gko::DpcppExecutor::get_num_devices("cpu")) { + device_id = gko::experimental::mpi::map_rank_to_device_id( + comm, gko::DpcppExecutor::get_num_devices("cpu")); + } else { + throw std::runtime_error("No suitable DPC++ devices"); + } + return gko::DpcppExecutor::create( + device_id, gko::ReferenceExecutor::create()); + }}}; + + auto exec = executor_factory_mpi.at(executor_string)(MPI_COMM_WORLD); + + // @sect3{Creating the Distributed Matrix and Vectors} + // As a first step, we create a partition of the rows. The partition + // consists of ranges of consecutive rows which are assigned a part-id. + // These part-ids will be used for the distributed data structures to + // determine which rows will be stored locally. In this example each rank + // has (nearly) the same number of rows, so we can use the following + // specialized constructor. See @ref gko::distributed::Partition for other + // modes of creating a partition. + const auto num_rows = grid_dim; + auto partition = gko::share(part_type::build_from_global_size_uniform( + exec->get_master(), comm.size(), + static_cast(num_rows))); + + // Assemble the matrix using a 3-pt stencil and fill the right-hand-side + // with a sine value. The distributed matrix supports only constructing an + // empty matrix of zero size and filling in the values with + // gko::experimental::distributed::Matrix::read_distributed. Only the data + // that belongs to the rows by this rank will be assembled. + gko::matrix_data A_data; + gko::matrix_data b_data; + gko::matrix_data x_data; + A_data.size = {num_rows, num_rows}; + b_data.size = {num_rows, 1}; + x_data.size = {num_rows, 1}; + const auto range_start = partition->get_range_bounds()[rank]; + const auto range_end = partition->get_range_bounds()[rank + 1]; + for (int i = range_start; i < range_end; i++) { + if (i > 0) { + A_data.nonzeros.emplace_back(i, i - 1, -1); + } + A_data.nonzeros.emplace_back(i, i, 2); + if (i < grid_dim - 1) { + A_data.nonzeros.emplace_back(i, i + 1, -1); + } + b_data.nonzeros.emplace_back(i, 0, std::sin(i * 0.01)); + x_data.nonzeros.emplace_back(i, 0, gko::zero()); + } + + // Take timings. 
+ comm.synchronize(); + ValueType t_init_end = gko::experimental::mpi::get_walltime(); + + // Read the matrix data, currently this is only supported on CPU executors. + // This will also set up the communication pattern needed for the + // distributed matrix-vector multiplication. + auto A_host = gko::share(dist_mtx::create(exec->get_master(), comm)); + auto x_host = dist_vec::create(exec->get_master(), comm); + auto b_host = dist_vec::create(exec->get_master(), comm); + A_host->read_distributed(A_data, partition); + b_host->read_distributed(b_data, partition); + x_host->read_distributed(x_data, partition); + // After reading, the matrix and vector can be moved to the chosen executor, + // since the distributed matrix supports SpMV also on devices. + auto A = gko::share(dist_mtx::create(exec, comm)); + auto x = dist_vec::create(exec, comm); + auto b = dist_vec::create(exec, comm); + A->copy_from(A_host); + b->copy_from(b_host); + x->copy_from(x_host); + + // Take timings. + comm.synchronize(); + ValueType t_read_setup_end = gko::experimental::mpi::get_walltime(); + + + // @sect3{Solve the Distributed System} + // Generate the solver + + // Setup the multigrid factory with customized smoother and coarse solver + // Because BlockJacobi does not support distributed matrix, we need wrap it + // in Schwarz. + auto schwarz_bj_factory = + gko::share(schwarz::build().with_local_solver(bj::build()).on(exec)); + auto smoother_factory = gko::share(gko::solver::build_smoother( + schwarz_bj_factory, 2u, static_cast(0.9))); + // Cg supports distributed matrix, so we can use it as we did in + // non-distributed case + auto coarsest_factory = gko::share( + solver::build() + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) + .on(exec)); + // It uses Schwarz Jacobi as smoother and GMRES as coarse solver + auto mg_factory = gko::share( + mg::build() + .with_mg_level(pgm::build().with_deterministic(true)) + .with_pre_smoother(smoother_factory) + .with_coarsest_solver(coarsest_factory) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) + .on(exec)); + + // Setup the stopping criterion and logger + const gko::remove_complex reduction_factor{1e-8}; + std::shared_ptr> logger = + gko::log::Convergence::create(); + auto Ainv = solver::build() + .with_preconditioner(mg_factory) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(num_iters), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) + .on(exec) + ->generate(A); + // Add logger to the generated solver to log the iteration count and + // residual norm + Ainv->add_logger(logger); + + // Take timings. + comm.synchronize(); + ValueType t_solver_generate_end = gko::experimental::mpi::get_walltime(); + + // Apply the distributed solver, this is the same as in the non-distributed + // case. + Ainv->apply(b, x); + + // Take timings. + comm.synchronize(); + ValueType t_end = gko::experimental::mpi::get_walltime(); + + // Get the residual. + auto res_norm = gko::clone(exec->get_master(), + gko::as(logger->get_residual_norm())); + + // @sect3{Printing Results} + // Print the achieved residual norm and timings on rank 0. 
+ if (comm.rank() == 0) { + // clang-format off + std::cout << "\nNum rows in matrix: " << num_rows + << "\nNum ranks: " << comm.size() + << "\nFinal Res norm: " << res_norm->at(0, 0) + << "\nIteration count: " << logger->get_num_iterations() + << "\nInit time: " << t_init_end - t_init + << "\nRead time: " << t_read_setup_end - t_init + << "\nSolver generate time: " << t_solver_generate_end - t_read_setup_end + << "\nSolver apply time: " << t_end - t_solver_generate_end + << "\nTotal time: " << t_end - t_init + << std::endl; + // clang-format on + } +} diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on b/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on new file mode 100644 index 00000000000..e4cd339e18d --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on @@ -0,0 +1 @@ +distributed-multigrid-preconditioned-solver diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/intro.dox b/examples/distributed-multigrid-preconditioned-solver-customized/doc/intro.dox new file mode 100644 index 00000000000..17c45bc7403 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/doc/intro.dox @@ -0,0 +1,9 @@ + +

<h1>Introduction</h1>
+This distributed multigrid preconditioned solver example should help you understand customizing Ginkgo multigrid in a distributed setting. +The example will solve a simple 1D Laplace equation where the system can be distributed row-wise to multiple processes. +Note. Because the stencil is configured equal weighted, the coarsening method does not perform well on this kind of problem. +To run the solver with multiple processes, use `mpirun -n NUM_PROCS ./distributed-solver [executor] [num_grid_points] [num_iterations]`. + +If you are using GPU devices, please make sure that you run this example with at most as many processes as you have GPU +devices available. diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/kind b/examples/distributed-multigrid-preconditioned-solver-customized/doc/kind new file mode 100644 index 00000000000..196aa616342 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/doc/kind @@ -0,0 +1 @@ +distributed diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/results.dox b/examples/distributed-multigrid-preconditioned-solver-customized/doc/results.dox new file mode 100644 index 00000000000..1a54ada9ae1 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/doc/results.dox @@ -0,0 +1,18 @@ +

<h1>Results</h1>
+This is the expected output for `mpirun -n 4 ./distributed-multigrid-preconditioned-solver-customized`: + +@code{.cpp} + +Num rows in matrix: 100 +Num ranks: 4 +Final Res norm: 1.61045e-08 +Iteration count: 18 +Init time: 0.000117699 +Read time: 0.000522518 +Solver generate time: 0.000430548 +Solver apply time: 0.00183804 +Total time: 0.00279111 + +@endcode + +The timings may vary depending on the machine. diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/short-intro b/examples/distributed-multigrid-preconditioned-solver-customized/doc/short-intro new file mode 100644 index 00000000000..443031b3e39 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/doc/short-intro @@ -0,0 +1 @@ +The distributed multigrid preconditioned solver with customized components example. diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/tooltip b/examples/distributed-multigrid-preconditioned-solver-customized/doc/tooltip new file mode 100644 index 00000000000..3e6cc291852 --- /dev/null +++ b/examples/distributed-multigrid-preconditioned-solver-customized/doc/tooltip @@ -0,0 +1 @@ +Solves a distributed linear system. From 5d9d5798ff9c863efe9069fe2cb5392aef05adfb Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Tue, 21 Jan 2025 10:23:19 +0100 Subject: [PATCH 3/5] only keep the distributed mg customized example --- examples/CMakeLists.txt | 2 +- .../doc/builds-on | 2 +- .../CMakeLists.txt | 10 - ...ibuted-multigrid-preconditioned-solver.cpp | 246 ------------------ .../doc/builds-on | 1 - .../doc/intro.dox | 9 - .../doc/kind | 1 - .../doc/results.dox | 18 -- .../doc/short-intro | 1 - .../doc/tooltip | 1 - 10 files changed, 2 insertions(+), 289 deletions(-) delete mode 100644 examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt delete mode 100644 examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp delete mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/builds-on delete mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/intro.dox delete mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/kind delete mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/results.dox delete mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/short-intro delete mode 100644 examples/distributed-multigrid-preconditioned-solver/doc/tooltip diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index f891426cbf7..e76b1c308fb 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -60,7 +60,7 @@ if(GINKGO_HAVE_PAPI_SDE) endif() if(GINKGO_BUILD_MPI) - list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver distributed-multigrid-preconditioned-solver-customized) + list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver-customized) endif() find_package(Kokkos 4.1.00 QUIET) diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on b/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on index e4cd339e18d..f70ab1608ec 100644 --- a/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on +++ b/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on @@ -1 +1 @@ -distributed-multigrid-preconditioned-solver +distributed-solver diff --git a/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt 
b/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt deleted file mode 100644 index 0de5e4cd44a..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -project(distributed-multigrid-preconditioned-solver) - -# We only need to find Ginkgo if we build this example stand-alone -if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.10.0 REQUIRED) -endif() - -add_executable(distributed-multigrid-preconditioned-solver distributed-multigrid-preconditioned-solver.cpp) -target_link_libraries(distributed-multigrid-preconditioned-solver Ginkgo::ginkgo) diff --git a/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp b/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp deleted file mode 100644 index 66f05ca911a..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp +++ /dev/null @@ -1,246 +0,0 @@ -// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors -// -// SPDX-License-Identifier: BSD-3-Clause - -// @sect3{Include files} - -// This is the main ginkgo header file. -#include - -// Add the C++ iostream header to output information to the console. -#include -// Add the STL map header for the executor selection -#include -// Add the string manipulation header to handle strings. -#include - - -int main(int argc, char* argv[]) -{ - // @sect3{Initialize the MPI environment} - // Since this is an MPI program, we need to initialize and finalize - // MPI at the begin and end respectively of our program. This can be easily - // done with the following helper construct that uses RAII to automate the - // initialization and finalization. - const gko::experimental::mpi::environment env(argc, argv); - // @sect3{Type Definitions} - // Define the needed types. In a parallel program we need to differentiate - // between global and local indices, thus we have two index types. - using GlobalIndexType = gko::int64; - using LocalIndexType = gko::int32; - // The underlying value type. - using ValueType = double; - // As vector type we use the following, which implements a subset of @ref - // gko::matrix::Dense. - using dist_vec = gko::experimental::distributed::Vector; - // As matrix type we simply use the following type, which can read - // distributed data and be applied to a distributed vector. - using dist_mtx = - gko::experimental::distributed::Matrix; - // We still need a localized vector type to be used as scalars in the - // advanced apply operations. - using vec = gko::matrix::Dense; - // The partition type describes how the rows of the matrices are - // distributed. - using part_type = - gko::experimental::distributed::Partition; - // We can use here the same solver type as you would use in a - // non-distributed program. Please note that not all solvers support - // distributed systems at the moment. - using solver = gko::solver::Cg; - using schwarz = gko::experimental::distributed::preconditioner::Schwarz< - ValueType, LocalIndexType, GlobalIndexType>; - using bj = gko::preconditioner::Jacobi; - using mg = gko::solver::Multigrid; - using pgm = gko::multigrid::Pgm; - - // Create an MPI communicator get the rank of the calling process. 
- const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); - const auto rank = comm.rank(); - - // @sect3{User Input Handling} - // User input settings: - // - The executor, defaults to reference. - // - The number of grid points, defaults to 100. - // - The number of iterations, defaults to 1000. - if (argc == 2 && (std::string(argv[1]) == "--help")) { - if (rank == 0) { - std::cerr << "Usage: " << argv[0] - << " [executor] [num_grid_points] [num_iterations] " - << std::endl; - } - std::exit(-1); - } - - ValueType t_init = gko::experimental::mpi::get_walltime(); - - const auto executor_string = argc >= 2 ? argv[1] : "reference"; - const auto grid_dim = - static_cast(argc >= 3 ? std::atoi(argv[2]) : 100); - const auto num_iters = - static_cast(argc >= 4 ? std::atoi(argv[3]) : 1000); - - const std::map(MPI_Comm)>> - executor_factory_mpi{ - {"reference", - [](MPI_Comm) { return gko::ReferenceExecutor::create(); }}, - {"omp", [](MPI_Comm) { return gko::OmpExecutor::create(); }}, - {"cuda", - [](MPI_Comm comm) { - int device_id = gko::experimental::mpi::map_rank_to_device_id( - comm, gko::CudaExecutor::get_num_devices()); - return gko::CudaExecutor::create( - device_id, gko::ReferenceExecutor::create()); - }}, - {"hip", - [](MPI_Comm comm) { - int device_id = gko::experimental::mpi::map_rank_to_device_id( - comm, gko::HipExecutor::get_num_devices()); - return gko::HipExecutor::create( - device_id, gko::ReferenceExecutor::create()); - }}, - {"dpcpp", [](MPI_Comm comm) { - int device_id = 0; - if (gko::DpcppExecutor::get_num_devices("gpu")) { - device_id = gko::experimental::mpi::map_rank_to_device_id( - comm, gko::DpcppExecutor::get_num_devices("gpu")); - } else if (gko::DpcppExecutor::get_num_devices("cpu")) { - device_id = gko::experimental::mpi::map_rank_to_device_id( - comm, gko::DpcppExecutor::get_num_devices("cpu")); - } else { - throw std::runtime_error("No suitable DPC++ devices"); - } - return gko::DpcppExecutor::create( - device_id, gko::ReferenceExecutor::create()); - }}}; - - auto exec = executor_factory_mpi.at(executor_string)(MPI_COMM_WORLD); - - // @sect3{Creating the Distributed Matrix and Vectors} - // As a first step, we create a partition of the rows. The partition - // consists of ranges of consecutive rows which are assigned a part-id. - // These part-ids will be used for the distributed data structures to - // determine which rows will be stored locally. In this example each rank - // has (nearly) the same number of rows, so we can use the following - // specialized constructor. See @ref gko::distributed::Partition for other - // modes of creating a partition. - const auto num_rows = grid_dim; - auto partition = gko::share(part_type::build_from_global_size_uniform( - exec->get_master(), comm.size(), - static_cast(num_rows))); - - // Assemble the matrix using a 3-pt stencil and fill the right-hand-side - // with a sine value. The distributed matrix supports only constructing an - // empty matrix of zero size and filling in the values with - // gko::experimental::distributed::Matrix::read_distributed. Only the data - // that belongs to the rows by this rank will be assembled. 
- gko::matrix_data A_data; - gko::matrix_data b_data; - gko::matrix_data x_data; - A_data.size = {num_rows, num_rows}; - b_data.size = {num_rows, 1}; - x_data.size = {num_rows, 1}; - const auto range_start = partition->get_range_bounds()[rank]; - const auto range_end = partition->get_range_bounds()[rank + 1]; - for (int i = range_start; i < range_end; i++) { - if (i > 0) { - A_data.nonzeros.emplace_back(i, i - 1, -1); - } - A_data.nonzeros.emplace_back(i, i, 2); - if (i < grid_dim - 1) { - A_data.nonzeros.emplace_back(i, i + 1, -1); - } - b_data.nonzeros.emplace_back(i, 0, std::sin(i * 0.01)); - x_data.nonzeros.emplace_back(i, 0, gko::zero()); - } - - // Take timings. - comm.synchronize(); - ValueType t_init_end = gko::experimental::mpi::get_walltime(); - - // Read the matrix data, currently this is only supported on CPU executors. - // This will also set up the communication pattern needed for the - // distributed matrix-vector multiplication. - auto A_host = gko::share(dist_mtx::create(exec->get_master(), comm)); - auto x_host = dist_vec::create(exec->get_master(), comm); - auto b_host = dist_vec::create(exec->get_master(), comm); - A_host->read_distributed(A_data, partition); - b_host->read_distributed(b_data, partition); - x_host->read_distributed(x_data, partition); - // After reading, the matrix and vector can be moved to the chosen executor, - // since the distributed matrix supports SpMV also on devices. - auto A = gko::share(dist_mtx::create(exec, comm)); - auto x = dist_vec::create(exec, comm); - auto b = dist_vec::create(exec, comm); - A->copy_from(A_host); - b->copy_from(b_host); - x->copy_from(x_host); - - // Take timings. - comm.synchronize(); - ValueType t_read_setup_end = gko::experimental::mpi::get_walltime(); - - - // @sect3{Solve the Distributed System} - // Generate the solver - - // Setup the multigrid factory with default setting - // It uses Schwarz Jacobi as smoother and GMRES as coarse solver - auto mg_factory = gko::share( - mg::build() - .with_mg_level(pgm::build().with_deterministic(true)) - .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) - .on(exec)); - - // Setup the stopping criterion and logger - const gko::remove_complex reduction_factor{1e-8}; - std::shared_ptr> logger = - gko::log::Convergence::create(); - auto Ainv = solver::build() - .with_preconditioner(mg_factory) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(num_iters), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor)) - .on(exec) - ->generate(A); - // Add logger to the generated solver to log the iteration count and - // residual norm - Ainv->add_logger(logger); - - // Take timings. - comm.synchronize(); - ValueType t_solver_generate_end = gko::experimental::mpi::get_walltime(); - - // Apply the distributed solver, this is the same as in the non-distributed - // case. - Ainv->apply(b, x); - - // Take timings. - comm.synchronize(); - ValueType t_end = gko::experimental::mpi::get_walltime(); - - // Get the residual. - auto res_norm = gko::clone(exec->get_master(), - gko::as(logger->get_residual_norm())); - - // @sect3{Printing Results} - // Print the achieved residual norm and timings on rank 0. 
- if (comm.rank() == 0) { - // clang-format off - std::cout << "\nNum rows in matrix: " << num_rows - << "\nNum ranks: " << comm.size() - << "\nFinal Res norm: " << res_norm->at(0, 0) - << "\nIteration count: " << logger->get_num_iterations() - << "\nInit time: " << t_init_end - t_init - << "\nRead time: " << t_read_setup_end - t_init - << "\nSolver generate time: " << t_solver_generate_end - t_read_setup_end - << "\nSolver apply time: " << t_end - t_solver_generate_end - << "\nTotal time: " << t_end - t_init - << std::endl; - // clang-format on - } -} diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/builds-on b/examples/distributed-multigrid-preconditioned-solver/doc/builds-on deleted file mode 100644 index f70ab1608ec..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/doc/builds-on +++ /dev/null @@ -1 +0,0 @@ -distributed-solver diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox b/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox deleted file mode 100644 index 79cfef82425..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox +++ /dev/null @@ -1,9 +0,0 @@ - -

<h1>Introduction</h1>
-This distributed multigrid preconditioned solver example should help you understand using Ginkgo multigrid in a distributed setting. -The example will solve a simple 1D Laplace equation where the system can be distributed row-wise to multiple processes. -Note. Because the stencil is configured equal weighted, the coarsening method does not perform well on this kind of problem. -To run the solver with multiple processes, use `mpirun -n NUM_PROCS ./distributed-solver [executor] [num_grid_points] [num_iterations]`. - -If you are using GPU devices, please make sure that you run this example with at most as many processes as you have GPU -devices available. diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/kind b/examples/distributed-multigrid-preconditioned-solver/doc/kind deleted file mode 100644 index 196aa616342..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/doc/kind +++ /dev/null @@ -1 +0,0 @@ -distributed diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/results.dox b/examples/distributed-multigrid-preconditioned-solver/doc/results.dox deleted file mode 100644 index 2a263e8396b..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/doc/results.dox +++ /dev/null @@ -1,18 +0,0 @@ -

<h1>Results</h1>
-This is the expected output for `mpirun -n 4 ./distributed-multigrid-preconditioned-solver`: - -@code{.cpp} - -Num rows in matrix: 100 -Num ranks: 4 -Final Res norm: 1.87487e-08 -Iteration count: 23 -Init time: 0.000153159 -Read time: 0.000563957 -Solver generate time: 0.000484838 -Solver apply time: 0.073985 -Total time: 0.0750338 - -@endcode - -The timings may vary depending on the machine. diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/short-intro b/examples/distributed-multigrid-preconditioned-solver/doc/short-intro deleted file mode 100644 index bf52c346411..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/doc/short-intro +++ /dev/null @@ -1 +0,0 @@ -The distributed multigrid preconditioned solver example. diff --git a/examples/distributed-multigrid-preconditioned-solver/doc/tooltip b/examples/distributed-multigrid-preconditioned-solver/doc/tooltip deleted file mode 100644 index 3e6cc291852..00000000000 --- a/examples/distributed-multigrid-preconditioned-solver/doc/tooltip +++ /dev/null @@ -1 +0,0 @@ -Solves a distributed linear system. From b6927dd339b33ff2b2df133c7739a123628ae5db Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Thu, 13 Feb 2025 16:22:18 +0100 Subject: [PATCH 4/5] update documentation Co-authored-by: Marcel Koch --- examples/CMakeLists.txt | 2 +- .../CMakeLists.txt | 6 +++--- .../distributed-multigrid-preconditioned-solver.cpp} | 10 +++++++++- .../doc/builds-on | 0 .../doc/intro.dox | 2 +- .../doc/kind | 0 .../doc/results.dox | 2 +- .../doc/short-intro | 0 .../doc/tooltip | 0 9 files changed, 15 insertions(+), 7 deletions(-) rename examples/{distributed-multigrid-preconditioned-solver-customized => distributed-multigrid-preconditioned-solver}/CMakeLists.txt (52%) rename examples/{distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp => distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp} (95%) rename examples/{distributed-multigrid-preconditioned-solver-customized => distributed-multigrid-preconditioned-solver}/doc/builds-on (100%) rename examples/{distributed-multigrid-preconditioned-solver-customized => distributed-multigrid-preconditioned-solver}/doc/intro.dox (78%) rename examples/{distributed-multigrid-preconditioned-solver-customized => distributed-multigrid-preconditioned-solver}/doc/kind (100%) rename examples/{distributed-multigrid-preconditioned-solver-customized => distributed-multigrid-preconditioned-solver}/doc/results.dox (90%) rename examples/{distributed-multigrid-preconditioned-solver-customized => distributed-multigrid-preconditioned-solver}/doc/short-intro (100%) rename examples/{distributed-multigrid-preconditioned-solver-customized => distributed-multigrid-preconditioned-solver}/doc/tooltip (100%) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index e76b1c308fb..d58b9a59356 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -60,7 +60,7 @@ if(GINKGO_HAVE_PAPI_SDE) endif() if(GINKGO_BUILD_MPI) - list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver-customized) + list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver) endif() find_package(Kokkos 4.1.00 QUIET) diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/CMakeLists.txt b/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt similarity index 52% rename from 
examples/distributed-multigrid-preconditioned-solver-customized/CMakeLists.txt rename to examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt index b4b06f119a4..0de5e4cd44a 100644 --- a/examples/distributed-multigrid-preconditioned-solver-customized/CMakeLists.txt +++ b/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt @@ -1,10 +1,10 @@ cmake_minimum_required(VERSION 3.16) -project(distributed-multigrid-preconditioned-solver-customized) +project(distributed-multigrid-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) find_package(Ginkgo 1.10.0 REQUIRED) endif() -add_executable(distributed-multigrid-preconditioned-solver-customized distributed-multigrid-preconditioned-solver-customized.cpp) -target_link_libraries(distributed-multigrid-preconditioned-solver-customized Ginkgo::ginkgo) +add_executable(distributed-multigrid-preconditioned-solver distributed-multigrid-preconditioned-solver.cpp) +target_link_libraries(distributed-multigrid-preconditioned-solver Ginkgo::ginkgo) diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp b/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp similarity index 95% rename from examples/distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp rename to examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp index 757e99adde0..658826f09f7 100644 --- a/examples/distributed-multigrid-preconditioned-solver-customized/distributed-multigrid-preconditioned-solver-customized.cpp +++ b/examples/distributed-multigrid-preconditioned-solver/distributed-multigrid-preconditioned-solver.cpp @@ -50,9 +50,16 @@ int main(int argc, char* argv[]) // non-distributed program. Please note that not all solvers support // distributed systems at the moment. using solver = gko::solver::Cg; + // We use the Schwarz preconditioner to extend non-distributed + // preconditioners, like our Jacobi, + // to the distributed case. The Schwarz preconditioner wraps another + // preconditioner, and applies it only to the local part of a distributed + // matrix. This will be used as our distributed multigrid smoother. using schwarz = gko::experimental::distributed::preconditioner::Schwarz< ValueType, LocalIndexType, GlobalIndexType>; using bj = gko::preconditioner::Jacobi; + // Multigrid and Pgm can accept the distributed matrix, so we still use the + // same type as the non-distributed case. 
using mg = gko::solver::Multigrid; using pgm = gko::multigrid::Pgm; @@ -200,7 +207,8 @@ int main(int argc, char* argv[]) solver::build() .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(exec)); - // It uses Schwarz Jacobi as smoother and GMRES as coarse solver + // The multigrid preconditioner uses the Schwarz Jacobi as smoother and Cg + // as coarse solver auto mg_factory = gko::share( mg::build() .with_mg_level(pgm::build().with_deterministic(true)) diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on b/examples/distributed-multigrid-preconditioned-solver/doc/builds-on similarity index 100% rename from examples/distributed-multigrid-preconditioned-solver-customized/doc/builds-on rename to examples/distributed-multigrid-preconditioned-solver/doc/builds-on diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/intro.dox b/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox similarity index 78% rename from examples/distributed-multigrid-preconditioned-solver-customized/doc/intro.dox rename to examples/distributed-multigrid-preconditioned-solver/doc/intro.dox index 17c45bc7403..99304ca4851 100644 --- a/examples/distributed-multigrid-preconditioned-solver-customized/doc/intro.dox +++ b/examples/distributed-multigrid-preconditioned-solver/doc/intro.dox @@ -2,7 +2,7 @@

<h1>Introduction</h1>
This distributed multigrid preconditioned solver example should help you understand customizing Ginkgo multigrid in a distributed setting. The example will solve a simple 1D Laplace equation where the system can be distributed row-wise to multiple processes. -Note. Because the stencil is configured equal weighted, the coarsening method does not perform well on this kind of problem. +Note. Because the stencil for the discretized Laplacian is configured with equal weight, the coarsening method does not perform well on this kind of problem. To run the solver with multiple processes, use `mpirun -n NUM_PROCS ./distributed-solver [executor] [num_grid_points] [num_iterations]`. If you are using GPU devices, please make sure that you run this example with at most as many processes as you have GPU diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/kind b/examples/distributed-multigrid-preconditioned-solver/doc/kind similarity index 100% rename from examples/distributed-multigrid-preconditioned-solver-customized/doc/kind rename to examples/distributed-multigrid-preconditioned-solver/doc/kind diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/results.dox b/examples/distributed-multigrid-preconditioned-solver/doc/results.dox similarity index 90% rename from examples/distributed-multigrid-preconditioned-solver-customized/doc/results.dox rename to examples/distributed-multigrid-preconditioned-solver/doc/results.dox index 1a54ada9ae1..0189c4c91e6 100644 --- a/examples/distributed-multigrid-preconditioned-solver-customized/doc/results.dox +++ b/examples/distributed-multigrid-preconditioned-solver/doc/results.dox @@ -1,5 +1,5 @@

<h1>Results</h1>
-This is the expected output for `mpirun -n 4 ./distributed-multigrid-preconditioned-solver-customized`: +This is the expected output for `mpirun -n 4 ./distributed-multigrid-preconditioned-solver`: @code{.cpp} diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/short-intro b/examples/distributed-multigrid-preconditioned-solver/doc/short-intro similarity index 100% rename from examples/distributed-multigrid-preconditioned-solver-customized/doc/short-intro rename to examples/distributed-multigrid-preconditioned-solver/doc/short-intro diff --git a/examples/distributed-multigrid-preconditioned-solver-customized/doc/tooltip b/examples/distributed-multigrid-preconditioned-solver/doc/tooltip similarity index 100% rename from examples/distributed-multigrid-preconditioned-solver-customized/doc/tooltip rename to examples/distributed-multigrid-preconditioned-solver/doc/tooltip From 4ad21786861428f0a5e69a75cd8ab7adb09ac02a Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Tue, 18 Feb 2025 13:23:34 +0100 Subject: [PATCH 5/5] update format --- examples/CMakeLists.txt | 7 ++++++- .../CMakeLists.txt | 12 +++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index d58b9a59356..8f4c848005f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -60,7 +60,12 @@ if(GINKGO_HAVE_PAPI_SDE) endif() if(GINKGO_BUILD_MPI) - list(APPEND EXAMPLES_LIST distributed-solver distributed-multigrid-preconditioned-solver) + list( + APPEND + EXAMPLES_LIST + distributed-solver + distributed-multigrid-preconditioned-solver + ) endif() find_package(Kokkos 4.1.00 QUIET) diff --git a/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt b/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt index 0de5e4cd44a..1c81952c0bb 100644 --- a/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt +++ b/examples/distributed-multigrid-preconditioned-solver/CMakeLists.txt @@ -2,9 +2,15 @@ cmake_minimum_required(VERSION 3.16) project(distributed-multigrid-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone -if (NOT GINKGO_BUILD_EXAMPLES) +if(NOT GINKGO_BUILD_EXAMPLES) find_package(Ginkgo 1.10.0 REQUIRED) endif() -add_executable(distributed-multigrid-preconditioned-solver distributed-multigrid-preconditioned-solver.cpp) -target_link_libraries(distributed-multigrid-preconditioned-solver Ginkgo::ginkgo) +add_executable( + distributed-multigrid-preconditioned-solver + distributed-multigrid-preconditioned-solver.cpp +) +target_link_libraries( + distributed-multigrid-preconditioned-solver + Ginkgo::ginkgo +)
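
For reference, the solver composition this series introduces can be condensed into the short sketch below. It is illustrative only and not part of the patches: the helper name `make_mg_preconditioned_cg` and its arguments are hypothetical, it assumes an executor `exec` and a distributed matrix `A` created the same way as in distributed-multigrid-preconditioned-solver.cpp, and the stopping parameters (1000 iterations, 1e-8 reduction factor) mirror the example's defaults.

@code{.cpp}
#include <ginkgo/ginkgo.hpp>

#include <memory>

// Illustrative sketch (not part of the patch series): build the CG solver
// preconditioned by one cycle of PGM multigrid, composed the same way as in
// distributed-multigrid-preconditioned-solver.cpp.
std::unique_ptr<gko::solver::Cg<double>> make_mg_preconditioned_cg(
    std::shared_ptr<const gko::Executor> exec,
    std::shared_ptr<gko::experimental::distributed::Matrix<
        double, gko::int32, gko::int64>>
        A,
    gko::size_type max_iters = 1000u)
{
    using ValueType = double;
    using LocalIndexType = gko::int32;
    using GlobalIndexType = gko::int64;
    using cg = gko::solver::Cg<ValueType>;
    using schwarz = gko::experimental::distributed::preconditioner::Schwarz<
        ValueType, LocalIndexType, GlobalIndexType>;
    using bj = gko::preconditioner::Jacobi<ValueType, LocalIndexType>;
    using mg = gko::solver::Multigrid;
    using pgm = gko::multigrid::Pgm<ValueType, LocalIndexType>;

    // Block-Jacobi cannot act on a distributed matrix directly, so it is
    // wrapped in Schwarz, which applies it to the locally owned block; two
    // damped (0.9) relaxation sweeps form the smoother.
    auto schwarz_bj =
        gko::share(schwarz::build().with_local_solver(bj::build()).on(exec));
    auto smoother = gko::share(gko::solver::build_smoother(
        schwarz_bj, 2u, static_cast<ValueType>(0.9)));
    // CG accepts distributed matrices, so a few CG iterations serve as the
    // coarsest-level solver.
    auto coarsest = gko::share(
        cg::build()
            .with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
            .on(exec));
    // A single cycle of PGM-coarsened multigrid acts as the preconditioner.
    auto mg_factory = gko::share(
        mg::build()
            .with_mg_level(pgm::build().with_deterministic(true))
            .with_pre_smoother(smoother)
            .with_coarsest_solver(coarsest)
            .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
            .on(exec));
    // The outer CG solver uses the multigrid factory as its preconditioner.
    return cg::build()
        .with_preconditioner(mg_factory)
        .with_criteria(
            gko::stop::Iteration::build().with_max_iters(max_iters),
            gko::stop::ResidualNorm<ValueType>::build().with_reduction_factor(
                1e-8))
        .on(exec)
        ->generate(A);
}
@endcode

Once generated, the returned solver is applied like any other Ginkgo solver, e.g. `Ainv->apply(b, x)` on distributed vectors, exactly as in the example.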