From b43bbe6f469186e23239cf6f331187519853f75b Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 8 Jul 2020 11:16:25 +0200 Subject: [PATCH 01/16] implement cuda amgx_pgm size2 expect for generate and related test --- common/multigrid/amgx_pgm_kernels.hpp.inc | 279 ++++++++++++++++++ cuda/multigrid/amgx_pgm_kernels.cu | 83 +++++- cuda/test/CMakeLists.txt | 1 + cuda/test/multigrid/CMakeLists.txt | 1 + cuda/test/multigrid/amgx_pgm_kernels.cpp | 341 ++++++++++++++++++++++ 5 files changed, 693 insertions(+), 12 deletions(-) create mode 100644 common/multigrid/amgx_pgm_kernels.hpp.inc create mode 100644 cuda/test/multigrid/CMakeLists.txt create mode 100644 cuda/test/multigrid/amgx_pgm_kernels.cpp diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc new file mode 100644 index 00000000000..6f460ad98c2 --- /dev/null +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -0,0 +1,279 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +namespace kernel { + + +template +__global__ __launch_bounds__(default_block_size) void restrict_apply_kernel( + const IndexType *__restrict__ agg, const size_type num_rows, + const size_type num_rhs, const ValueType *__restrict__ b, + const size_type b_stride, ValueType *__restrict__ x, + const size_type x_stride) +{ + auto tidx = thread::get_thread_id_flat(); + auto row = tidx / num_rhs; + if (row >= num_rows) { + return; + } + auto col = tidx % num_rhs; + auto ind = agg[row]; + atomic_add(x + ind * x_stride + col, b[row * b_stride + col]); +} + + +template +__global__ __launch_bounds__(default_block_size) void prolong_applyadd_kernel( + const IndexType *__restrict__ agg, const size_type num_rows, + const size_type num_rhs, const ValueType *__restrict__ b, + const size_type b_stride, ValueType *__restrict__ x, + const size_type x_stride) +{ + auto tidx = thread::get_thread_id_flat(); + auto row = tidx / num_rhs; + if (row >= num_rows) { + return; + } + auto col = tidx % num_rhs; + auto ind = agg[row]; + x[row * x_stride + col] += b[ind * b_stride + col]; +} + + +template +__global__ __launch_bounds__(default_block_size) void replace_kernel( + 
size_type size, const IndexType *__restrict__ source, + IndexType *__restrict__ result) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= size) { + return; + } + + result[tidx] = source[tidx] == -1; +} + + +template +__global__ __launch_bounds__(default_block_size) void match_edge_kernel( + size_type num, const IndexType *__restrict__ strongest_neighbor_vals, + IndexType *__restrict__ agg_vals) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + if (agg_vals[tidx] != -1) { + return; + } + auto neighbor = strongest_neighbor_vals[tidx]; + if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx) { + agg_vals[tidx] = tidx; + agg_vals[neighbor] = tidx; + // Use the smaller index as agg point + } +} + + +template +__global__ __launch_bounds__(default_block_size) void activate_kernel( + size_type num, const IndexType *__restrict__ agg, + IndexType *__restrict__ active_agg) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + active_agg[tidx] = agg[tidx] == -1; +} + + +template +__global__ __launch_bounds__(default_block_size) void fill_agg_kernel( + size_type num, const IndexType *__restrict__ index, + IndexType *__restrict__ result) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + result[index[tidx]] = 1; +} + + +template +__global__ __launch_bounds__(default_block_size) void renumber_kernel( + size_type num, const IndexType *__restrict__ map, + IndexType *__restrict__ result) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + result[tidx] = map[result[tidx]]; +} + + +template +__global__ + __launch_bounds__(default_block_size) void find_strongest_neighbor_kernel( + const size_type num, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ weight_vals, + const ValueType *__restrict__ diag, const size_type diag_stride, + IndexType *__restrict__ agg, IndexType 
*__restrict__ strongest_neighbor) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num) { + return; + } + + auto max_weight_unagg = zero(); + auto max_weight_agg = zero(); + IndexType strongest_unagg = -1; + IndexType strongest_agg = -1; + if (agg[row] != -1) { + return; + } + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = weight_vals[idx] / max(abs(diag[row * diag_stride]), + abs(diag[col * diag_stride])); + if (agg[col] == -1 && + (weight > max_weight_unagg || + (weight == max_weight_unagg && col > strongest_unagg))) { + max_weight_unagg = weight; + strongest_unagg = col; + } else if (agg[col] != -1 && + (weight > max_weight_agg || + (weight == max_weight_agg && col > strongest_agg))) { + max_weight_agg = weight; + strongest_agg = col; + } + } + + if (strongest_unagg == -1 && strongest_agg != -1) { + // all neighbor is agg, connect to the strongest agg + // Also, no others will use this item as their strongest_neighbor + // because they are already aggregated. 
Thus, it is determinstic + // behavior + agg[row] = agg[strongest_agg]; + } else if (strongest_unagg != -1) { + // set the strongest neighbor in the unagg group + strongest_neighbor[row] = strongest_unagg; + } else { + // no neighbor + strongest_neighbor[row] = row; + } +} + + +template +__global__ + __launch_bounds__(default_block_size) void assign_to_exist_agg_kernel( + const size_type num, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ weight_vals, + const ValueType *__restrict__ diag, const size_type diag_stride, + const IndexType *__restrict__ agg_const_val, + IndexType *__restrict__ agg_val) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num) { + return; + } + if (agg_val[row] != -1) { + return; + } + ValueType max_weight_agg = zero(); + IndexType strongest_agg = -1; + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = weight_vals[idx] / max(abs(diag[row * diag_stride]), + abs(diag[col * diag_stride])); + if (agg_const_val[col] != -1 && + (weight > max_weight_agg || + (weight == max_weight_agg && col > strongest_agg))) { + max_weight_agg = weight; + strongest_agg = col; + } + } + if (strongest_agg != -1) { + agg_val[row] = agg_const_val[strongest_agg]; + } else { + agg_val[row] = row; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void merge_col_kernel( + const size_type num, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ source_col, + const ValueType *__restrict__ source_val, + IndexType *__restrict__ result_col, ValueType *__restrict__ result_val) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num) { + return; + } + const auto start = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + auto result_ind = start; + auto col = source_col[result_ind]; + auto temp = source_val[result_ind]; + for (size_type idx = start + 1; idx < 
end; idx++) { + auto temp_col = source_col[idx]; + if (temp_col != col) { + result_col[result_ind] = col; + result_val[result_ind] = temp; + result_ind++; + col = temp_col; + temp = zero(); + } + temp += source_val[idx]; + } + // If start != end, need to process the final column + if (start != end) { + result_col[result_ind] = col; + result_val[result_ind] = temp; + } +} + + +} // namespace kernel \ No newline at end of file diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index 9153f120d93..abe32153f4e 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -45,10 +45,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" #include "cuda/base/cusparse_bindings.hpp" #include "cuda/base/math.hpp" #include "cuda/base/types.hpp" -#include "cuda/solver/common_trs_kernels.cuh" +#include "cuda/components/atomic.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" namespace gko { @@ -62,25 +66,58 @@ namespace cuda { namespace amgx_pgm { +constexpr int default_block_size = 512; + + +#include "common/multigrid/amgx_pgm_kernels.hpp.inc" + + template void match_edge(std::shared_ptr exec, const Array &strongest_neighbor, - Array &agg) GKO_NOT_IMPLEMENTED; + Array &agg) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + kernel::match_edge_kernel<<>>( + num, strongest_neighbor.get_const_data(), agg.get_data()); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, - IndexType *num_unagg) GKO_NOT_IMPLEMENTED; + const Array &agg, IndexType *num_unagg) +{ + Array active_agg(exec, agg.get_num_elems()); + const dim3 grid(ceildiv(active_agg.get_num_elems(), default_block_size)); + kernel::activate_kernel<<>>( + 
active_agg.get_num_elems(), agg.get_const_data(), + active_agg.get_data()); + *num_unagg = reduce_add_array(exec, active_agg.get_num_elems(), + active_agg.get_const_data()); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template void renumber(std::shared_ptr exec, Array &agg, - IndexType *num_agg) GKO_NOT_IMPLEMENTED; + IndexType *num_agg) +{ + const auto num = agg.get_num_elems(); + Array agg_map(exec, num + 1); + components::fill_array(exec, agg_map.get_data(), agg_map.get_num_elems(), + zero()); + const dim3 grid(ceildiv(num, default_block_size)); + kernel::fill_agg_kernel<<>>( + num, agg.get_const_data(), agg_map.get_data()); + components::prefix_sum(exec, agg_map.get_data(), agg_map.get_num_elems()); + kernel::renumber_kernel<<>>( + num, agg_map.get_const_data(), agg.get_data()); + *num_agg = exec->copy_val_to_host(agg_map.get_const_data() + num); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); @@ -90,18 +127,40 @@ void find_strongest_neighbor( std::shared_ptr exec, const matrix::Csr *weight_mtx, const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) GKO_NOT_IMPLEMENTED; + Array &strongest_neighbor) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + kernel::find_strongest_neighbor_kernel<<>>( + num, weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + diag->get_stride(), agg.get_data(), strongest_neighbor.get_data()); +} GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); - template -void assign_to_exist_agg( - std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &intermediate_agg) GKO_NOT_IMPLEMENTED; +void assign_to_exist_agg(std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, + Array &agg, + Array &intermediate_agg) 
+{ + auto agg_val = (intermediate_agg.get_num_elems() > 0) + ? intermediate_agg.get_data() + : agg.get_data(); + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + kernel::assign_to_exist_agg_kernel<<>>( + num, weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + diag->get_stride(), agg.get_const_data(), agg_val); + if (intermediate_agg.get_num_elems() > 0) { + // Copy the intermediate_agg to agg + agg = intermediate_agg; + } +} GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); diff --git a/cuda/test/CMakeLists.txt b/cuda/test/CMakeLists.txt index 83d7b39f35e..fb66aaf270f 100644 --- a/cuda/test/CMakeLists.txt +++ b/cuda/test/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(base) add_subdirectory(components) add_subdirectory(factorization) add_subdirectory(matrix) +add_subdirectory(multigrid) add_subdirectory(preconditioner) add_subdirectory(reorder) add_subdirectory(solver) diff --git a/cuda/test/multigrid/CMakeLists.txt b/cuda/test/multigrid/CMakeLists.txt new file mode 100644 index 00000000000..8fe8bbeba48 --- /dev/null +++ b/cuda/test/multigrid/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(amgx_pgm_kernels) diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp new file mode 100644 index 00000000000..d1c899d0f54 --- /dev/null +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -0,0 +1,341 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/multigrid/amgx_pgm_kernels.hpp" +#include "core/test/utils/matrix_generator.hpp" +#include "cuda/test/utils.hpp" + + +namespace { + + +template +Array generate_random_array(gko::size_type num, ValueDistribution &&value_dist, + Engine &&engine, + std::shared_ptr exec) +{ + using value_type = typename Array::value_type; + Array array_host(exec->get_master(), num); + auto val = array_host.get_data(); + for (int i = 0; i < num; i++) { + val[i] = + gko::test::detail::get_rand_value(value_dist, engine); + } + Array array(exec); + array = array_host; + return array; +} + + +class AmgxPgm : public ::testing::Test { +protected: + using value_type = gko::default_precision; + using index_type = gko::int32; + using Mtx = gko::matrix::Dense<>; + using Csr = gko::matrix::Csr; + AmgxPgm() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + gko::Array gen_array(gko::size_type num, index_type min_val, + index_type max_val) + { + return generate_random_array>( + num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, + ref); + } + + void initialize_data() + { + int m = 597; + int n = 300; + int nrhs = 3; + + agg = gen_array(m, 0, n - 1); + unfinished_agg = gen_array(m, -1, n - 1); + strongest_neighbor = gen_array(m, 0, n - 1); + coarse_vector = gen_mtx(n, nrhs); + fine_vector = 
gen_mtx(m, nrhs); + auto weight = gen_mtx(m, m); + make_weight(weight.get()); + weight_csr = Csr::create(ref); + weight->convert_to(weight_csr.get()); + weight_diag = weight_csr->extract_diagonal(); + + d_agg.set_executor(cuda); + d_unfinished_agg.set_executor(cuda); + d_strongest_neighbor.set_executor(cuda); + d_coarse_vector = Mtx::create(cuda); + d_fine_vector = Mtx::create(cuda); + d_weight_csr = Csr::create(cuda); + d_weight_diag = Mtx::create(cuda); + d_agg = agg; + d_unfinished_agg = unfinished_agg; + d_strongest_neighbor = strongest_neighbor; + d_coarse_vector->copy_from(coarse_vector.get()); + d_fine_vector->copy_from(fine_vector.get()); + d_weight_csr->copy_from(weight_csr.get()); + d_weight_diag->copy_from(weight_diag.get()); + } + + void make_symetric(Mtx *mtx) + { + for (int i = 0; i < mtx->get_size()[0]; ++i) { + for (int j = i + 1; j < mtx->get_size()[1]; ++j) { + mtx->at(i, j) = mtx->at(j, i); + } + } + } + + // only for real value + void make_absoulte(Mtx *mtx) + { + for (int i = 0; i < mtx->get_size()[0]; ++i) { + for (int j = 0; j < mtx->get_size()[1]; ++j) { + mtx->at(i, j) = abs(mtx->at(i, j)); + } + } + } + + void make_diag_dominant(Mtx *mtx) + { + using std::abs; + for (int i = 0; i < mtx->get_size()[0]; ++i) { + auto sum = gko::zero(); + for (int j = 0; j < mtx->get_size()[1]; ++j) { + sum += abs(mtx->at(i, j)); + } + mtx->at(i, i) = sum; + } + } + + void make_spd(Mtx *mtx) + { + make_symetric(mtx); + make_diag_dominant(mtx); + } + + void make_weight(Mtx *mtx) + { + make_symetric(mtx); + make_absoulte(mtx); + make_diag_dominant(mtx); + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + std::ranlux48 rand_engine; + + gko::Array agg; + gko::Array unfinished_agg; + gko::Array strongest_neighbor; + + gko::Array d_agg; + gko::Array d_unfinished_agg; + gko::Array d_strongest_neighbor; + + std::unique_ptr coarse_vector; + std::unique_ptr fine_vector; + std::unique_ptr weight_diag; + std::unique_ptr weight_csr; + + std::unique_ptr 
d_coarse_vector; + std::unique_ptr d_fine_vector; + std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_csr; +}; + + +TEST_F(AmgxPgm, RestrictApplyIsEquivalentToRef) +{ + initialize_data(); + // fine->coarse + auto x = Mtx::create_with_config_of(gko::lend(coarse_vector)); + auto d_x = Mtx::create_with_config_of(gko::lend(d_coarse_vector)); + + gko::kernels::reference::amgx_pgm::restrict_apply( + ref, agg, fine_vector.get(), x.get()); + gko::kernels::cuda::amgx_pgm::restrict_apply( + cuda, d_agg, d_fine_vector.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +TEST_F(AmgxPgm, ProlongApplyaddIsEquivalentToRef) +{ + initialize_data(); + // coarse->fine + auto x = fine_vector->clone(); + auto d_x = d_fine_vector->clone(); + + gko::kernels::reference::amgx_pgm::prolong_applyadd( + ref, agg, coarse_vector.get(), x.get()); + gko::kernels::cuda::amgx_pgm::prolong_applyadd( + cuda, d_agg, d_coarse_vector.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + + gko::kernels::reference::amgx_pgm::match_edge(ref, strongest_neighbor, x); + gko::kernels::cuda::amgx_pgm::match_edge(cuda, d_strongest_neighbor, d_x); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) +{ + initialize_data(); + gko::size_type num_unagg; + gko::size_type d_num_unagg; + + gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); + gko::kernels::cuda::amgx_pgm::count_unagg(cuda, d_agg, &d_num_unagg); + + ASSERT_EQ(d_num_unagg, num_unagg); +} + + +TEST_F(AmgxPgm, RenumberIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + gko::size_type num_agg; + gko::size_type d_num_agg; + + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + gko::kernels::cuda::amgx_pgm::renumber(cuda, d_agg, &d_num_agg); + + ASSERT_EQ(d_num_agg, num_agg); + 
GKO_ASSERT_ARRAY_EQ(d_agg, agg); + ASSERT_LE(num_agg, 300); +} + + +TEST_F(AmgxPgm, FindStrongestNeighborIsEquivalentToRef) +{ + initialize_data(); + auto snb = strongest_neighbor; + auto d_snb = d_strongest_neighbor; + + gko::kernels::reference::amgx_pgm::find_strongest_neighbor( + ref, weight_csr.get(), weight_diag.get(), agg, snb); + gko::kernels::cuda::amgx_pgm::find_strongest_neighbor( + cuda, d_weight_csr.get(), d_weight_diag.get(), d_agg, d_snb); + + GKO_ASSERT_ARRAY_EQ(d_snb, snb); +} + + +TEST_F(AmgxPgm, AssignToExistAggIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + auto intermediate_agg = x; + auto d_intermediate_agg = d_x; + + gko::kernels::reference::amgx_pgm::assign_to_exist_agg( + ref, weight_csr.get(), weight_diag.get(), x, intermediate_agg); + gko::kernels::cuda::amgx_pgm::assign_to_exist_agg( + cuda, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) +{ + initialize_data(); + auto d_x = d_unfinished_agg; + auto d_intermediate_agg = gko::Array(cuda, 0); + gko::size_type d_num_unagg; + + gko::kernels::cuda::amgx_pgm::assign_to_exist_agg( + cuda, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + gko::kernels::cuda::amgx_pgm::count_unagg(cuda, d_agg, &d_num_unagg); + + // only test whether all elements are aggregated. + GKO_ASSERT_EQ(d_num_unagg, 0); +} + + +} // namespace From 6bd9ce89ceafd27c3527829286c6975549319bcd Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. 
Tsai" Date: Thu, 16 Jul 2020 15:10:16 +0200 Subject: [PATCH 02/16] cuda amgx_pgm size2 generation and related test --- common/multigrid/amgx_pgm_kernels.hpp.inc | 108 ++++++++++++++---- core/multigrid/amgx_pgm.cpp | 5 +- core/multigrid/amgx_pgm_kernels.hpp | 3 +- cuda/multigrid/amgx_pgm_kernels.cu | 53 ++++++++- cuda/test/multigrid/amgx_pgm_kernels.cpp | 25 +++- hip/multigrid/amgx_pgm_kernels.hip.cpp | 3 +- omp/multigrid/amgx_pgm_kernels.cpp | 3 +- reference/multigrid/amgx_pgm_kernels.cpp | 3 +- reference/test/multigrid/amgx_pgm_kernels.cpp | 3 +- 9 files changed, 175 insertions(+), 31 deletions(-) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index 6f460ad98c2..5bed8696b10 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -241,37 +241,103 @@ __global__ } +template +__global__ __launch_bounds__(default_block_size) void get_source_row_map_kernel( + const size_type source_nrows, const IndexType *__restrict__ agg_val, + const IndexType *__restrict__ source_row_ptrs, + IndexType *__restrict__ result_row_ptrs, IndexType *__restrict__ row_map) +{ + auto row = thread::get_thread_id_flat(); + if (row >= source_nrows) { + return; + } + const auto num_elems = source_row_ptrs[row + 1] - source_row_ptrs[row]; + const auto result_idx = agg_val[row]; + // atomic_add returns the old value, so it can be the starting point. 
+ row_map[row] = atomic_add(result_row_ptrs + result_idx, num_elems); +} + +template +__global__ __launch_bounds__(default_block_size) void move_row_kernel( + const size_type source_nrows, const IndexType *__restrict__ agg_val, + const IndexType *__restrict__ row_map, + const IndexType *__restrict__ source_row_ptrs, + const IndexType *__restrict__ source_col_idxs, + const ValueType *__restrict__ source_values, + const IndexType *__restrict__ result_row_ptrs, + IndexType *__restrict__ result_col_idxs, + ValueType *__restrict__ result_values) +{ + auto row = thread::get_thread_id_flat(); + if (row >= source_nrows) { + return; + } + auto result_i = result_row_ptrs[agg_val[row]] + row_map[row]; + for (auto i = source_row_ptrs[row]; i < source_row_ptrs[row + 1]; + i++, result_i++) { + result_col_idxs[result_i] = agg_val[source_col_idxs[i]]; + result_values[result_i] = source_values[i]; + } +} + + template __global__ __launch_bounds__(default_block_size) void merge_col_kernel( - const size_type num, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ source_col, - const ValueType *__restrict__ source_val, - IndexType *__restrict__ result_col, ValueType *__restrict__ result_val) + const size_type nrows, const IndexType *__restrict__ temp_row_ptrs, + IndexType *__restrict__ temp_col_idxs, ValueType *__restrict__ temp_values, + IndexType *__restrict__ coarse_row_ptrs) { auto row = thread::get_thread_id_flat(); - if (row >= num) { + if (row >= nrows) { return; } - const auto start = row_ptrs[row]; - const auto end = row_ptrs[row + 1]; - auto result_ind = start; - auto col = source_col[result_ind]; - auto temp = source_val[result_ind]; - for (size_type idx = start + 1; idx < end; idx++) { - auto temp_col = source_col[idx]; - if (temp_col != col) { - result_col[result_ind] = col; - result_val[result_ind] = temp; - result_ind++; - col = temp_col; - temp = zero(); + + IndexType num_elems = zero(); + const auto start = temp_row_ptrs[row]; + const auto end = 
temp_row_ptrs[row + 1]; + IndexType col = temp_col_idxs[start]; + ValueType value = temp_values[start]; + for (auto i = start + 1; i < end; i++) { + const auto current_col = temp_col_idxs[i]; + if (current_col != col) { + // apply to the original data. It is sorted, so the writing position + // is before read position + temp_col_idxs[start + num_elems] = col; + temp_values[start + num_elems] = value; + value = zero(); + col = current_col; + num_elems++; } - temp += source_val[idx]; + value += temp_values[i]; } // If start != end, need to process the final column if (start != end) { - result_col[result_ind] = col; - result_val[result_ind] = temp; + temp_col_idxs[start + num_elems] = col; + temp_values[start + num_elems] = value; + num_elems++; + } + coarse_row_ptrs[row] = num_elems; +} + + +template +__global__ __launch_bounds__(default_block_size) void copy_to_coarse_kernel( + const size_type nrows, const IndexType *__restrict__ temp_row_ptrs, + const IndexType *__restrict__ temp_col_idxs, + const ValueType *__restrict__ temp_values, + const IndexType *__restrict__ coarse_row_ptrs, + IndexType *__restrict__ coarse_col_idxs, + ValueType *__restrict__ coarse_values) +{ + auto row = thread::get_thread_id_flat(); + if (row >= nrows) { + return; + } + auto temp_i = temp_row_ptrs[row]; + for (auto i = coarse_row_ptrs[row]; i < coarse_row_ptrs[row + 1]; + i++, temp_i++) { + coarse_col_idxs[i] = temp_col_idxs[temp_i]; + coarse_values[i] = temp_values[temp_i]; } } diff --git a/core/multigrid/amgx_pgm.cpp b/core/multigrid/amgx_pgm.cpp index 09edd76d101..1803c4252d2 100644 --- a/core/multigrid/amgx_pgm.cpp +++ b/core/multigrid/amgx_pgm.cpp @@ -79,7 +79,10 @@ std::unique_ptr amgx_pgm_generate( { auto coarse = matrix::Csr::create( exec, dim<2>{num_agg, num_agg}, 0, source->get_strategy()); - exec->run(amgx_pgm::make_amgx_pgm_generate(source, agg, coarse.get())); + auto temp = matrix::Csr::create( + exec, dim<2>{num_agg, num_agg}, source->get_num_stored_elements()); + 
exec->run(amgx_pgm::make_amgx_pgm_generate(source, agg, coarse.get(), + temp.get())); return std::move(coarse); } diff --git a/core/multigrid/amgx_pgm_kernels.hpp b/core/multigrid/amgx_pgm_kernels.hpp index 9109c59e610..e0006be63a7 100644 --- a/core/multigrid/amgx_pgm_kernels.hpp +++ b/core/multigrid/amgx_pgm_kernels.hpp @@ -82,7 +82,8 @@ namespace amgx_pgm { void amgx_pgm_generate(std::shared_ptr exec, \ const matrix::Csr *source, \ const Array &agg, \ - matrix::Csr *coarse) + matrix::Csr *coarse, \ + matrix::Csr *temp) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index abe32153f4e..d84766557cb 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -47,6 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/fill_array.hpp" #include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_kernels.hpp" #include "cuda/base/cusparse_bindings.hpp" #include "cuda/base/math.hpp" #include "cuda/base/types.hpp" @@ -170,8 +172,55 @@ template void amgx_pgm_generate(std::shared_ptr exec, const matrix::Csr *source, const Array &agg, - matrix::Csr *coarse) - GKO_NOT_IMPLEMENTED; + matrix::Csr *coarse, + matrix::Csr *temp) +{ + const auto source_nrows = source->get_size()[0]; + const auto source_nnz = source->get_num_stored_elements(); + const auto coarse_nrows = coarse->get_size()[0]; + Array row_map(exec, source_nrows); + // fill coarse row pointer as zero + components::fill_array(exec, temp->get_row_ptrs(), coarse_nrows + 1, + zero()); + // compute each source row should be moved and also change column index + dim3 grid(ceildiv(source_nrows, default_block_size)); + // agg source_row (for row size) coarse row source map + kernel::get_source_row_map_kernel<<>>( + source_nrows, agg.get_const_data(), source->get_const_row_ptrs(), + temp->get_row_ptrs(), 
row_map.get_data()); + // prefix sum of temp_row_ptrs + components::prefix_sum(exec, temp->get_row_ptrs(), coarse_nrows + 1); + // copy source -> to coarse and change column index + kernel::move_row_kernel<<>>( + source_nrows, agg.get_const_data(), row_map.get_const_data(), + source->get_const_row_ptrs(), source->get_const_col_idxs(), + as_cuda_type(source->get_const_values()), temp->get_const_row_ptrs(), + temp->get_col_idxs(), as_cuda_type(temp->get_values())); + // sort csr + csr::sort_by_column_index(exec, temp); + // summation of the elements with same position + grid = ceildiv(coarse_nrows, default_block_size); + kernel::merge_col_kernel<<>>( + coarse_nrows, temp->get_const_row_ptrs(), temp->get_col_idxs(), + as_cuda_type(temp->get_values()), coarse->get_row_ptrs()); + // build the coarse matrix + components::prefix_sum(exec, coarse->get_row_ptrs(), coarse_nrows + 1); + // prefix sum of coarse->get_row_ptrs + const auto coarse_nnz = + exec->copy_val_to_host(coarse->get_row_ptrs() + coarse_nrows); + // reallocate size of column and values + matrix::CsrBuilder coarse_builder{coarse}; + auto &coarse_col_idxs_array = coarse_builder.get_col_idx_array(); + auto &coarse_vals_array = coarse_builder.get_value_array(); + coarse_col_idxs_array.resize_and_reset(coarse_nnz); + coarse_vals_array.resize_and_reset(coarse_nnz); + // copy the result + kernel::copy_to_coarse_kernel<<>>( + coarse_nrows, temp->get_const_row_ptrs(), temp->get_const_col_idxs(), + as_cuda_type(temp->get_const_values()), coarse->get_const_row_ptrs(), + coarse_col_idxs_array.get_data(), + as_cuda_type(coarse_vals_array.get_data())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index d1c899d0f54..746d9cf7c40 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -34,10 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include + #include @@ -116,7 +116,7 @@ class AmgxPgm : public ::testing::Test { void initialize_data() { int m = 597; - int n = 300; + n = 300; int nrhs = 3; agg = gen_array(m, 0, n - 1); @@ -212,6 +212,8 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr d_fine_vector; std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; + + int n; }; @@ -338,4 +340,23 @@ TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) } +TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) +{ + initialize_data(); + auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); + auto d_csr_coarse = Csr::create(cuda, gko::dim<2>{n, n}, 0); + auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, + weight_csr->get_num_stored_elements()); + auto d_csr_temp = Csr::create(cuda, gko::dim<2>{n, n}, + d_weight_csr->get_num_stored_elements()); + + gko::kernels::cuda::amgx_pgm::amgx_pgm_generate( + cuda, d_weight_csr.get(), d_agg, d_csr_coarse.get(), d_csr_temp.get()); + gko::kernels::reference::amgx_pgm::amgx_pgm_generate( + ref, weight_csr.get(), agg, csr_coarse.get(), csr_temp.get()); + + GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); +} + + } // namespace diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index d401bb72b72..88c2db995f1 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -111,7 +111,8 @@ template void amgx_pgm_generate(std::shared_ptr exec, const matrix::Csr *source, const Array &agg, - matrix::Csr *coarse) + matrix::Csr *coarse, + matrix::Csr *temp) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index d56b3ff9dab..a0db864a250 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -108,7 +108,8 @@ template void amgx_pgm_generate(std::shared_ptr 
exec, const matrix::Csr *source, const Array &agg, - matrix::Csr *coarse) + matrix::Csr *coarse, + matrix::Csr *temp) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp index 1ae7ee860b0..64788db9805 100644 --- a/reference/multigrid/amgx_pgm_kernels.cpp +++ b/reference/multigrid/amgx_pgm_kernels.cpp @@ -233,7 +233,8 @@ template void amgx_pgm_generate(std::shared_ptr exec, const matrix::Csr *source, const Array &agg, - matrix::Csr *coarse) + matrix::Csr *coarse, + matrix::Csr *temp) { // agg[i] -> I, agg[j] -> J const auto coarse_nrows = coarse->get_size()[0]; diff --git a/reference/test/multigrid/amgx_pgm_kernels.cpp b/reference/test/multigrid/amgx_pgm_kernels.cpp index 63f8baf2e62..847e859529e 100644 --- a/reference/test/multigrid/amgx_pgm_kernels.cpp +++ b/reference/test/multigrid/amgx_pgm_kernels.cpp @@ -525,9 +525,10 @@ TYPED_TEST(AmgxPgm, GenerateMtx) {2, 1, -2}, {2, 2, 5}}}); auto csr_coarse = mtx_type::create(this->exec, gko::dim<2>{3, 3}, 0); + auto empty = gko::matrix::Csr::create(this->exec); gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - this->exec, this->mtx.get(), agg, csr_coarse.get()); + this->exec, this->mtx.get(), agg, csr_coarse.get(), empty.get()); GKO_ASSERT_MTX_NEAR(csr_coarse, coarse_ans, r::value); } From d50aae8032526e2178054a8e8afb4613b1ca1428 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. 
Tsai" Date: Fri, 17 Jul 2020 12:22:26 +0200 Subject: [PATCH 03/16] fix windows issue --- cuda/test/multigrid/amgx_pgm_kernels.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 746d9cf7c40..6c2c34885a1 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -213,7 +213,7 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; - int n; + gko::size_type n; }; From 2915a3677caa8c607f153ea080f935094fd2b770 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 21 Jul 2020 17:41:34 +0200 Subject: [PATCH 04/16] omp implementation --- omp/multigrid/amgx_pgm_kernels.cpp | 194 ++++++++++++- omp/test/CMakeLists.txt | 1 + omp/test/multigrid/CMakeLists.txt | 1 + omp/test/multigrid/amgx_pgm_kernels.cpp | 353 ++++++++++++++++++++++++ 4 files changed, 538 insertions(+), 11 deletions(-) create mode 100644 omp/test/multigrid/CMakeLists.txt create mode 100644 omp/test/multigrid/amgx_pgm_kernels.cpp diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index a0db864a250..80f1b2f88ce 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -48,6 +48,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/base/allocator.hpp" +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" + + namespace gko { namespace kernels { namespace omp { @@ -62,22 +68,61 @@ namespace amgx_pgm { template void match_edge(std::shared_ptr exec, const Array &strongest_neighbor, - Array &agg) GKO_NOT_IMPLEMENTED; + Array &agg) +{ + auto agg_vals = agg.get_data(); + auto strongest_neighbor_vals = strongest_neighbor.get_const_data(); +#pragma omp parallel for + for (size_type i = 0; i < agg.get_num_elems(); i++) { + if (agg_vals[i] == -1) { + auto neighbor = strongest_neighbor_vals[i]; + if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i) { + agg_vals[i] = i; + agg_vals[neighbor] = i; + // Use the smaller index as agg point + } + } + } +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, - IndexType *num_unagg) GKO_NOT_IMPLEMENTED; + const Array &agg, IndexType *num_unagg) +{ + IndexType unagg = 0; +#pragma omp parallel for reduction(+ : unagg) + for (size_type i = 0; i < agg.get_num_elems(); i++) { + unagg += (agg.get_const_data()[i] == -1); + } + *num_unagg = unagg; +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template void renumber(std::shared_ptr exec, Array &agg, - IndexType *num_agg) GKO_NOT_IMPLEMENTED; + IndexType *num_agg) +{ + const auto num = agg.get_num_elems(); + Array agg_map(exec, num + 1); + auto agg_vals = agg.get_data(); + auto agg_map_vals = agg_map.get_data(); + components::fill_array(exec, agg_map_vals, num + 1, zero()); +#pragma omp parallel for + for (size_type i = 0; i < num; i++) { + agg_map_vals[agg_vals[i]] = 1; + } + components::prefix_sum(exec, agg_map_vals, num + 1); +#pragma omp parallel for + for (size_type i = 0; i < num; i++) { + agg_vals[i] = agg_map_vals[agg_vals[i]]; + } + *num_agg = agg_map_vals[num]; +} 
GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); @@ -87,18 +132,104 @@ void find_strongest_neighbor( std::shared_ptr exec, const matrix::Csr *weight_mtx, const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) GKO_NOT_IMPLEMENTED; + Array &strongest_neighbor) +{ + const auto row_ptrs = weight_mtx->get_const_row_ptrs(); + const auto col_idxs = weight_mtx->get_const_col_idxs(); + const auto vals = weight_mtx->get_const_values(); +#pragma omp parallel for + for (size_type row = 0; row < agg.get_num_elems(); row++) { + auto max_weight_unagg = zero(); + auto max_weight_agg = zero(); + IndexType strongest_unagg = -1; + IndexType strongest_agg = -1; + if (agg.get_const_data()[row] == -1) { + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = vals[idx] / + max(abs(diag->at(row, 0)), abs(diag->at(col, 0))); + if (agg.get_const_data()[col] == -1 && + (weight > max_weight_unagg || + (weight == max_weight_unagg && col > strongest_unagg))) { + max_weight_unagg = weight; + strongest_unagg = col; + } else if (agg.get_const_data()[col] != -1 && + (weight > max_weight_agg || + (weight == max_weight_agg && + col > strongest_agg))) { + max_weight_agg = weight; + strongest_agg = col; + } + } + + if (strongest_unagg == -1 && strongest_agg != -1) { + // all neighbor is agg, connect to the strongest agg + agg.get_data()[row] = agg.get_data()[strongest_agg]; + } else if (strongest_unagg != -1) { + // set the strongest neighbor in the unagg group + strongest_neighbor.get_data()[row] = strongest_unagg; + } else { + // no neighbor + strongest_neighbor.get_data()[row] = row; + } + } + } +} GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); template -void assign_to_exist_agg( - std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &intermediate_agg) 
GKO_NOT_IMPLEMENTED; +void assign_to_exist_agg(std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, + Array &agg, + Array &intermediate_agg) +{ + const auto row_ptrs = weight_mtx->get_const_row_ptrs(); + const auto col_idxs = weight_mtx->get_const_col_idxs(); + const auto vals = weight_mtx->get_const_values(); + auto max_weight_agg = zero(); + const auto agg_const_val = agg.get_const_data(); + auto agg_val = (intermediate_agg.get_num_elems() > 0) + ? intermediate_agg.get_data() + : agg.get_data(); +#pragma omp parallel for + for (IndexType row = 0; row < agg.get_num_elems(); row++) { + if (agg_const_val[row] != -1) { + continue; + } + IndexType strongest_agg = -1; + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = + vals[idx] / max(abs(diag->at(row, 0)), abs(diag->at(col, 0))); + if (agg_const_val[col] != -1 && + (weight > max_weight_agg || + (weight == max_weight_agg && col > strongest_agg))) { + max_weight_agg = weight; + strongest_agg = col; + } + } + if (strongest_agg != -1) { + agg_val[row] = agg_const_val[strongest_agg]; + } else { + agg_val[row] = row; + } + } + + if (intermediate_agg.get_num_elems() > 0) { + // Copy the intermediate_agg to agg + agg = intermediate_agg; + } +} GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); @@ -110,7 +241,48 @@ void amgx_pgm_generate(std::shared_ptr exec, const Array &agg, matrix::Csr *coarse, matrix::Csr *temp) - GKO_NOT_IMPLEMENTED; +{ + // agg[i] -> I, agg[j] -> J + const auto coarse_nrows = coarse->get_size()[0]; + const auto source_nrows = source->get_size()[0]; + const auto source_row_ptrs = source->get_const_row_ptrs(); + const auto source_col_idxs = source->get_const_col_idxs(); + const auto source_vals = source->get_const_values(); + vector> row_list( + source_nrows, map{exec}, exec); + for (size_type i = 0; i < source_nrows; i++) { + 
IndexType row_idx = agg.get_const_data()[i]; + for (auto j = source_row_ptrs[i]; j < source_row_ptrs[i + 1]; j++) { + const auto col = agg.get_const_data()[source_col_idxs[j]]; + const auto val = source_vals[j]; + row_list[row_idx][col] += val; + } + } + auto coarse_row_ptrs = coarse->get_row_ptrs(); +#pragma omp parallel for + for (size_type i = 0; i < coarse_nrows; i++) { + coarse_row_ptrs[i] = row_list[i].size(); + } + components::prefix_sum(exec, coarse_row_ptrs, coarse_nrows + 1); + + auto nnz = coarse_row_ptrs[coarse_nrows]; + matrix::CsrBuilder coarse_builder{coarse}; + auto &coarse_col_idxs_array = coarse_builder.get_col_idx_array(); + auto &coarse_vals_array = coarse_builder.get_value_array(); + coarse_col_idxs_array.resize_and_reset(nnz); + coarse_vals_array.resize_and_reset(nnz); + auto coarse_col_idxs = coarse_col_idxs_array.get_data(); + auto coarse_vals = coarse_vals_array.get_data(); +#pragma omp parallel for + for (size_type i = 0; i < coarse_nrows; i++) { + auto ind = coarse_row_ptrs[i]; + for (auto pair : row_list[i]) { + coarse_col_idxs[ind] = pair.first; + coarse_vals[ind] = pair.second; + ind++; + } + } +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index 2f506e51109..3759a39bc77 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -3,6 +3,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(components) add_subdirectory(factorization) add_subdirectory(matrix) +add_subdirectory(multigrid) add_subdirectory(preconditioner) add_subdirectory(reorder) add_subdirectory(solver) diff --git a/omp/test/multigrid/CMakeLists.txt b/omp/test/multigrid/CMakeLists.txt new file mode 100644 index 00000000000..8fe8bbeba48 --- /dev/null +++ b/omp/test/multigrid/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(amgx_pgm_kernels) diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp new file 
mode 100644 index 00000000000..cc822135a5d --- /dev/null +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -0,0 +1,353 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/multigrid/amgx_pgm_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/matrix_generator.hpp" + +namespace { + + +template +Array generate_random_array(gko::size_type num, ValueDistribution &&value_dist, + Engine &&engine, + std::shared_ptr exec) +{ + using value_type = typename Array::value_type; + Array array_host(exec->get_master(), num); + auto val = array_host.get_data(); + for (int i = 0; i < num; i++) { + val[i] = + gko::test::detail::get_rand_value(value_dist, engine); + } + Array array(exec); + array = array_host; + return array; +} + + +class AmgxPgm : public ::testing::Test { +protected: + using value_type = gko::default_precision; + using index_type = gko::int32; + using Mtx = gko::matrix::Dense<>; + using Csr = gko::matrix::Csr; + AmgxPgm() : rand_engine(30) {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + omp = gko::OmpExecutor::create(); + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + gko::Array gen_array(gko::size_type num, index_type min_val, + index_type max_val) + { + return generate_random_array>( + num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, + ref); + } + + void initialize_data() + { + int m = 597; + n = 300; + int nrhs = 3; + + agg = gen_array(m, 0, n - 1); + unfinished_agg = gen_array(m, -1, n - 1); + strongest_neighbor = gen_array(m, 0, n - 1); + coarse_vector = gen_mtx(n, nrhs); + fine_vector = gen_mtx(m, nrhs); + auto weight = gen_mtx(m, m); + make_weight(weight.get()); + weight_csr = Csr::create(ref); + weight->convert_to(weight_csr.get()); + weight_diag = 
weight_csr->extract_diagonal(); + + d_agg.set_executor(omp); + d_unfinished_agg.set_executor(omp); + d_strongest_neighbor.set_executor(omp); + d_coarse_vector = Mtx::create(omp); + d_fine_vector = Mtx::create(omp); + d_weight_csr = Csr::create(omp); + d_weight_diag = Mtx::create(omp); + d_agg = agg; + d_unfinished_agg = unfinished_agg; + d_strongest_neighbor = strongest_neighbor; + d_coarse_vector->copy_from(coarse_vector.get()); + d_fine_vector->copy_from(fine_vector.get()); + d_weight_csr->copy_from(weight_csr.get()); + d_weight_diag->copy_from(weight_diag.get()); + } + + void make_symetric(Mtx *mtx) + { + for (int i = 0; i < mtx->get_size()[0]; ++i) { + for (int j = i + 1; j < mtx->get_size()[1]; ++j) { + mtx->at(i, j) = mtx->at(j, i); + } + } + } + + // only for real value + void make_absoulte(Mtx *mtx) + { + for (int i = 0; i < mtx->get_size()[0]; ++i) { + for (int j = 0; j < mtx->get_size()[1]; ++j) { + mtx->at(i, j) = abs(mtx->at(i, j)); + } + } + } + + void make_diag_dominant(Mtx *mtx) + { + using std::abs; + for (int i = 0; i < mtx->get_size()[0]; ++i) { + auto sum = gko::zero(); + for (int j = 0; j < mtx->get_size()[1]; ++j) { + sum += abs(mtx->at(i, j)); + } + mtx->at(i, i) = sum; + } + } + + void make_spd(Mtx *mtx) + { + make_symetric(mtx); + make_diag_dominant(mtx); + } + + void make_weight(Mtx *mtx) + { + make_symetric(mtx); + make_absoulte(mtx); + make_diag_dominant(mtx); + } + + std::shared_ptr ref; + std::shared_ptr omp; + + std::ranlux48 rand_engine; + + gko::Array agg; + gko::Array unfinished_agg; + gko::Array strongest_neighbor; + + gko::Array d_agg; + gko::Array d_unfinished_agg; + gko::Array d_strongest_neighbor; + + std::unique_ptr coarse_vector; + std::unique_ptr fine_vector; + std::unique_ptr weight_diag; + std::unique_ptr weight_csr; + + std::unique_ptr d_coarse_vector; + std::unique_ptr d_fine_vector; + std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_csr; + + gko::size_type n; +}; + + +TEST_F(AmgxPgm, 
RestrictApplyIsEquivalentToRef) +{ + initialize_data(); + // fine->coarse + auto x = Mtx::create_with_config_of(gko::lend(coarse_vector)); + auto d_x = Mtx::create_with_config_of(gko::lend(d_coarse_vector)); + + gko::kernels::reference::amgx_pgm::restrict_apply( + ref, agg, fine_vector.get(), x.get()); + gko::kernels::omp::amgx_pgm::restrict_apply(omp, d_agg, d_fine_vector.get(), + d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +TEST_F(AmgxPgm, ProlongApplyaddIsEquivalentToRef) +{ + initialize_data(); + // coarse->fine + auto x = fine_vector->clone(); + auto d_x = d_fine_vector->clone(); + + gko::kernels::reference::amgx_pgm::prolong_applyadd( + ref, agg, coarse_vector.get(), x.get()); + gko::kernels::omp::amgx_pgm::prolong_applyadd( + omp, d_agg, d_coarse_vector.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + + gko::kernels::reference::amgx_pgm::match_edge(ref, strongest_neighbor, x); + gko::kernels::omp::amgx_pgm::match_edge(omp, d_strongest_neighbor, d_x); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) +{ + initialize_data(); + gko::size_type num_unagg; + gko::size_type d_num_unagg; + + gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); + gko::kernels::omp::amgx_pgm::count_unagg(omp, d_agg, &d_num_unagg); + + ASSERT_EQ(d_num_unagg, num_unagg); +} + + +TEST_F(AmgxPgm, RenumberIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + gko::size_type num_agg; + gko::size_type d_num_agg; + + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + gko::kernels::omp::amgx_pgm::renumber(omp, d_agg, &d_num_agg); + + ASSERT_EQ(d_num_agg, num_agg); + GKO_ASSERT_ARRAY_EQ(d_agg, agg); + ASSERT_LE(num_agg, 300); +} + + +TEST_F(AmgxPgm, FindStrongestNeighborIsEquivalentToRef) +{ + initialize_data(); + auto 
snb = strongest_neighbor; + auto d_snb = d_strongest_neighbor; + + gko::kernels::reference::amgx_pgm::find_strongest_neighbor( + ref, weight_csr.get(), weight_diag.get(), agg, snb); + gko::kernels::omp::amgx_pgm::find_strongest_neighbor( + omp, d_weight_csr.get(), d_weight_diag.get(), d_agg, d_snb); + + GKO_ASSERT_ARRAY_EQ(d_snb, snb); +} + + +TEST_F(AmgxPgm, AssignToExistAggIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + auto intermediate_agg = x; + auto d_intermediate_agg = d_x; + + gko::kernels::reference::amgx_pgm::assign_to_exist_agg( + ref, weight_csr.get(), weight_diag.get(), x, intermediate_agg); + gko::kernels::omp::amgx_pgm::assign_to_exist_agg( + omp, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) +{ + initialize_data(); + auto d_x = d_unfinished_agg; + auto d_intermediate_agg = gko::Array(omp, 0); + gko::size_type d_num_unagg; + + gko::kernels::omp::amgx_pgm::assign_to_exist_agg( + omp, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + gko::kernels::omp::amgx_pgm::count_unagg(omp, d_agg, &d_num_unagg); + + // only test whether all elements are aggregated. 
+ GKO_ASSERT_EQ(d_num_unagg, 0); +} + + +TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) +{ + initialize_data(); + auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); + auto d_csr_coarse = Csr::create(omp, gko::dim<2>{n, n}, 0); + auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, + weight_csr->get_num_stored_elements()); + auto d_csr_temp = Csr::create(omp, gko::dim<2>{n, n}, + d_weight_csr->get_num_stored_elements()); + + gko::kernels::omp::amgx_pgm::amgx_pgm_generate( + omp, d_weight_csr.get(), d_agg, d_csr_coarse.get(), d_csr_temp.get()); + gko::kernels::reference::amgx_pgm::amgx_pgm_generate( + ref, weight_csr.get(), agg, csr_coarse.get(), csr_temp.get()); + + GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); +} + + +} // namespace From 3c963a72446144e108be87e80362389a3024cda7 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 21 Jul 2020 17:54:54 +0200 Subject: [PATCH 05/16] hip implementation --- hip/multigrid/amgx_pgm_kernels.hip.cpp | 144 +++++++- hip/test/CMakeLists.txt | 1 + hip/test/multigrid/CMakeLists.txt | 1 + hip/test/multigrid/amgx_pgm_kernels.hip.cpp | 362 ++++++++++++++++++++ 4 files changed, 496 insertions(+), 12 deletions(-) create mode 100644 hip/test/multigrid/CMakeLists.txt create mode 100644 hip/test/multigrid/amgx_pgm_kernels.hip.cpp diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index 88c2db995f1..e3e90a06b74 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -45,10 +45,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_kernels.hpp" #include "hip/base/hipsparse_bindings.hip.hpp" #include "hip/base/math.hip.hpp" #include "hip/base/types.hip.hpp" -#include "hip/solver/common_trs_kernels.hip.hpp" +#include "hip/components/atomic.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" namespace gko { @@ -62,25 +68,62 @@ namespace hip { namespace amgx_pgm { +constexpr int default_block_size = 512; + + +#include "common/multigrid/amgx_pgm_kernels.hpp.inc" + + template void match_edge(std::shared_ptr exec, const Array &strongest_neighbor, - Array &agg) GKO_NOT_IMPLEMENTED; + Array &agg) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + hipLaunchKernelGGL(kernel::match_edge_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + strongest_neighbor.get_const_data(), agg.get_data()); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, - IndexType *num_unagg) GKO_NOT_IMPLEMENTED; + const Array &agg, IndexType *num_unagg) +{ + Array active_agg(exec, agg.get_num_elems()); + const dim3 grid(ceildiv(active_agg.get_num_elems(), default_block_size)); + hipLaunchKernelGGL(kernel::activate_kernel, dim3(grid), + dim3(default_block_size), 0, 0, + active_agg.get_num_elems(), agg.get_const_data(), + active_agg.get_data()); + *num_unagg = reduce_add_array(exec, active_agg.get_num_elems(), + active_agg.get_const_data()); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template void renumber(std::shared_ptr exec, Array &agg, - IndexType *num_agg) GKO_NOT_IMPLEMENTED; + IndexType *num_agg) +{ + const auto num = agg.get_num_elems(); + Array agg_map(exec, num + 1); + components::fill_array(exec, agg_map.get_data(), 
agg_map.get_num_elems(), + zero()); + const dim3 grid(ceildiv(num, default_block_size)); + hipLaunchKernelGGL(kernel::fill_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + agg.get_const_data(), agg_map.get_data()); + components::prefix_sum(exec, agg_map.get_data(), agg_map.get_num_elems()); + hipLaunchKernelGGL(kernel::renumber_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + agg_map.get_const_data(), agg.get_data()); + *num_agg = exec->copy_val_to_host(agg_map.get_const_data() + num); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); @@ -90,18 +133,45 @@ void find_strongest_neighbor( std::shared_ptr exec, const matrix::Csr *weight_mtx, const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) GKO_NOT_IMPLEMENTED; + Array &strongest_neighbor) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + hipLaunchKernelGGL( + kernel::find_strongest_neighbor_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), + diag->get_const_values(), diag->get_stride(), agg.get_data(), + strongest_neighbor.get_data()); +} GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); template -void assign_to_exist_agg( - std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &intermediate_agg) GKO_NOT_IMPLEMENTED; +void assign_to_exist_agg(std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, + Array &agg, + Array &intermediate_agg) +{ + auto agg_val = (intermediate_agg.get_num_elems() > 0) + ? 
intermediate_agg.get_data() + : agg.get_data(); + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + hipLaunchKernelGGL(kernel::assign_to_exist_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + diag->get_stride(), agg.get_const_data(), agg_val); + if (intermediate_agg.get_num_elems() > 0) { + // Copy the intermediate_agg to agg + agg = intermediate_agg; + } +} GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); @@ -113,7 +183,57 @@ void amgx_pgm_generate(std::shared_ptr exec, const Array &agg, matrix::Csr *coarse, matrix::Csr *temp) - GKO_NOT_IMPLEMENTED; +{ + const auto source_nrows = source->get_size()[0]; + const auto source_nnz = source->get_num_stored_elements(); + const auto coarse_nrows = coarse->get_size()[0]; + Array row_map(exec, source_nrows); + // fill coarse row pointer as zero + components::fill_array(exec, temp->get_row_ptrs(), coarse_nrows + 1, + zero()); + // compute each source row should be moved and also change column index + dim3 grid(ceildiv(source_nrows, default_block_size)); + // agg source_row (for row size) coarse row source map + hipLaunchKernelGGL(kernel::get_source_row_map_kernel, dim3(grid), + dim3(default_block_size), 0, 0, source_nrows, + agg.get_const_data(), source->get_const_row_ptrs(), + temp->get_row_ptrs(), row_map.get_data()); + // prefix sum of temp_row_ptrs + components::prefix_sum(exec, temp->get_row_ptrs(), coarse_nrows + 1); + // copy source -> to coarse and change column index + hipLaunchKernelGGL( + kernel::move_row_kernel, dim3(grid), dim3(default_block_size), 0, 0, + source_nrows, agg.get_const_data(), row_map.get_const_data(), + source->get_const_row_ptrs(), source->get_const_col_idxs(), + as_hip_type(source->get_const_values()), temp->get_const_row_ptrs(), + temp->get_col_idxs(), 
as_hip_type(temp->get_values())); + // sort csr + csr::sort_by_column_index(exec, temp); + // summation of the elements with same position + grid = ceildiv(coarse_nrows, default_block_size); + hipLaunchKernelGGL(kernel::merge_col_kernel, dim3(grid), + dim3(default_block_size), 0, 0, coarse_nrows, + temp->get_const_row_ptrs(), temp->get_col_idxs(), + as_hip_type(temp->get_values()), coarse->get_row_ptrs()); + // build the coarse matrix + components::prefix_sum(exec, coarse->get_row_ptrs(), coarse_nrows + 1); + // prefix sum of coarse->get_row_ptrs + const auto coarse_nnz = + exec->copy_val_to_host(coarse->get_row_ptrs() + coarse_nrows); + // reallocate size of column and values + matrix::CsrBuilder coarse_builder{coarse}; + auto &coarse_col_idxs_array = coarse_builder.get_col_idx_array(); + auto &coarse_vals_array = coarse_builder.get_value_array(); + coarse_col_idxs_array.resize_and_reset(coarse_nnz); + coarse_vals_array.resize_and_reset(coarse_nnz); + // copy the result + hipLaunchKernelGGL( + kernel::copy_to_coarse_kernel, dim3(grid), dim3(default_block_size), 0, + 0, coarse_nrows, temp->get_const_row_ptrs(), temp->get_const_col_idxs(), + as_hip_type(temp->get_const_values()), coarse->get_const_row_ptrs(), + coarse_col_idxs_array.get_data(), + as_hip_type(coarse_vals_array.get_data())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); diff --git a/hip/test/CMakeLists.txt b/hip/test/CMakeLists.txt index 9c8e39ca4ab..a5f126893bf 100644 --- a/hip/test/CMakeLists.txt +++ b/hip/test/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(base) add_subdirectory(components) add_subdirectory(factorization) add_subdirectory(matrix) +add_subdirectory(multigrid) add_subdirectory(solver) add_subdirectory(preconditioner) add_subdirectory(stop) diff --git a/hip/test/multigrid/CMakeLists.txt b/hip/test/multigrid/CMakeLists.txt new file mode 100644 index 00000000000..1c8534e5337 --- /dev/null +++ b/hip/test/multigrid/CMakeLists.txt @@ -0,0 +1 @@ 
+ginkgo_create_hip_test(amgx_pgm_kernels) diff --git a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp new file mode 100644 index 00000000000..10befbe9315 --- /dev/null +++ b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp @@ -0,0 +1,362 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/multigrid/amgx_pgm_kernels.hpp" +#include "core/test/utils/matrix_generator.hpp" +#include "hip/test/utils.hip.hpp" + + +namespace { + + +template +Array generate_random_array(gko::size_type num, ValueDistribution &&value_dist, + Engine &&engine, + std::shared_ptr exec) +{ + using value_type = typename Array::value_type; + Array array_host(exec->get_master(), num); + auto val = array_host.get_data(); + for (int i = 0; i < num; i++) { + val[i] = + gko::test::detail::get_rand_value(value_dist, engine); + } + Array array(exec); + array = array_host; + return array; +} + + +class AmgxPgm : public ::testing::Test { +protected: + using value_type = gko::default_precision; + using index_type = gko::int32; + using Mtx = gko::matrix::Dense<>; + using Csr = gko::matrix::Csr; + AmgxPgm() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + hip = gko::HipExecutor::create(0, ref); + } + + void TearDown() + { + if (hip != nullptr) { + ASSERT_NO_THROW(hip->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + gko::Array gen_array(gko::size_type num, index_type min_val, + index_type max_val) + { + return generate_random_array>( + num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, + ref); + } + + void initialize_data() + { + int m = 597; + n = 300; + int nrhs = 3; + + agg = gen_array(m, 0, n - 1); + unfinished_agg = gen_array(m, -1, n - 1); + strongest_neighbor = gen_array(m, 0, n - 1); + coarse_vector = gen_mtx(n, nrhs); + fine_vector = gen_mtx(m, nrhs); + 
auto weight = gen_mtx(m, m); + make_weight(weight.get()); + weight_csr = Csr::create(ref); + weight->convert_to(weight_csr.get()); + weight_diag = weight_csr->extract_diagonal(); + + d_agg.set_executor(hip); + d_unfinished_agg.set_executor(hip); + d_strongest_neighbor.set_executor(hip); + d_coarse_vector = Mtx::create(hip); + d_fine_vector = Mtx::create(hip); + d_weight_csr = Csr::create(hip); + d_weight_diag = Mtx::create(hip); + d_agg = agg; + d_unfinished_agg = unfinished_agg; + d_strongest_neighbor = strongest_neighbor; + d_coarse_vector->copy_from(coarse_vector.get()); + d_fine_vector->copy_from(fine_vector.get()); + d_weight_csr->copy_from(weight_csr.get()); + d_weight_diag->copy_from(weight_diag.get()); + } + + void make_symetric(Mtx *mtx) + { + for (int i = 0; i < mtx->get_size()[0]; ++i) { + for (int j = i + 1; j < mtx->get_size()[1]; ++j) { + mtx->at(i, j) = mtx->at(j, i); + } + } + } + + // only for real value + void make_absoulte(Mtx *mtx) + { + for (int i = 0; i < mtx->get_size()[0]; ++i) { + for (int j = 0; j < mtx->get_size()[1]; ++j) { + mtx->at(i, j) = abs(mtx->at(i, j)); + } + } + } + + void make_diag_dominant(Mtx *mtx) + { + using std::abs; + for (int i = 0; i < mtx->get_size()[0]; ++i) { + auto sum = gko::zero(); + for (int j = 0; j < mtx->get_size()[1]; ++j) { + sum += abs(mtx->at(i, j)); + } + mtx->at(i, i) = sum; + } + } + + void make_spd(Mtx *mtx) + { + make_symetric(mtx); + make_diag_dominant(mtx); + } + + void make_weight(Mtx *mtx) + { + make_symetric(mtx); + make_absoulte(mtx); + make_diag_dominant(mtx); + } + + std::shared_ptr ref; + std::shared_ptr hip; + + std::ranlux48 rand_engine; + + gko::Array agg; + gko::Array unfinished_agg; + gko::Array strongest_neighbor; + + gko::Array d_agg; + gko::Array d_unfinished_agg; + gko::Array d_strongest_neighbor; + + std::unique_ptr coarse_vector; + std::unique_ptr fine_vector; + std::unique_ptr weight_diag; + std::unique_ptr weight_csr; + + std::unique_ptr d_coarse_vector; + std::unique_ptr 
d_fine_vector; + std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_csr; + + gko::size_type n; +}; + + +TEST_F(AmgxPgm, RestrictApplyIsEquivalentToRef) +{ + initialize_data(); + // fine->coarse + auto x = Mtx::create_with_config_of(gko::lend(coarse_vector)); + auto d_x = Mtx::create_with_config_of(gko::lend(d_coarse_vector)); + + gko::kernels::reference::amgx_pgm::restrict_apply( + ref, agg, fine_vector.get(), x.get()); + gko::kernels::hip::amgx_pgm::restrict_apply(hip, d_agg, d_fine_vector.get(), + d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +TEST_F(AmgxPgm, ProlongApplyaddIsEquivalentToRef) +{ + initialize_data(); + // coarse->fine + auto x = fine_vector->clone(); + auto d_x = d_fine_vector->clone(); + + gko::kernels::reference::amgx_pgm::prolong_applyadd( + ref, agg, coarse_vector.get(), x.get()); + gko::kernels::hip::amgx_pgm::prolong_applyadd( + hip, d_agg, d_coarse_vector.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + + gko::kernels::reference::amgx_pgm::match_edge(ref, strongest_neighbor, x); + gko::kernels::hip::amgx_pgm::match_edge(hip, d_strongest_neighbor, d_x); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) +{ + initialize_data(); + gko::size_type num_unagg; + gko::size_type d_num_unagg; + + gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); + gko::kernels::hip::amgx_pgm::count_unagg(hip, d_agg, &d_num_unagg); + + ASSERT_EQ(d_num_unagg, num_unagg); +} + + +TEST_F(AmgxPgm, RenumberIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + gko::size_type num_agg; + gko::size_type d_num_agg; + + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + gko::kernels::hip::amgx_pgm::renumber(hip, d_agg, &d_num_agg); + + ASSERT_EQ(d_num_agg, num_agg); + GKO_ASSERT_ARRAY_EQ(d_agg, 
agg); + ASSERT_LE(num_agg, 300); +} + + +TEST_F(AmgxPgm, FindStrongestNeighborIsEquivalentToRef) +{ + initialize_data(); + auto snb = strongest_neighbor; + auto d_snb = d_strongest_neighbor; + + gko::kernels::reference::amgx_pgm::find_strongest_neighbor( + ref, weight_csr.get(), weight_diag.get(), agg, snb); + gko::kernels::hip::amgx_pgm::find_strongest_neighbor( + hip, d_weight_csr.get(), d_weight_diag.get(), d_agg, d_snb); + + GKO_ASSERT_ARRAY_EQ(d_snb, snb); +} + + +TEST_F(AmgxPgm, AssignToExistAggIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + auto intermediate_agg = x; + auto d_intermediate_agg = d_x; + + gko::kernels::reference::amgx_pgm::assign_to_exist_agg( + ref, weight_csr.get(), weight_diag.get(), x, intermediate_agg); + gko::kernels::hip::amgx_pgm::assign_to_exist_agg( + hip, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) +{ + initialize_data(); + auto d_x = d_unfinished_agg; + auto d_intermediate_agg = gko::Array(hip, 0); + gko::size_type d_num_unagg; + + gko::kernels::hip::amgx_pgm::assign_to_exist_agg( + hip, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + gko::kernels::hip::amgx_pgm::count_unagg(hip, d_agg, &d_num_unagg); + + // only test whether all elements are aggregated. 
+ GKO_ASSERT_EQ(d_num_unagg, 0); +} + + +TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) +{ + initialize_data(); + auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); + auto d_csr_coarse = Csr::create(hip, gko::dim<2>{n, n}, 0); + auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, + weight_csr->get_num_stored_elements()); + auto d_csr_temp = Csr::create(hip, gko::dim<2>{n, n}, + d_weight_csr->get_num_stored_elements()); + + gko::kernels::hip::amgx_pgm::amgx_pgm_generate( + hip, d_weight_csr.get(), d_agg, d_csr_coarse.get(), d_csr_temp.get()); + gko::kernels::reference::amgx_pgm::amgx_pgm_generate( + ref, weight_csr.get(), agg, csr_coarse.get(), csr_temp.get()); + + GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); +} + + +} // namespace From e01afa84018c54371a7f92f2ee26b40a98354110 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 12 Aug 2020 11:10:15 +0200 Subject: [PATCH 06/16] fix amg_pgm_kernel determinstic problem --- common/multigrid/amgx_pgm_kernels.hpp.inc | 9 +++++---- omp/multigrid/amgx_pgm_kernels.cpp | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index 5bed8696b10..851a7023409 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -95,11 +95,12 @@ __global__ __launch_bounds__(default_block_size) void match_edge_kernel( if (agg_vals[tidx] != -1) { return; } - auto neighbor = strongest_neighbor_vals[tidx]; + size_type neighbor = strongest_neighbor_vals[tidx]; if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx) { - agg_vals[tidx] = tidx; - agg_vals[neighbor] = tidx; // Use the smaller index as agg point + auto group = min(tidx, neighbor); + agg_vals[tidx] = group; + agg_vals[neighbor] = group; } } @@ -342,4 +343,4 @@ __global__ __launch_bounds__(default_block_size) void copy_to_coarse_kernel( } -} // namespace kernel \ No newline at end of file +} // namespace 
kernel diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 80f1b2f88ce..540a7bdddcf 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -75,11 +75,12 @@ void match_edge(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < agg.get_num_elems(); i++) { if (agg_vals[i] == -1) { - auto neighbor = strongest_neighbor_vals[i]; + size_type neighbor = strongest_neighbor_vals[i]; if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i) { - agg_vals[i] = i; - agg_vals[neighbor] = i; // Use the smaller index as agg point + auto group = min(i, neighbor); + agg_vals[i] = group; + agg_vals[neighbor] = group; } } } @@ -193,7 +194,6 @@ void assign_to_exist_agg(std::shared_ptr exec, const auto row_ptrs = weight_mtx->get_const_row_ptrs(); const auto col_idxs = weight_mtx->get_const_col_idxs(); const auto vals = weight_mtx->get_const_values(); - auto max_weight_agg = zero(); const auto agg_const_val = agg.get_const_data(); auto agg_val = (intermediate_agg.get_num_elems() > 0) ? intermediate_agg.get_data() @@ -203,6 +203,7 @@ void assign_to_exist_agg(std::shared_ptr exec, if (agg_const_val[row] != -1) { continue; } + auto max_weight_agg = zero(); IndexType strongest_agg = -1; for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { auto col = col_idxs[idx]; From dd6ff83dfeb71821bb3129ee4cb918f2fb9609c3 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. 
Tsai" Date: Mon, 31 Aug 2020 22:05:42 +0200 Subject: [PATCH 07/16] update diag related function to Diagonal format --- common/multigrid/amgx_pgm_kernels.hpp.inc | 12 +++++------ cuda/multigrid/amgx_pgm_kernels.cu | 4 ++-- cuda/test/multigrid/amgx_pgm_kernels.cpp | 8 ++++--- hip/multigrid/amgx_pgm_kernels.hip.cpp | 23 ++++++++++----------- hip/test/multigrid/amgx_pgm_kernels.hip.cpp | 8 ++++--- omp/multigrid/amgx_pgm_kernels.cpp | 8 ++++--- omp/test/multigrid/amgx_pgm_kernels.cpp | 8 ++++--- 7 files changed, 38 insertions(+), 33 deletions(-) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index 851a7023409..d8bda57a2e0 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -150,8 +150,8 @@ __global__ const size_type num, const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ col_idxs, const ValueType *__restrict__ weight_vals, - const ValueType *__restrict__ diag, const size_type diag_stride, - IndexType *__restrict__ agg, IndexType *__restrict__ strongest_neighbor) + const ValueType *__restrict__ diag, IndexType *__restrict__ agg, + IndexType *__restrict__ strongest_neighbor) { auto row = thread::get_thread_id_flat(); if (row >= num) { @@ -170,8 +170,7 @@ __global__ if (col == row) { continue; } - auto weight = weight_vals[idx] / max(abs(diag[row * diag_stride]), - abs(diag[col * diag_stride])); + auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); if (agg[col] == -1 && (weight > max_weight_unagg || (weight == max_weight_unagg && col > strongest_unagg))) { @@ -207,7 +206,7 @@ __global__ const size_type num, const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ col_idxs, const ValueType *__restrict__ weight_vals, - const ValueType *__restrict__ diag, const size_type diag_stride, + const ValueType *__restrict__ diag, const IndexType *__restrict__ agg_const_val, IndexType *__restrict__ agg_val) { @@ -225,8 +224,7 @@ 
__global__ if (col == row) { continue; } - auto weight = weight_vals[idx] / max(abs(diag[row * diag_stride]), - abs(diag[col * diag_stride])); + auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); if (agg_const_val[col] != -1 && (weight > max_weight_agg || (weight == max_weight_agg && col > strongest_agg))) { diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index d84766557cb..b8700466365 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -136,7 +136,7 @@ void find_strongest_neighbor( kernel::find_strongest_neighbor_kernel<<>>( num, weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), diag->get_const_values(), - diag->get_stride(), agg.get_data(), strongest_neighbor.get_data()); + agg.get_data(), strongest_neighbor.get_data()); } GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( @@ -157,7 +157,7 @@ void assign_to_exist_agg(std::shared_ptr exec, kernel::assign_to_exist_agg_kernel<<>>( num, weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), diag->get_const_values(), - diag->get_stride(), agg.get_const_data(), agg_val); + agg.get_const_data(), agg_val); if (intermediate_agg.get_num_elems() > 0) { // Copy the intermediate_agg to agg agg = intermediate_agg; diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 6c2c34885a1..fa4e582085d 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include #include #include @@ -81,6 +82,7 @@ class AmgxPgm : public ::testing::Test { using index_type = gko::int32; using Mtx = gko::matrix::Dense<>; using Csr = gko::matrix::Csr; + using Diag = gko::matrix::Diagonal; AmgxPgm() : rand_engine(30) {} void SetUp() @@ -136,7 +138,7 @@ class AmgxPgm : public ::testing::Test { d_coarse_vector = Mtx::create(cuda); d_fine_vector = Mtx::create(cuda); d_weight_csr = Csr::create(cuda); - d_weight_diag = Mtx::create(cuda); + d_weight_diag = Diag::create(cuda); d_agg = agg; d_unfinished_agg = unfinished_agg; d_strongest_neighbor = strongest_neighbor; @@ -205,12 +207,12 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr coarse_vector; std::unique_ptr fine_vector; - std::unique_ptr weight_diag; + std::unique_ptr weight_diag; std::unique_ptr weight_csr; std::unique_ptr d_coarse_vector; std::unique_ptr d_fine_vector; - std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; gko::size_type n; diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index e3e90a06b74..41fc38854a8 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -137,12 +137,12 @@ void find_strongest_neighbor( { const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); - hipLaunchKernelGGL( - kernel::find_strongest_neighbor_kernel, dim3(grid), - dim3(default_block_size), 0, 0, num, weight_mtx->get_const_row_ptrs(), - weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), - diag->get_const_values(), diag->get_stride(), agg.get_data(), - strongest_neighbor.get_data()); + hipLaunchKernelGGL(kernel::find_strongest_neighbor_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + agg.get_data(), strongest_neighbor.get_data()); } 
GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( @@ -161,12 +161,11 @@ void assign_to_exist_agg(std::shared_ptr exec, : agg.get_data(); const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); - hipLaunchKernelGGL(kernel::assign_to_exist_agg_kernel, dim3(grid), - dim3(default_block_size), 0, 0, num, - weight_mtx->get_const_row_ptrs(), - weight_mtx->get_const_col_idxs(), - weight_mtx->get_const_values(), diag->get_const_values(), - diag->get_stride(), agg.get_const_data(), agg_val); + hipLaunchKernelGGL( + kernel::assign_to_exist_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), + diag->get_const_values(), agg.get_const_data(), agg_val); if (intermediate_agg.get_num_elems() > 0) { // Copy the intermediate_agg to agg agg = intermediate_agg; diff --git a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp index 10befbe9315..9575e854207 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include #include #include @@ -81,6 +82,7 @@ class AmgxPgm : public ::testing::Test { using index_type = gko::int32; using Mtx = gko::matrix::Dense<>; using Csr = gko::matrix::Csr; + using Diag = gko::matrix::Diagonal; AmgxPgm() : rand_engine(30) {} void SetUp() @@ -136,7 +138,7 @@ class AmgxPgm : public ::testing::Test { d_coarse_vector = Mtx::create(hip); d_fine_vector = Mtx::create(hip); d_weight_csr = Csr::create(hip); - d_weight_diag = Mtx::create(hip); + d_weight_diag = Diag::create(hip); d_agg = agg; d_unfinished_agg = unfinished_agg; d_strongest_neighbor = strongest_neighbor; @@ -205,12 +207,12 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr coarse_vector; std::unique_ptr fine_vector; - std::unique_ptr weight_diag; + std::unique_ptr weight_diag; std::unique_ptr weight_csr; std::unique_ptr d_coarse_vector; std::unique_ptr d_fine_vector; - std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; gko::size_type n; diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 540a7bdddcf..9c4efefebe6 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -138,6 +138,7 @@ void find_strongest_neighbor( const auto row_ptrs = weight_mtx->get_const_row_ptrs(); const auto col_idxs = weight_mtx->get_const_col_idxs(); const auto vals = weight_mtx->get_const_values(); + const auto diag_vals = diag->get_const_values(); #pragma omp parallel for for (size_type row = 0; row < agg.get_num_elems(); row++) { auto max_weight_unagg = zero(); @@ -150,8 +151,8 @@ void find_strongest_neighbor( if (col == row) { continue; } - auto weight = vals[idx] / - max(abs(diag->at(row, 0)), abs(diag->at(col, 0))); + auto weight = + vals[idx] / max(abs(diag_vals[col]), abs(diag_vals[col])); if (agg.get_const_data()[col] == -1 && (weight > max_weight_unagg || (weight == max_weight_unagg && col > strongest_unagg))) { @@ -198,6 +199,7 @@ void 
assign_to_exist_agg(std::shared_ptr exec, auto agg_val = (intermediate_agg.get_num_elems() > 0) ? intermediate_agg.get_data() : agg.get_data(); + const auto diag_vals = diag->get_const_values(); #pragma omp parallel for for (IndexType row = 0; row < agg.get_num_elems(); row++) { if (agg_const_val[row] != -1) { @@ -211,7 +213,7 @@ void assign_to_exist_agg(std::shared_ptr exec, continue; } auto weight = - vals[idx] / max(abs(diag->at(row, 0)), abs(diag->at(col, 0))); + vals[idx] / max(abs(diag_vals[row]), abs(diag_vals[col])); if (agg_const_val[col] != -1 && (weight > max_weight_agg || (weight == max_weight_agg && col > strongest_agg))) { diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index cc822135a5d..09e1cf918a3 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -80,6 +81,7 @@ class AmgxPgm : public ::testing::Test { using index_type = gko::int32; using Mtx = gko::matrix::Dense<>; using Csr = gko::matrix::Csr; + using Diag = gko::matrix::Diagonal; AmgxPgm() : rand_engine(30) {} void SetUp() @@ -127,7 +129,7 @@ class AmgxPgm : public ::testing::Test { d_coarse_vector = Mtx::create(omp); d_fine_vector = Mtx::create(omp); d_weight_csr = Csr::create(omp); - d_weight_diag = Mtx::create(omp); + d_weight_diag = Diag::create(omp); d_agg = agg; d_unfinished_agg = unfinished_agg; d_strongest_neighbor = strongest_neighbor; @@ -196,12 +198,12 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr coarse_vector; std::unique_ptr fine_vector; - std::unique_ptr weight_diag; + std::unique_ptr weight_diag; std::unique_ptr weight_csr; std::unique_ptr d_coarse_vector; std::unique_ptr d_fine_vector; - std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; gko::size_type n; From 
9009d4b60087b6f6a8cfd416f5c989ac480fd0c8 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 27 Jan 2021 16:37:50 +0800 Subject: [PATCH 08/16] fix the type of the tests --- cuda/test/multigrid/amgx_pgm_kernels.cpp | 10 +++++----- hip/test/multigrid/amgx_pgm_kernels.hip.cpp | 10 +++++----- omp/test/multigrid/amgx_pgm_kernels.cpp | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index fa4e582085d..a9852bd3118 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -267,8 +267,8 @@ TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) { initialize_data(); - gko::size_type num_unagg; - gko::size_type d_num_unagg; + index_type num_unagg; + index_type d_num_unagg; gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); gko::kernels::cuda::amgx_pgm::count_unagg(cuda, d_agg, &d_num_unagg); @@ -282,8 +282,8 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) initialize_data(); auto x = unfinished_agg; auto d_x = d_unfinished_agg; - gko::size_type num_agg; - gko::size_type d_num_agg; + index_type num_agg; + index_type d_num_agg; gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); gko::kernels::cuda::amgx_pgm::renumber(cuda, d_agg, &d_num_agg); @@ -331,7 +331,7 @@ TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) initialize_data(); auto d_x = d_unfinished_agg; auto d_intermediate_agg = gko::Array(cuda, 0); - gko::size_type d_num_unagg; + index_type d_num_unagg; gko::kernels::cuda::amgx_pgm::assign_to_exist_agg( cuda, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); diff --git a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp index 9575e854207..0d059b631b2 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp @@ -267,8 +267,8 @@ 
TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) { initialize_data(); - gko::size_type num_unagg; - gko::size_type d_num_unagg; + index_type num_unagg; + index_type d_num_unagg; gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); gko::kernels::hip::amgx_pgm::count_unagg(hip, d_agg, &d_num_unagg); @@ -282,8 +282,8 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) initialize_data(); auto x = unfinished_agg; auto d_x = d_unfinished_agg; - gko::size_type num_agg; - gko::size_type d_num_agg; + index_type num_agg; + index_type d_num_agg; gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); gko::kernels::hip::amgx_pgm::renumber(hip, d_agg, &d_num_agg); @@ -331,7 +331,7 @@ TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) initialize_data(); auto d_x = d_unfinished_agg; auto d_intermediate_agg = gko::Array(hip, 0); - gko::size_type d_num_unagg; + index_type d_num_unagg; gko::kernels::hip::amgx_pgm::assign_to_exist_agg( hip, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index 09e1cf918a3..3b00ea89054 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -258,8 +258,8 @@ TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) { initialize_data(); - gko::size_type num_unagg; - gko::size_type d_num_unagg; + index_type num_unagg; + index_type d_num_unagg; gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); gko::kernels::omp::amgx_pgm::count_unagg(omp, d_agg, &d_num_unagg); @@ -273,8 +273,8 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) initialize_data(); auto x = unfinished_agg; auto d_x = d_unfinished_agg; - gko::size_type num_agg; - gko::size_type d_num_agg; + index_type num_agg; + index_type d_num_agg; gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); 
gko::kernels::omp::amgx_pgm::renumber(omp, d_agg, &d_num_agg); @@ -322,7 +322,7 @@ TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) initialize_data(); auto d_x = d_unfinished_agg; auto d_intermediate_agg = gko::Array(omp, 0); - gko::size_type d_num_unagg; + index_type d_num_unagg; gko::kernels::omp::amgx_pgm::assign_to_exist_agg( omp, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); From ac1f66e8fea26b6bc200b169bc6ec798b8544022 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 27 Jan 2021 16:39:49 +0800 Subject: [PATCH 09/16] format --- cuda/test/multigrid/amgx_pgm_kernels.cpp | 2 +- hip/test/multigrid/amgx_pgm_kernels.hip.cpp | 2 +- omp/test/multigrid/amgx_pgm_kernels.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index a9852bd3118..2d855dc328b 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp index 0d059b631b2..6015b2f5522 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index 3b00ea89054..22fea84bcaa 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without From 32bffa2f7e93688b4e857c3d35394a89c8d4f9a1 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 10 Mar 2021 23:53:40 +0800 Subject: [PATCH 10/16] delete the tests using unused function --- common/multigrid/amgx_pgm_kernels.hpp.inc | 38 +-------------------- cuda/test/multigrid/amgx_pgm_kernels.cpp | 32 ----------------- hip/test/multigrid/amgx_pgm_kernels.hip.cpp | 32 ----------------- omp/test/multigrid/amgx_pgm_kernels.cpp | 32 ----------------- 4 files changed, 1 insertion(+), 133 deletions(-) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index d8bda57a2e0..8565aaa6227 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,42 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace kernel { -template -__global__ __launch_bounds__(default_block_size) void restrict_apply_kernel( - const IndexType *__restrict__ agg, const size_type num_rows, - const size_type num_rhs, const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ x, - const size_type x_stride) -{ - auto tidx = thread::get_thread_id_flat(); - auto row = tidx / num_rhs; - if (row >= num_rows) { - return; - } - auto col = tidx % num_rhs; - auto ind = agg[row]; - atomic_add(x + ind * x_stride + col, b[row * b_stride + col]); -} - - -template -__global__ __launch_bounds__(default_block_size) void prolong_applyadd_kernel( - const IndexType *__restrict__ agg, const size_type num_rows, - const size_type num_rhs, const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ x, - const size_type x_stride) -{ - auto tidx = thread::get_thread_id_flat(); - auto row = tidx / num_rhs; - if (row >= num_rows) { - return; - } - auto col = tidx % num_rhs; - auto ind = agg[row]; - x[row * x_stride + col] += b[ind * b_stride + col]; -} - - template __global__ __launch_bounds__(default_block_size) void replace_kernel( size_type size, const IndexType *__restrict__ source, diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 2d855dc328b..6a3071e5ce0 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -219,38 +219,6 @@ class AmgxPgm : public ::testing::Test { }; -TEST_F(AmgxPgm, RestrictApplyIsEquivalentToRef) -{ - initialize_data(); - // fine->coarse - auto x = Mtx::create_with_config_of(gko::lend(coarse_vector)); - auto d_x = Mtx::create_with_config_of(gko::lend(d_coarse_vector)); - - gko::kernels::reference::amgx_pgm::restrict_apply( - ref, agg, fine_vector.get(), x.get()); - gko::kernels::cuda::amgx_pgm::restrict_apply( - cuda, d_agg, d_fine_vector.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(AmgxPgm, 
ProlongApplyaddIsEquivalentToRef) -{ - initialize_data(); - // coarse->fine - auto x = fine_vector->clone(); - auto d_x = d_fine_vector->clone(); - - gko::kernels::reference::amgx_pgm::prolong_applyadd( - ref, agg, coarse_vector.get(), x.get()); - gko::kernels::cuda::amgx_pgm::prolong_applyadd( - cuda, d_agg, d_coarse_vector.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) { initialize_data(); diff --git a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp index 6015b2f5522..ad1e8f040e7 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.hip.cpp @@ -219,38 +219,6 @@ class AmgxPgm : public ::testing::Test { }; -TEST_F(AmgxPgm, RestrictApplyIsEquivalentToRef) -{ - initialize_data(); - // fine->coarse - auto x = Mtx::create_with_config_of(gko::lend(coarse_vector)); - auto d_x = Mtx::create_with_config_of(gko::lend(d_coarse_vector)); - - gko::kernels::reference::amgx_pgm::restrict_apply( - ref, agg, fine_vector.get(), x.get()); - gko::kernels::hip::amgx_pgm::restrict_apply(hip, d_agg, d_fine_vector.get(), - d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(AmgxPgm, ProlongApplyaddIsEquivalentToRef) -{ - initialize_data(); - // coarse->fine - auto x = fine_vector->clone(); - auto d_x = d_fine_vector->clone(); - - gko::kernels::reference::amgx_pgm::prolong_applyadd( - ref, agg, coarse_vector.get(), x.get()); - gko::kernels::hip::amgx_pgm::prolong_applyadd( - hip, d_agg, d_coarse_vector.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) { initialize_data(); diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index 22fea84bcaa..9a27ef3f4fc 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -210,38 +210,6 @@ class AmgxPgm : public ::testing::Test { }; 
-TEST_F(AmgxPgm, RestrictApplyIsEquivalentToRef) -{ - initialize_data(); - // fine->coarse - auto x = Mtx::create_with_config_of(gko::lend(coarse_vector)); - auto d_x = Mtx::create_with_config_of(gko::lend(d_coarse_vector)); - - gko::kernels::reference::amgx_pgm::restrict_apply( - ref, agg, fine_vector.get(), x.get()); - gko::kernels::omp::amgx_pgm::restrict_apply(omp, d_agg, d_fine_vector.get(), - d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(AmgxPgm, ProlongApplyaddIsEquivalentToRef) -{ - initialize_data(); - // coarse->fine - auto x = fine_vector->clone(); - auto d_x = d_fine_vector->clone(); - - gko::kernels::reference::amgx_pgm::prolong_applyadd( - ref, agg, coarse_vector.get(), x.get()); - gko::kernels::omp::amgx_pgm::prolong_applyadd( - omp, d_agg, d_coarse_vector.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) { initialize_data(); From 29dfbaa2a6acb5cc70fc58220347f7acadece06a Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. 
Tsai" Date: Mon, 15 Mar 2021 15:24:23 +0800 Subject: [PATCH 11/16] change deprecated header, fix wrong type, add test --- common/multigrid/amgx_pgm_kernels.hpp.inc | 45 ++++++++++++++++--- cuda/multigrid/amgx_pgm_kernels.cu | 19 +++++--- cuda/test/multigrid/amgx_pgm_kernels.cpp | 30 +++++++++++++ dpcpp/multigrid/amgx_pgm_kernels.dp.cpp | 10 ++--- hip/multigrid/amgx_pgm_kernels.hip.cpp | 24 ++++++---- hip/test/multigrid/CMakeLists.txt | 2 +- ...m_kernels.hip.cpp => amgx_pgm_kernels.cpp} | 30 +++++++++++++ reference/test/multigrid/amgx_pgm_kernels.cpp | 2 +- 8 files changed, 134 insertions(+), 28 deletions(-) rename hip/test/multigrid/{amgx_pgm_kernels.hip.cpp => amgx_pgm_kernels.cpp} (88%) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index 8565aaa6227..eda2854763e 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -52,14 +52,14 @@ __global__ __launch_bounds__(default_block_size) void match_edge_kernel( size_type num, const IndexType *__restrict__ strongest_neighbor_vals, IndexType *__restrict__ agg_vals) { - auto tidx = thread::get_thread_id_flat(); + auto tidx = thread::get_thread_id_flat(); if (tidx >= num) { return; } if (agg_vals[tidx] != -1) { return; } - size_type neighbor = strongest_neighbor_vals[tidx]; + auto neighbor = strongest_neighbor_vals[tidx]; if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx) { // Use the smaller index as agg point auto group = min(tidx, neighbor); @@ -175,10 +175,7 @@ __global__ IndexType *__restrict__ agg_val) { auto row = thread::get_thread_id_flat(); - if (row >= num) { - return; - } - if (agg_val[row] != -1) { + if (row >= num || agg_val[row] != -1) { return; } ValueType max_weight_agg = zero(); @@ -203,6 +200,42 @@ __global__ } } +// This is the non-deterministic implementation, which is the same as the +// previous one except that agg_val and agg_const_val are the same array.
+template +__global__ + __launch_bounds__(default_block_size) void assign_to_exist_agg_kernel( + const size_type num, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ weight_vals, + const ValueType *__restrict__ diag, IndexType *__restrict__ agg_val) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num || agg_val[row] != -1) { + return; + } + ValueType max_weight_agg = zero(); + IndexType strongest_agg = -1; + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); + if (agg_val[col] != -1 && + (weight > max_weight_agg || + (weight == max_weight_agg && col > strongest_agg))) { + max_weight_agg = weight; + strongest_agg = col; + } + } + if (strongest_agg != -1) { + agg_val[row] = agg_val[strongest_agg]; + } else { + agg_val[row] = row; + } +} + template __global__ __launch_bounds__(default_block_size) void get_source_row_map_kernel( diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index b8700466365..1560d7bf7f7 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -149,18 +149,23 @@ void assign_to_exist_agg(std::shared_ptr exec, Array &agg, Array &intermediate_agg) { - auto agg_val = (intermediate_agg.get_num_elems() > 0) - ? 
intermediate_agg.get_data() - : agg.get_data(); const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); - kernel::assign_to_exist_agg_kernel<<>>( - num, weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), - weight_mtx->get_const_values(), diag->get_const_values(), - agg.get_const_data(), agg_val); if (intermediate_agg.get_num_elems() > 0) { + // deterministic kernel + kernel::assign_to_exist_agg_kernel<<>>( + num, weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), + diag->get_const_values(), agg.get_const_data(), + intermediate_agg.get_data()); // Copy the intermediate_agg to agg agg = intermediate_agg; + } else { + // non-deterministic kernel + kernel::assign_to_exist_agg_kernel<<>>( + num, weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), + diag->get_const_values(), agg.get_data()); } } diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 6a3071e5ce0..10d2b97a8bd 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -131,6 +131,10 @@ class AmgxPgm : public ::testing::Test { weight_csr = Csr::create(ref); weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); + auto system_dense = gen_mtx(m, m); + make_spd(system_dense.get()); + system_mtx = Csr::create(ref); + system_dense->convert_to(system_mtx.get()); d_agg.set_executor(cuda); d_unfinished_agg.set_executor(cuda); @@ -139,6 +143,7 @@ class AmgxPgm : public ::testing::Test { d_fine_vector = Mtx::create(cuda); d_weight_csr = Csr::create(cuda); d_weight_diag = Diag::create(cuda); + d_system_mtx = Csr::create(cuda); d_agg = agg; d_unfinished_agg = unfinished_agg; d_strongest_neighbor = strongest_neighbor; @@ -146,6 +151,7 @@ class AmgxPgm : public ::testing::Test { d_fine_vector->copy_from(fine_vector.get());
d_weight_csr->copy_from(weight_csr.get()); d_weight_diag->copy_from(weight_diag.get()); + d_system_mtx->copy_from(system_mtx.get()); } void make_symetric(Mtx *mtx) @@ -209,11 +215,13 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr fine_vector; std::unique_ptr weight_diag; std::unique_ptr weight_csr; + std::shared_ptr system_mtx; std::unique_ptr d_coarse_vector; std::unique_ptr d_fine_vector; std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; + std::shared_ptr d_system_mtx; gko::size_type n; }; @@ -329,4 +337,26 @@ TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) } +TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) +{ + initialize_data(); + auto mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(ref); + auto d_mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(cuda); + + auto mg_level = mg_level_factory->generate(system_mtx); + auto d_mg_level = d_mg_level_factory->generate(d_system_mtx); + + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_restrict_op()), + gko::as(mg_level->get_restrict_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_coarse_op()), + gko::as(mg_level->get_coarse_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_prolong_op()), + gko::as(mg_level->get_prolong_op()), 1e-14); +} + + } // namespace diff --git a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp index 933fc6fda7d..32817e5e834 100644 --- a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp +++ b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp @@ -101,11 +101,11 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template -void amgx_pgm_generate(std::shared_ptr exec, - const matrix::Csr *source, - const Array &agg, - matrix::Csr *coarse) - GKO_NOT_IMPLEMENTED; +void amgx_pgm_generate( + std::shared_ptr exec, + const matrix::Csr *source, + const Array &agg, matrix::Csr *coarse, + matrix::Csr *temp) GKO_NOT_IMPLEMENTED; 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index 41fc38854a8..2199f53e5c3 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -156,19 +156,27 @@ void assign_to_exist_agg(std::shared_ptr exec, Array &agg, Array &intermediate_agg) { - auto agg_val = (intermediate_agg.get_num_elems() > 0) - ? intermediate_agg.get_data() - : agg.get_data(); const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); - hipLaunchKernelGGL( - kernel::assign_to_exist_agg_kernel, dim3(grid), - dim3(default_block_size), 0, 0, num, weight_mtx->get_const_row_ptrs(), - weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), - diag->get_const_values(), agg.get_const_data(), agg_val); + if (intermediate_agg.get_num_elems() > 0) { + // deterministic kernel + hipLaunchKernelGGL( + kernel::assign_to_exist_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + agg.get_const_data(), intermediate_agg.get_data()); // Copy the intermediate_agg to agg agg = intermediate_agg; + } else { + // non-deterministic kernel + hipLaunchKernelGGL(kernel::assign_to_exist_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), + diag->get_const_values(), agg.get_data()); } } diff --git a/hip/test/multigrid/CMakeLists.txt b/hip/test/multigrid/CMakeLists.txt index 1c8534e5337..481c2cc1bf2 100644 --- a/hip/test/multigrid/CMakeLists.txt +++ b/hip/test/multigrid/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_hip_test(amgx_pgm_kernels) +ginkgo_create_hip_test_special_linkage(amgx_pgm_kernels) diff --git a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp
b/hip/test/multigrid/amgx_pgm_kernels.cpp similarity index 88% rename from hip/test/multigrid/amgx_pgm_kernels.hip.cpp rename to hip/test/multigrid/amgx_pgm_kernels.cpp index ad1e8f040e7..71acf6ed06a 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -131,6 +131,10 @@ class AmgxPgm : public ::testing::Test { weight_csr = Csr::create(ref); weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); + auto system_dense = gen_mtx(m, m); + make_spd(system_dense.get()); + system_mtx = Csr::create(ref); + system_dense->convert_to(system_mtx.get()); d_agg.set_executor(hip); d_unfinished_agg.set_executor(hip); @@ -139,6 +143,7 @@ class AmgxPgm : public ::testing::Test { d_fine_vector = Mtx::create(hip); d_weight_csr = Csr::create(hip); d_weight_diag = Diag::create(hip); + d_system_mtx = Csr::create(hip); d_agg = agg; d_unfinished_agg = unfinished_agg; d_strongest_neighbor = strongest_neighbor; @@ -146,6 +151,7 @@ class AmgxPgm : public ::testing::Test { d_fine_vector->copy_from(fine_vector.get()); d_weight_csr->copy_from(weight_csr.get()); d_weight_diag->copy_from(weight_diag.get()); + d_system_mtx->copy_from(system_mtx.get()); } void make_symetric(Mtx *mtx) @@ -209,11 +215,13 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr fine_vector; std::unique_ptr weight_diag; std::unique_ptr weight_csr; + std::shared_ptr system_mtx; std::unique_ptr d_coarse_vector; std::unique_ptr d_fine_vector; std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; + std::shared_ptr d_system_mtx; gko::size_type n; }; @@ -329,4 +337,26 @@ TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) } +TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) +{ + initialize_data(); + auto mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(ref); + auto d_mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(hip); + + auto mg_level = 
mg_level_factory->generate(system_mtx); + auto d_mg_level = d_mg_level_factory->generate(d_system_mtx); + + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_restrict_op()), + gko::as(mg_level->get_restrict_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_coarse_op()), + gko::as(mg_level->get_coarse_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_prolong_op()), + gko::as(mg_level->get_prolong_op()), 1e-14); +} + + } // namespace diff --git a/reference/test/multigrid/amgx_pgm_kernels.cpp b/reference/test/multigrid/amgx_pgm_kernels.cpp index 847e859529e..09918310823 100644 --- a/reference/test/multigrid/amgx_pgm_kernels.cpp +++ b/reference/test/multigrid/amgx_pgm_kernels.cpp @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include +#include #include From 51f003828cf21ee504d3b86419aba91f6177bd5c Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 26 Mar 2021 03:05:41 +0800 Subject: [PATCH 12/16] review update, fix omp issue, collect dup. 
helpers --- common/multigrid/amgx_pgm_kernels.hpp.inc | 22 +----- core/test/utils.hpp | 3 + core/test/utils/array_generator.hpp | 69 +++++++++++++++++ core/test/utils/matrix_generator.hpp | 23 +----- core/test/utils/matrix_utils.hpp | 84 ++++++++++++++++++++ core/test/utils/value_generator.hpp | 69 +++++++++++++++++ cuda/test/multigrid/amgx_pgm_kernels.cpp | 67 ++-------------- cuda/test/solver/bicg_kernels.cpp | 29 +------ cuda/test/solver/bicgstab_kernels.cpp | 14 +--- cuda/test/solver/cg_kernels.cpp | 29 +------ cuda/test/solver/cgs_kernels.cpp | 14 +--- cuda/test/solver/fcg_kernels.cpp | 29 +------ cuda/test/solver/idr_kernels.cpp | 12 --- hip/test/multigrid/CMakeLists.txt | 2 +- hip/test/multigrid/amgx_pgm_kernels.cpp | 66 ++-------------- hip/test/solver/bicg_kernels.cpp | 29 +------ hip/test/solver/bicgstab_kernels.cpp | 14 +--- hip/test/solver/cg_kernels.cpp | 29 +------ hip/test/solver/cgs_kernels.cpp | 14 +--- hip/test/solver/fcg_kernels.cpp | 29 +------ hip/test/solver/idr_kernels.cpp | 12 --- omp/multigrid/amgx_pgm_kernels.cpp | 13 ++-- omp/test/multigrid/amgx_pgm_kernels.cpp | 94 +++++++++-------------- omp/test/solver/bicg_kernels.cpp | 29 +------ omp/test/solver/bicgstab_kernels.cpp | 14 +--- omp/test/solver/cg_kernels.cpp | 29 +------ omp/test/solver/cgs_kernels.cpp | 14 +--- omp/test/solver/fcg_kernels.cpp | 29 +------ omp/test/solver/gmres_kernels.cpp | 12 --- omp/test/solver/idr_kernels.cpp | 12 --- reference/multigrid/amgx_pgm_kernels.cpp | 6 +- 31 files changed, 306 insertions(+), 605 deletions(-) create mode 100644 core/test/utils/array_generator.hpp create mode 100644 core/test/utils/matrix_utils.hpp create mode 100644 core/test/utils/value_generator.hpp diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index eda2854763e..9c2d434f508 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -33,20 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. namespace kernel { -template -__global__ __launch_bounds__(default_block_size) void replace_kernel( - size_type size, const IndexType *__restrict__ source, - IndexType *__restrict__ result) -{ - auto tidx = thread::get_thread_id_flat(); - if (tidx >= size) { - return; - } - - result[tidx] = source[tidx] == -1; -} - - template __global__ __launch_bounds__(default_block_size) void match_edge_kernel( size_type num, const IndexType *__restrict__ strongest_neighbor_vals, @@ -60,11 +46,11 @@ __global__ __launch_bounds__(default_block_size) void match_edge_kernel( return; } auto neighbor = strongest_neighbor_vals[tidx]; - if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx) { + if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx && + tidx < neighbor) { // Use the smaller index as agg point - auto group = min(tidx, neighbor); - agg_vals[tidx] = group; - agg_vals[neighbor] = group; + agg_vals[tidx] = tidx; + agg_vals[neighbor] = tidx; } } diff --git a/core/test/utils.hpp b/core/test/utils.hpp index 6cc2ad102c5..05bf216c8bb 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -48,8 +48,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/extended_float.hpp" +#include "core/test/utils/array_generator.hpp" #include "core/test/utils/assertions.hpp" #include "core/test/utils/matrix_generator.hpp" +#include "core/test/utils/matrix_utils.hpp" +#include "core/test/utils/value_generator.hpp" namespace gko { diff --git a/core/test/utils/array_generator.hpp b/core/test/utils/array_generator.hpp new file mode 100644 index 00000000000..1ad52c18e26 --- /dev/null +++ b/core/test/utils/array_generator.hpp @@ -0,0 +1,69 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_ARRAY_GENERATOR_HPP_ +#define GKO_CORE_TEST_UTILS_ARRAY_GENERATOR_HPP_ + + +#include +#include +#include + + +#include "core/test/utils/value_generator.hpp" + + +namespace gko { +namespace test { + + +template +Array generate_random_array(size_type num, + ValueDistribution &&value_dist, + Engine &&engine, + std::shared_ptr exec) +{ + Array array(exec->get_master(), num); + auto val = array.get_data(); + for (int i = 0; i < num; i++) { + val[i] = detail::get_rand_value(value_dist, engine); + } + array.set_executor(exec); + return array; +} + + +} // namespace test +} // namespace gko + + +#endif // GKO_CORE_TEST_UTILS_ARRAY_GENERATOR_HPP_ diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index c3f9b2fb50d..04f98126c41 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -46,28 +46,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -namespace gko { -namespace test { -namespace detail { - - -template -typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) -{ - return dist(gen); -} +#include "core/test/utils/value_generator.hpp" -template -typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) -{ - return ValueType(dist(gen), dist(gen)); -} - - -} // namespace detail +namespace gko { +namespace test { /** diff --git a/core/test/utils/matrix_utils.hpp b/core/test/utils/matrix_utils.hpp new file mode 100644 index 00000000000..c8b269d493f --- /dev/null +++ b/core/test/utils/matrix_utils.hpp @@ -0,0 +1,84 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_MATRIX_UTILS_HPP_ +#define GKO_CORE_TEST_UTILS_MATRIX_UTILS_HPP_ + + +#include + +#include "core/test/utils/value_generator.hpp" + + +namespace gko { +namespace test { + + +template +void make_symmetric(matrix::Dense *mtx) +{ + assert(mtx->get_executor() == mtx->get_executor()->get_master()); + for (size_type i = 0; i < mtx->get_size()[0]; ++i) { + for (size_type j = i + 1; j < mtx->get_size()[1]; ++j) { + mtx->at(i, j) = mtx->at(j, i); + } + } +} + + +template +void make_diag_dominant(matrix::Dense *mtx) +{ + assert(mtx->get_executor() == mtx->get_executor()->get_master()); + using std::abs; + for (int i = 0; i < mtx->get_size()[0]; ++i) { + auto sum = gko::zero(); + for (int j = 0; j < mtx->get_size()[1]; ++j) { + sum += abs(mtx->at(i, j)); + } + mtx->at(i, i) = sum; + } +} + + +template +void make_spd(matrix::Dense *mtx) +{ + make_symmetric(mtx); + make_diag_dominant(mtx); +} + + +} // namespace test +} // namespace gko + +#endif // GKO_CORE_TEST_UTILS_MATRIX_UTILS_HPP_ diff --git a/core/test/utils/value_generator.hpp b/core/test/utils/value_generator.hpp new file mode 100644 index 00000000000..8b82ea63d0a --- /dev/null +++ b/core/test/utils/value_generator.hpp @@ -0,0 +1,69 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_VALUE_GENERATOR_HPP_ +#define GKO_CORE_TEST_UTILS_VALUE_GENERATOR_HPP_ + + +#include +#include + + +#include + + +namespace gko { +namespace test { +namespace detail { + + +template +typename std::enable_if::value, ValueType>::type +get_rand_value(Distribution &&dist, Generator &&gen) +{ + return dist(gen); +} + + +template +typename std::enable_if::value, ValueType>::type +get_rand_value(Distribution &&dist, Generator &&gen) +{ + return ValueType(dist(gen), dist(gen)); +} + + +} // namespace detail +} // namespace test +} // namespace gko + +#endif // GKO_CORE_TEST_UTILS_VALUE_GENERATOR_HPP_ diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 10d2b97a8bd..2814f9177c0 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -58,24 +58,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { -template -Array generate_random_array(gko::size_type num, ValueDistribution &&value_dist, - Engine &&engine, - std::shared_ptr exec) -{ - using value_type = typename Array::value_type; - Array array_host(exec->get_master(), num); - auto val = array_host.get_data(); - for (int i = 0; i < num; i++) { - val[i] = - gko::test::detail::get_rand_value(value_dist, engine); - } - Array array(exec); - array = array_host; - return array; -} - - class AmgxPgm : public ::testing::Test { protected: using value_type = gko::default_precision; @@ -83,6 +65,7 @@ class AmgxPgm : public ::testing::Test { using Mtx = gko::matrix::Dense<>; using Csr = gko::matrix::Csr; using Diag = gko::matrix::Diagonal; + AmgxPgm() : rand_engine(30) {} void SetUp() @@ -110,7 +93,7 @@ class AmgxPgm : public ::testing::Test { gko::Array gen_array(gko::size_type num, index_type min_val, index_type max_val) { - return generate_random_array>( + return gko::test::generate_random_array( num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, ref); } @@ -132,7 +115,7 @@ class AmgxPgm : public ::testing::Test { weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); auto system_dense = gen_mtx(m, m); - make_spd(system_dense.get()); + gko::test::make_spd(system_dense.get()); system_mtx = Csr::create(ref); system_dense->convert_to(system_mtx.get()); @@ -154,48 +137,12 @@ class AmgxPgm : public ::testing::Test { d_system_mtx->copy_from(system_mtx.get()); } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - // only for real value - void make_absoulte(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = 0; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = abs(mtx->at(i, j)); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = 
gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - void make_weight(Mtx *mtx) { - make_symetric(mtx); - make_absoulte(mtx); - make_diag_dominant(mtx); + gko::test::make_symmetric(mtx); + // only works for realvalue cases + mtx->compute_absolute_inplace(); + gko::test::make_diag_dominant(mtx); } std::shared_ptr ref; diff --git a/cuda/test/solver/bicg_kernels.cpp b/cuda/test/solver/bicg_kernels.cpp index a825ae5aa9b..d82cc0a28a0 100644 --- a/cuda/test/solver/bicg_kernels.cpp +++ b/cuda/test/solver/bicg_kernels.cpp @@ -152,33 +152,6 @@ class Bicg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr cuda; @@ -285,7 +258,7 @@ TEST_F(Bicg, CudaBicgStep2IsEquivalentToRef) TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(cuda); diff --git a/cuda/test/solver/bicgstab_kernels.cpp b/cuda/test/solver/bicgstab_kernels.cpp index 7808226870c..b1e0ca85d1b 100644 --- a/cuda/test/solver/bicgstab_kernels.cpp +++ b/cuda/test/solver/bicgstab_kernels.cpp @@ -69,7 +69,7 @@ class Bicgstab : public ::testing::Test { cuda = gko::CudaExecutor::create(0, ref); mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); + 
gko::test::make_diag_dominant(mtx.get()); d_mtx = Mtx::create(cuda); d_mtx->copy_from(mtx.get()); @@ -171,18 +171,6 @@ class Bicgstab : public ::testing::Test { *stop_status; // copy_from is not a public member function of Array } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr cuda; diff --git a/cuda/test/solver/cg_kernels.cpp b/cuda/test/solver/cg_kernels.cpp index d8a98a83805..8333fc7d0a8 100644 --- a/cuda/test/solver/cg_kernels.cpp +++ b/cuda/test/solver/cg_kernels.cpp @@ -123,33 +123,6 @@ class Cg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr cuda; @@ -234,7 +207,7 @@ TEST_F(Cg, CudaCgStep2IsEquivalentToRef) TEST_F(Cg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(cuda); diff --git a/cuda/test/solver/cgs_kernels.cpp b/cuda/test/solver/cgs_kernels.cpp index 543f580cf53..f8cb89d55bb 100644 --- a/cuda/test/solver/cgs_kernels.cpp +++ b/cuda/test/solver/cgs_kernels.cpp @@ -68,7 +68,7 @@ class Cgs : public ::testing::Test { cuda = gko::CudaExecutor::create(0, ref); mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); + 
gko::test::make_diag_dominant(mtx.get()); d_mtx = Mtx::create(cuda); d_mtx->copy_from(mtx.get()); cuda_cgs_factory = @@ -166,18 +166,6 @@ class Cgs : public ::testing::Test { *d_stop_status = *stop_status; } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr cuda; diff --git a/cuda/test/solver/fcg_kernels.cpp b/cuda/test/solver/fcg_kernels.cpp index 85b0b3b7017..b2832945666 100644 --- a/cuda/test/solver/fcg_kernels.cpp +++ b/cuda/test/solver/fcg_kernels.cpp @@ -131,33 +131,6 @@ class Fcg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr cuda; @@ -247,7 +220,7 @@ TEST_F(Fcg, CudaFcgStep2IsEquivalentToRef) TEST_F(Fcg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(cuda); diff --git a/cuda/test/solver/idr_kernels.cpp b/cuda/test/solver/idr_kernels.cpp index 04ea559b6cc..9aa925187a4 100644 --- a/cuda/test/solver/idr_kernels.cpp +++ b/cuda/test/solver/idr_kernels.cpp @@ -160,18 +160,6 @@ class Idr : public ::testing::Test { *stop_status; // copy_from is not a public member function of Array } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - 
for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr cuda; diff --git a/hip/test/multigrid/CMakeLists.txt b/hip/test/multigrid/CMakeLists.txt index 481c2cc1bf2..8fe8bbeba48 100644 --- a/hip/test/multigrid/CMakeLists.txt +++ b/hip/test/multigrid/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_hip_test_special_linkage(amgx_pgm_kernels) +ginkgo_create_test(amgx_pgm_kernels) diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp index 71acf6ed06a..39546df6cff 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -58,24 +58,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -template -Array generate_random_array(gko::size_type num, ValueDistribution &&value_dist, - Engine &&engine, - std::shared_ptr exec) -{ - using value_type = typename Array::value_type; - Array array_host(exec->get_master(), num); - auto val = array_host.get_data(); - for (int i = 0; i < num; i++) { - val[i] = - gko::test::detail::get_rand_value(value_dist, engine); - } - Array array(exec); - array = array_host; - return array; -} - - class AmgxPgm : public ::testing::Test { protected: using value_type = gko::default_precision; @@ -110,7 +92,7 @@ class AmgxPgm : public ::testing::Test { gko::Array gen_array(gko::size_type num, index_type min_val, index_type max_val) { - return generate_random_array>( + return gko::test::generate_random_array( num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, ref); } @@ -132,7 +114,7 @@ class AmgxPgm : public ::testing::Test { weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); auto system_dense = gen_mtx(m, m); - make_spd(system_dense.get()); + gko::test::make_spd(system_dense.get()); system_mtx = Csr::create(ref); 
system_dense->convert_to(system_mtx.get()); @@ -154,48 +136,12 @@ class AmgxPgm : public ::testing::Test { d_system_mtx->copy_from(system_mtx.get()); } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - // only for real value - void make_absoulte(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = 0; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = abs(mtx->at(i, j)); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - void make_weight(Mtx *mtx) { - make_symetric(mtx); - make_absoulte(mtx); - make_diag_dominant(mtx); + gko::test::make_symmetric(mtx); + // only works for realvalue cases + mtx->compute_absolute_inplace(); + gko::test::make_diag_dominant(mtx); } std::shared_ptr ref; diff --git a/hip/test/solver/bicg_kernels.cpp b/hip/test/solver/bicg_kernels.cpp index 52073945050..c29bd74374c 100644 --- a/hip/test/solver/bicg_kernels.cpp +++ b/hip/test/solver/bicg_kernels.cpp @@ -152,33 +152,6 @@ class Bicg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr hip; @@ 
-285,7 +258,7 @@ TEST_F(Bicg, HipBicgStep2IsEquivalentToRef) TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(hip); diff --git a/hip/test/solver/bicgstab_kernels.cpp b/hip/test/solver/bicgstab_kernels.cpp index bcf4191a426..5a995656306 100644 --- a/hip/test/solver/bicgstab_kernels.cpp +++ b/hip/test/solver/bicgstab_kernels.cpp @@ -69,7 +69,7 @@ class Bicgstab : public ::testing::Test { hip = gko::HipExecutor::create(0, ref); mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); + gko::test::make_diag_dominant(mtx.get()); d_mtx = Mtx::create(hip); d_mtx->copy_from(mtx.get()); @@ -171,18 +171,6 @@ class Bicgstab : public ::testing::Test { *stop_status; // copy_from is not a public member function of Array } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr hip; diff --git a/hip/test/solver/cg_kernels.cpp b/hip/test/solver/cg_kernels.cpp index a46d698cc22..cd4000171c0 100644 --- a/hip/test/solver/cg_kernels.cpp +++ b/hip/test/solver/cg_kernels.cpp @@ -123,33 +123,6 @@ class Cg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; 
std::shared_ptr hip; @@ -234,7 +207,7 @@ TEST_F(Cg, HipCgStep2IsEquivalentToRef) TEST_F(Cg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(hip); diff --git a/hip/test/solver/cgs_kernels.cpp b/hip/test/solver/cgs_kernels.cpp index 861f9498780..2277d4417ba 100644 --- a/hip/test/solver/cgs_kernels.cpp +++ b/hip/test/solver/cgs_kernels.cpp @@ -68,7 +68,7 @@ class Cgs : public ::testing::Test { hip = gko::HipExecutor::create(0, ref); mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); + gko::test::make_diag_dominant(mtx.get()); d_mtx = Mtx::create(hip); d_mtx->copy_from(mtx.get()); hip_cgs_factory = @@ -166,18 +166,6 @@ class Cgs : public ::testing::Test { *d_stop_status = *stop_status; } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr hip; diff --git a/hip/test/solver/fcg_kernels.cpp b/hip/test/solver/fcg_kernels.cpp index 663c811227c..ca86775ee94 100644 --- a/hip/test/solver/fcg_kernels.cpp +++ b/hip/test/solver/fcg_kernels.cpp @@ -131,33 +131,6 @@ class Fcg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr hip; @@ -247,7 +220,7 @@ 
TEST_F(Fcg, HipFcgStep2IsEquivalentToRef) TEST_F(Fcg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(hip); diff --git a/hip/test/solver/idr_kernels.cpp b/hip/test/solver/idr_kernels.cpp index c59ed90c9a7..de7f5a14125 100644 --- a/hip/test/solver/idr_kernels.cpp +++ b/hip/test/solver/idr_kernels.cpp @@ -160,18 +160,6 @@ class Idr : public ::testing::Test { *stop_status; // copy_from is not a public member function of Array } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr hip; diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 9c4efefebe6..3ce1b087fd8 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -75,12 +75,12 @@ void match_edge(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < agg.get_num_elems(); i++) { if (agg_vals[i] == -1) { - size_type neighbor = strongest_neighbor_vals[i]; - if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i) { + auto neighbor = strongest_neighbor_vals[i]; + if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i && + i < neighbor) { // Use the smaller index as agg point - auto group = min(i, neighbor); - agg_vals[i] = group; - agg_vals[neighbor] = group; + agg_vals[i] = i; + agg_vals[neighbor] = i; } } } @@ -152,7 +152,7 @@ void find_strongest_neighbor( continue; } auto weight = - vals[idx] / max(abs(diag_vals[col]), abs(diag_vals[col])); + vals[idx] / max(abs(diag_vals[row]), abs(diag_vals[col])); if (agg.get_const_data()[col] == -1 && (weight > max_weight_unagg || (weight == max_weight_unagg && col > strongest_unagg))) { @@ -200,7 +200,6 @@ void 
assign_to_exist_agg(std::shared_ptr exec, ? intermediate_agg.get_data() : agg.get_data(); const auto diag_vals = diag->get_const_values(); -#pragma omp parallel for for (IndexType row = 0; row < agg.get_num_elems(); row++) { if (agg_const_val[row] != -1) { continue; diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index 9a27ef3f4fc..e0b063de0bd 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -57,24 +57,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -template -Array generate_random_array(gko::size_type num, ValueDistribution &&value_dist, - Engine &&engine, - std::shared_ptr exec) -{ - using value_type = typename Array::value_type; - Array array_host(exec->get_master(), num); - auto val = array_host.get_data(); - for (int i = 0; i < num; i++) { - val[i] = - gko::test::detail::get_rand_value(value_dist, engine); - } - Array array(exec); - array = array_host; - return array; -} - - class AmgxPgm : public ::testing::Test { protected: using value_type = gko::default_precision; @@ -101,7 +83,7 @@ class AmgxPgm : public ::testing::Test { gko::Array gen_array(gko::size_type num, index_type min_val, index_type max_val) { - return generate_random_array>( + return gko::test::generate_random_array( num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, ref); } @@ -122,6 +104,10 @@ class AmgxPgm : public ::testing::Test { weight_csr = Csr::create(ref); weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); + auto system_dense = gen_mtx(m, m); + gko::test::make_spd(system_dense.get()); + system_mtx = Csr::create(ref); + system_dense->convert_to(system_mtx.get()); d_agg.set_executor(omp); d_unfinished_agg.set_executor(omp); @@ -130,6 +116,7 @@ class AmgxPgm : public ::testing::Test { d_fine_vector = Mtx::create(omp); d_weight_csr = Csr::create(omp); d_weight_diag = Diag::create(omp); + d_system_mtx = 
Csr::create(omp); d_agg = agg; d_unfinished_agg = unfinished_agg; d_strongest_neighbor = strongest_neighbor; @@ -137,50 +124,15 @@ class AmgxPgm : public ::testing::Test { d_fine_vector->copy_from(fine_vector.get()); d_weight_csr->copy_from(weight_csr.get()); d_weight_diag->copy_from(weight_diag.get()); - } - - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - // only for real value - void make_absoulte(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = 0; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = abs(mtx->at(i, j)); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); + d_system_mtx->copy_from(system_mtx.get()); } void make_weight(Mtx *mtx) { - make_symetric(mtx); - make_absoulte(mtx); - make_diag_dominant(mtx); + gko::test::make_symmetric(mtx); + // it is only works for realvalue case. 
+ mtx->compute_absolute_inplace(); + gko::test::make_diag_dominant(mtx); } std::shared_ptr ref; @@ -200,11 +152,13 @@ class AmgxPgm : public ::testing::Test { std::unique_ptr fine_vector; std::unique_ptr weight_diag; std::unique_ptr weight_csr; + std::shared_ptr system_mtx; std::unique_ptr d_coarse_vector; std::unique_ptr d_fine_vector; std::unique_ptr d_weight_diag; std::unique_ptr d_weight_csr; + std::shared_ptr d_system_mtx; gko::size_type n; }; @@ -320,4 +274,26 @@ TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) } +TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) +{ + initialize_data(); + auto mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(ref); + auto d_mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(omp); + + auto mg_level = mg_level_factory->generate(system_mtx); + auto d_mg_level = d_mg_level_factory->generate(d_system_mtx); + + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_restrict_op()), + gko::as(mg_level->get_restrict_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_coarse_op()), + gko::as(mg_level->get_coarse_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_prolong_op()), + gko::as(mg_level->get_prolong_op()), 1e-14); +} + + } // namespace diff --git a/omp/test/solver/bicg_kernels.cpp b/omp/test/solver/bicg_kernels.cpp index fc51d1847ae..2018fa5f788 100644 --- a/omp/test/solver/bicg_kernels.cpp +++ b/omp/test/solver/bicg_kernels.cpp @@ -134,33 +134,6 @@ class Bicg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void 
make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr omp; @@ -265,7 +238,7 @@ TEST_F(Bicg, OmpBicgStep2IsEquivalentToRef) TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(omp); diff --git a/omp/test/solver/bicgstab_kernels.cpp b/omp/test/solver/bicgstab_kernels.cpp index 74085b6e7a7..9e51d841e64 100644 --- a/omp/test/solver/bicgstab_kernels.cpp +++ b/omp/test/solver/bicgstab_kernels.cpp @@ -68,7 +68,7 @@ class Bicgstab : public ::testing::Test { omp = gko::OmpExecutor::create(); mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); + gko::test::make_diag_dominant(mtx.get()); d_mtx = Mtx::create(omp); d_mtx->copy_from(mtx.get()); omp_bicgstab_factory = @@ -170,18 +170,6 @@ class Bicgstab : public ::testing::Test { *stop_status; // copy_from is not a public member function of Array } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr omp; diff --git a/omp/test/solver/cg_kernels.cpp b/omp/test/solver/cg_kernels.cpp index e71fad019b5..1e86956602a 100644 --- a/omp/test/solver/cg_kernels.cpp +++ b/omp/test/solver/cg_kernels.cpp @@ -122,33 +122,6 @@ class Cg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - 
mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr omp; @@ -233,7 +206,7 @@ TEST_F(Cg, OmpCgStep2IsEquivalentToRef) TEST_F(Cg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(omp); diff --git a/omp/test/solver/cgs_kernels.cpp b/omp/test/solver/cgs_kernels.cpp index f05d0604919..d473f830bdc 100644 --- a/omp/test/solver/cgs_kernels.cpp +++ b/omp/test/solver/cgs_kernels.cpp @@ -67,7 +67,7 @@ class Cgs : public ::testing::Test { omp = gko::OmpExecutor::create(); mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); + gko::test::make_diag_dominant(mtx.get()); d_mtx = Mtx::create(omp); d_mtx->copy_from(mtx.get()); omp_cgs_factory = @@ -165,18 +165,6 @@ class Cgs : public ::testing::Test { *d_stop_status = *stop_status; } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - std::shared_ptr ref; std::shared_ptr omp; diff --git a/omp/test/solver/fcg_kernels.cpp b/omp/test/solver/fcg_kernels.cpp index bcf6c57d288..632a2c80808 100644 --- a/omp/test/solver/fcg_kernels.cpp +++ b/omp/test/solver/fcg_kernels.cpp @@ -130,33 +130,6 @@ class Fcg : public ::testing::Test { *d_stop_status = *stop_status; } - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx 
*mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - std::shared_ptr ref; std::shared_ptr omp; @@ -246,7 +219,7 @@ TEST_F(Fcg, OmpFcgStep2IsEquivalentToRef) TEST_F(Fcg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); + gko::test::make_spd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(omp); diff --git a/omp/test/solver/gmres_kernels.cpp b/omp/test/solver/gmres_kernels.cpp index 58b369e748e..66ebe091ce8 100644 --- a/omp/test/solver/gmres_kernels.cpp +++ b/omp/test/solver/gmres_kernels.cpp @@ -111,18 +111,6 @@ class Gmres : public ::testing::Test { std::normal_distribution(-1.0, 1.0), rand_engine, ref); } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - void initialize_data() { #ifdef GINKGO_FAST_TESTS diff --git a/omp/test/solver/idr_kernels.cpp b/omp/test/solver/idr_kernels.cpp index f26bf4dcf4b..794d68cf3f2 100644 --- a/omp/test/solver/idr_kernels.cpp +++ b/omp/test/solver/idr_kernels.cpp @@ -166,18 +166,6 @@ class Idr : public ::testing::Test { *stop_status; // copy_from is not a public member function of Array } - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum / 4; - } - } - std::shared_ptr ref; std::shared_ptr omp; diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp index 64788db9805..d5b1ddbebb1 100644 --- a/reference/multigrid/amgx_pgm_kernels.cpp +++ b/reference/multigrid/amgx_pgm_kernels.cpp @@ -71,10 +71,12 @@ void match_edge(std::shared_ptr exec, for (size_type i = 0; i < agg.get_num_elems(); i++) { if (agg_vals[i] == -1) { auto neighbor = 
strongest_neighbor_vals[i]; - if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i) { + // i < neighbor always holds when neighbor is not -1 + if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i && + i < neighbor) { + // Use the smaller index as agg point agg_vals[i] = i; agg_vals[neighbor] = i; - // Use the smaller index as agg point } } } From c93fdfddb21ea29f245dba58e3a19b82d425821f Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 30 Mar 2021 23:50:17 +0800 Subject: [PATCH 13/16] collect mtx/array utils and add doc and typed_test - symmetric/hermitian/diag_dominant/hpd - array generator - add typed tests and documentation for everything in core/test/utils Reminder to myself: class members are initialized in declaration order --- core/test/utils.hpp | 1 - core/test/utils/CMakeLists.txt | 3 + core/test/utils/array_generator.hpp | 14 ++ core/test/utils/array_generator_test.cpp | 125 +++++++++++++ core/test/utils/matrix_generator.hpp | 13 +- core/test/utils/matrix_generator_test.cpp | 207 +++++++++++++++------- core/test/utils/matrix_utils.hpp | 100 +++++++++-- core/test/utils/matrix_utils_test.cpp | 201 +++++++++++++++++++++ core/test/utils/unsort_matrix_test.cpp | 53 +++--- core/test/utils/value_generator.hpp | 30 +++- core/test/utils/value_generator_test.cpp | 118 ++++++++++++ cuda/test/multigrid/amgx_pgm_kernels.cpp | 2 +- cuda/test/solver/bicg_kernels.cpp | 2 +- cuda/test/solver/cg_kernels.cpp | 2 +- cuda/test/solver/fcg_kernels.cpp | 2 +- hip/test/multigrid/amgx_pgm_kernels.cpp | 2 +- hip/test/solver/bicg_kernels.cpp | 2 +- hip/test/solver/cg_kernels.cpp | 2 +- hip/test/solver/fcg_kernels.cpp | 2 +- omp/multigrid/amgx_pgm_kernels.cpp | 1 + omp/test/multigrid/amgx_pgm_kernels.cpp | 2 +- omp/test/solver/bicg_kernels.cpp | 2 +- omp/test/solver/cg_kernels.cpp | 2 +- omp/test/solver/fcg_kernels.cpp | 2 +- 24 files changed, 766 insertions(+), 124 deletions(-) create mode 100644 core/test/utils/array_generator_test.cpp create mode 100644
core/test/utils/matrix_utils_test.cpp create mode 100644 core/test/utils/value_generator_test.cpp diff --git a/core/test/utils.hpp b/core/test/utils.hpp index 05bf216c8bb..f493b375831 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -66,7 +66,6 @@ using ValueTypes = ::testing::Types, std::complex>; #endif - using ComplexValueTypes = #if GINKGO_DPCPP_SINGLE_MODE ::testing::Types>; diff --git a/core/test/utils/CMakeLists.txt b/core/test/utils/CMakeLists.txt index 84e3d46958d..9b3e0e5e349 100644 --- a/core/test/utils/CMakeLists.txt +++ b/core/test/utils/CMakeLists.txt @@ -1,3 +1,6 @@ +ginkgo_create_test(array_generator_test) ginkgo_create_test(assertions_test) ginkgo_create_test(matrix_generator_test) +ginkgo_create_test(matrix_utils_test) ginkgo_create_test(unsort_matrix_test) +ginkgo_create_test(value_generator_test) diff --git a/core/test/utils/array_generator.hpp b/core/test/utils/array_generator.hpp index 1ad52c18e26..8dbdeacc9a7 100644 --- a/core/test/utils/array_generator.hpp +++ b/core/test/utils/array_generator.hpp @@ -46,6 +46,20 @@ namespace gko { namespace test { +/** + * Generate a random array + * + * @tparam ValueType valuetype of the array to generate + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param num the number of elements of array + * @param value_dist distribution of array values + * @param engine a random engine + * @param exec executor where the array should be allocated + * + * @return Array + */ template Array generate_random_array(size_type num, ValueDistribution &&value_dist, diff --git a/core/test/utils/array_generator_test.cpp b/core/test/utils/array_generator_test.cpp new file mode 100644 index 00000000000..ba8c6651be9 --- /dev/null +++ b/core/test/utils/array_generator_test.cpp @@ -0,0 +1,125 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/test/utils/array_generator.hpp" + + +#include +#include + + +#include + + +#include "core/test/utils.hpp" + +namespace { + + +template +class ArrayGenerator : public ::testing::Test { +protected: + using value_type = T; + + ArrayGenerator() : exec(gko::ReferenceExecutor::create()) + { + array = gko::test::generate_random_array( + 500, std::normal_distribution>(20.0, 5.0), + std::ranlux48(42), exec); + } + + std::shared_ptr exec; + gko::Array array; + + template + ValueType get_nth_moment(int n, ValueType c, InputIterator sample_start, + InputIterator sample_end, Closure closure_op) + { + using std::pow; + ValueType res = 0; + ValueType num_elems = 0; + while (sample_start != sample_end) { + auto tmp = *(sample_start++); + res += pow(closure_op(tmp) - c, n); + num_elems += 1; + } + return res / num_elems; + } + + template + void check_average_and_deviation( + InputIterator sample_start, InputIterator sample_end, + gko::remove_complex average_ans, + gko::remove_complex deviation_ans, Closure closure_op) + { + auto average = + this->get_nth_moment(1, gko::zero>(), + sample_start, sample_end, closure_op); + auto deviation = sqrt(this->get_nth_moment(2, average, sample_start, + sample_end, closure_op)); + + // check that average & deviation is within 10% of the required amount + ASSERT_NEAR(average, average_ans, average_ans * 0.1); + ASSERT_NEAR(deviation, deviation_ans, deviation_ans * 0.1); + } +}; + +TYPED_TEST_SUITE(ArrayGenerator, gko::test::ValueTypes); + + +TYPED_TEST(ArrayGenerator, OutputHasCorrectSize) +{ + ASSERT_EQ(this->array.get_num_elems(), 500); +} + + +TYPED_TEST(ArrayGenerator, OutputHasCorrectAverageAndDeviation) +{ + using std::sqrt; + using T = typename TestFixture::value_type; + + // check the real part + this->template check_average_and_deviation( + this->array.get_const_data(), + this->array.get_const_data() + this->array.get_num_elems(), 20.0, 5.0, + [](T &val) { 
return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + this->array.get_const_data(), + this->array.get_const_data() + this->array.get_num_elems(), 20.0, + 5.0, [](T &val) { return gko::imag(val); }); + } +} + + +} // namespace diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 04f98126c41..9a101f4b043 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -70,6 +70,8 @@ namespace test { * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -128,6 +130,8 @@ std::unique_ptr generate_random_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename Engine, typename... MatrixArgs> @@ -189,6 +193,8 @@ std::unique_ptr generate_random_sparsity_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -274,6 +280,8 @@ std::unique_ptr generate_random_triangular_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... 
MatrixArgs> @@ -308,6 +316,8 @@ std::unique_ptr generate_random_lower_triangular_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -341,6 +351,8 @@ std::unique_ptr generate_random_upper_triangular_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -353,7 +365,6 @@ std::unique_ptr generate_random_band_matrix( using index_type = typename MatrixType::index_type; matrix_data data{gko::dim<2>{size, size}, {}}; - for (size_type row = 0; row < size; ++row) { for (size_type col = row < lower_bandwidth ? 0 : row - lower_bandwidth; col <= std::min(row + upper_bandwidth, size - 1); col++) { diff --git a/core/test/utils/matrix_generator_test.cpp b/core/test/utils/matrix_generator_test.cpp index 1a85604a788..687560a202b 100644 --- a/core/test/utils/matrix_generator_test.cpp +++ b/core/test/utils/matrix_generator_test.cpp @@ -40,129 +40,204 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/test/utils.hpp" + + namespace { +template class MatrixGenerator : public ::testing::Test { protected: + using value_type = T; + using real_type = gko::remove_complex; + using mtx_type = gko::matrix::Dense; + MatrixGenerator() : exec(gko::ReferenceExecutor::create()), - mtx(gko::test::generate_random_matrix( - 500, 100, std::normal_distribution(50, 5), - std::normal_distribution(20.0, 5.0), std::ranlux48(42), + mtx(gko::test::generate_random_matrix( + 500, 100, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), exec)), - l_mtx(gko::test::generate_random_lower_triangular_matrix( - 4, 3, true, std::normal_distribution(50, 5), - std::normal_distribution(20.0, 5.0), std::ranlux48(42), + l_mtx(gko::test::generate_random_lower_triangular_matrix( + 4, 3, true, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), exec)), - u_mtx(gko::test::generate_random_upper_triangular_matrix( - 3, 4, true, std::normal_distribution(50, 5), - std::normal_distribution(20.0, 5.0), std::ranlux48(42), + u_mtx(gko::test::generate_random_upper_triangular_matrix( + 3, 4, true, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), + exec)), + lower_bandwidth(2), + upper_bandwidth(3), + band_mtx(gko::test::generate_random_band_matrix( + 100, lower_bandwidth, upper_bandwidth, + std::normal_distribution(20.0, 5.0), std::ranlux48(42), exec)), nnz_per_row_sample(500, 0), - values_sample(0) + values_sample(0), + band_values_sample(0) { // collect samples of nnz/row and values from the matrix for (int row = 0; row < mtx->get_size()[0]; ++row) { for (int col = 0; col < mtx->get_size()[1]; ++col) { auto val = mtx->at(row, col); - if (val != 0.0) { + if (val != gko::zero()) { ++nnz_per_row_sample[row]; values_sample.push_back(val); } } } + + // collect samples of values from the band matrix + for (int row = 0; row < band_mtx->get_size()[0]; ++row) { + for (int col = 0; 
col < band_mtx->get_size()[1]; ++col) { + auto val = band_mtx->at(row, col); + if ((col - row <= upper_bandwidth) && + (row - col <= lower_bandwidth)) { + band_values_sample.push_back(val); + } + } + } } std::shared_ptr exec; - std::unique_ptr> mtx; - std::unique_ptr> l_mtx; - std::unique_ptr> u_mtx; + int lower_bandwidth; + int upper_bandwidth; + std::unique_ptr mtx; + std::unique_ptr l_mtx; + std::unique_ptr u_mtx; + std::unique_ptr band_mtx; std::vector nnz_per_row_sample; - std::vector values_sample; + std::vector values_sample; + std::vector band_values_sample; + - template + template ValueType get_nth_moment(int n, ValueType c, InputIterator sample_start, - InputIterator sample_end) + InputIterator sample_end, Closure closure_op) { using std::pow; ValueType res = 0; ValueType num_elems = 0; while (sample_start != sample_end) { auto tmp = *(sample_start++); - res += pow(tmp - c, n); + res += pow(closure_op(tmp) - c, n); num_elems += 1; } return res / num_elems; } + + template + void check_average_and_deviation( + InputIterator sample_start, InputIterator sample_end, + gko::remove_complex average_ans, + gko::remove_complex deviation_ans, Closure closure_op) + { + auto average = + this->get_nth_moment(1, gko::zero>(), + sample_start, sample_end, closure_op); + auto deviation = sqrt(this->get_nth_moment(2, average, sample_start, + sample_end, closure_op)); + + // check that average & deviation is within 10% of the required amount + ASSERT_NEAR(average, average_ans, average_ans * 0.1); + ASSERT_NEAR(deviation, deviation_ans, deviation_ans * 0.1); + } }; +TYPED_TEST_SUITE(MatrixGenerator, gko::test::ValueTypes); + -TEST_F(MatrixGenerator, OutputHasCorrectSize) +TYPED_TEST(MatrixGenerator, OutputHasCorrectSize) { - ASSERT_EQ(mtx->get_size(), gko::dim<2>(500, 100)); + ASSERT_EQ(this->mtx->get_size(), gko::dim<2>(500, 100)); } -TEST_F(MatrixGenerator, OutputHasCorrectNonzeroAverageAndDeviation) +TYPED_TEST(MatrixGenerator, OutputHasCorrectNonzeroAverageAndDeviation) { 
- using std::sqrt; - auto average = get_nth_moment(1, 0.0, begin(nnz_per_row_sample), - end(nnz_per_row_sample)); - auto deviation = sqrt(get_nth_moment(2, average, begin(nnz_per_row_sample), - end(nnz_per_row_sample))); - - // check that average & deviation is within 10% of the required amount - ASSERT_NEAR(average, 50.0, 5); - ASSERT_NEAR(deviation, 5.0, 0.5); + using T = typename TestFixture::value_type; + // the nonzeros only needs to check the real part + this->template check_average_and_deviation( + begin(this->nnz_per_row_sample), end(this->nnz_per_row_sample), 50.0, + 5.0, [](T val) { return gko::real(val); }); } -TEST_F(MatrixGenerator, OutputHasCorrectValuesAverageAndDeviation) +TYPED_TEST(MatrixGenerator, OutputHasCorrectValuesAverageAndDeviation) { - using std::sqrt; - auto average = - get_nth_moment(1, 0.0, begin(values_sample), end(values_sample)); - auto deviation = sqrt( - get_nth_moment(2, average, begin(values_sample), end(values_sample))); - - // check that average and deviation is within 10% of the required amount - ASSERT_NEAR(average, 20.0, 2.0); - ASSERT_NEAR(deviation, 5.0, 0.5); + using T = typename TestFixture::value_type; + // check the real part + this->template check_average_and_deviation( + begin(this->values_sample), end(this->values_sample), 20.0, 5.0, + [](T &val) { return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + begin(this->values_sample), end(this->values_sample), 20.0, 5.0, + [](T &val) { return gko::imag(val); }); + } } -TEST_F(MatrixGenerator, CanGenerateLowerTriangularMatrixWithDiagonalOnes) +TYPED_TEST(MatrixGenerator, CanGenerateLowerTriangularMatrixWithDiagonalOnes) { - ASSERT_EQ(l_mtx->at(0, 0), 1.0); - ASSERT_EQ(l_mtx->at(0, 1), 0.0); - ASSERT_EQ(l_mtx->at(0, 2), 0.0); - ASSERT_NE(l_mtx->at(1, 0), 0.0); - ASSERT_EQ(l_mtx->at(1, 1), 1.0); - ASSERT_EQ(l_mtx->at(1, 2), 0.0); - ASSERT_NE(l_mtx->at(2, 0), 0.0); - 
ASSERT_NE(l_mtx->at(2, 1), 0.0); - ASSERT_EQ(l_mtx->at(2, 2), 1.0); - ASSERT_NE(l_mtx->at(3, 0), 0.0); - ASSERT_NE(l_mtx->at(3, 1), 0.0); - ASSERT_NE(l_mtx->at(3, 2), 0.0); + using T = typename TestFixture::value_type; + ASSERT_EQ(this->l_mtx->at(0, 0), T{1.0}); + ASSERT_EQ(this->l_mtx->at(0, 1), T{0.0}); + ASSERT_EQ(this->l_mtx->at(0, 2), T{0.0}); + ASSERT_NE(this->l_mtx->at(1, 0), T{0.0}); + ASSERT_EQ(this->l_mtx->at(1, 1), T{1.0}); + ASSERT_EQ(this->l_mtx->at(1, 2), T{0.0}); + ASSERT_NE(this->l_mtx->at(2, 0), T{0.0}); + ASSERT_NE(this->l_mtx->at(2, 1), T{0.0}); + ASSERT_EQ(this->l_mtx->at(2, 2), T{1.0}); + ASSERT_NE(this->l_mtx->at(3, 0), T{0.0}); + ASSERT_NE(this->l_mtx->at(3, 1), T{0.0}); + ASSERT_NE(this->l_mtx->at(3, 2), T{0.0}); } -TEST_F(MatrixGenerator, CanGenerateUpperTriangularMatrixWithDiagonalOnes) +TYPED_TEST(MatrixGenerator, CanGenerateUpperTriangularMatrixWithDiagonalOnes) { - ASSERT_EQ(u_mtx->at(0, 0), 1.0); - ASSERT_NE(u_mtx->at(0, 1), 0.0); - ASSERT_NE(u_mtx->at(0, 2), 0.0); - ASSERT_NE(u_mtx->at(0, 3), 0.0); - ASSERT_EQ(u_mtx->at(1, 0), 0.0); - ASSERT_EQ(u_mtx->at(1, 1), 1.0); - ASSERT_NE(u_mtx->at(1, 2), 0.0); - ASSERT_NE(u_mtx->at(1, 3), 0.0); - ASSERT_EQ(u_mtx->at(2, 0), 0.0); - ASSERT_EQ(u_mtx->at(2, 1), 0.0); - ASSERT_EQ(u_mtx->at(2, 2), 1.0); - ASSERT_NE(u_mtx->at(2, 3), 0.0); + using T = typename TestFixture::value_type; + ASSERT_EQ(this->u_mtx->at(0, 0), T{1.0}); + ASSERT_NE(this->u_mtx->at(0, 1), T{0.0}); + ASSERT_NE(this->u_mtx->at(0, 2), T{0.0}); + ASSERT_NE(this->u_mtx->at(0, 3), T{0.0}); + ASSERT_EQ(this->u_mtx->at(1, 0), T{0.0}); + ASSERT_EQ(this->u_mtx->at(1, 1), T{1.0}); + ASSERT_NE(this->u_mtx->at(1, 2), T{0.0}); + ASSERT_NE(this->u_mtx->at(1, 3), T{0.0}); + ASSERT_EQ(this->u_mtx->at(2, 0), T{0.0}); + ASSERT_EQ(this->u_mtx->at(2, 1), T{0.0}); + ASSERT_EQ(this->u_mtx->at(2, 2), T{1.0}); + ASSERT_NE(this->u_mtx->at(2, 3), T{0.0}); +} + + +TYPED_TEST(MatrixGenerator, CanGenerateBandMatrix) +{ + using T = typename 
TestFixture::value_type; + // the elements out of band are zero + for (int row = 0; row < this->band_mtx->get_size()[0]; row++) { + for (int col = 0; col < this->band_mtx->get_size()[1]; col++) { + if ((col - row > this->upper_bandwidth) || + (row - col > this->lower_bandwidth)) { + ASSERT_EQ(this->band_mtx->at(row, col), T{0.0}); + } + } + } + // check the real part of elements in band + this->template check_average_and_deviation( + begin(this->band_values_sample), end(this->band_values_sample), 20.0, + 5.0, [](T &val) { return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + begin(this->band_values_sample), end(this->band_values_sample), + 20.0, 5.0, [](T &val) { return gko::imag(val); }); + } } diff --git a/core/test/utils/matrix_utils.hpp b/core/test/utils/matrix_utils.hpp index c8b269d493f..90aa3ea1d6e 100644 --- a/core/test/utils/matrix_utils.hpp +++ b/core/test/utils/matrix_utils.hpp @@ -34,8 +34,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GKO_CORE_TEST_UTILS_MATRIX_UTILS_HPP_ +#include +#include #include + #include "core/test/utils/value_generator.hpp" @@ -43,38 +46,109 @@ namespace gko { namespace test { +/** + * Make a symmetric matrix + * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + */ template void make_symmetric(matrix::Dense *mtx) { - assert(mtx->get_executor() == mtx->get_executor()->get_master()); - for (size_type i = 0; i < mtx->get_size()[0]; ++i) { - for (size_type j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); + GKO_ASSERT_IS_SQUARE_MATRIX(mtx); + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + + for (size_type i = 0; i < mtx_host->get_size()[0]; ++i) { + for (size_type j = i + 1; j < mtx_host->get_size()[1]; ++j) { + mtx_host->at(i, j) = mtx_host->at(j, i); } } } +/** + * Make a hermitian matrix + * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + */ template -void make_diag_dominant(matrix::Dense *mtx) +void make_hermitian(matrix::Dense *mtx) { - assert(mtx->get_executor() == mtx->get_executor()->get_master()); + GKO_ASSERT_IS_SQUARE_MATRIX(mtx); + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + + for (size_type i = 0; i < mtx_host->get_size()[0]; ++i) { + for (size_type j = i + 1; j < mtx_host->get_size()[1]; ++j) { + mtx_host->at(i, j) = conj(mtx_host->at(j, i)); + } + mtx_host->at(i, i) = gko::real(mtx_host->at(i, i)); + } +} + + +/** + * Make a (strictly) diagonal dominant matrix. It will set the diag value from + * the summation among the absolute value of the row's elements. When ratio is + * larger than 1, the result will be strictly diagonal dominant matrix except + * for the empty row. When ratio is 1, the result will be diagonal dominant + * matrix.
+ * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + * @param ratio the scale to set the diagonal value. default is 1 and it must + * be larger than or equal to 1. + */ +template +void make_diag_dominant(matrix::Dense *mtx, + remove_complex ratio = 1.0) +{ + // To keep the diag dominant, the ratio should be larger than or equal to 1 + GKO_ASSERT_EQ(ratio >= 1.0, true); + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { + for (size_type i = 0; i < mtx_host->get_size()[0]; ++i) { auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); + for (size_type j = 0; j < mtx_host->get_size()[1]; ++j) { + sum += abs(mtx_host->at(i, j)); } - mtx->at(i, i) = sum; + mtx_host->at(i, i) = sum * ratio; } } +/** + * Make a Hermitian positive definite matrix. + * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + * @param ratio the ratio for make_diag_dominant. default is 1.001 and it must + * be larger than 1. + */ template -void make_spd(matrix::Dense *mtx) +void make_hpd(matrix::Dense *mtx, + remove_complex ratio = 1.001) { - make_symmetric(mtx); - make_diag_dominant(mtx); + GKO_ASSERT_IS_SQUARE_MATRIX(mtx); + // To get strictly diagonally dominant matrix, the ratio should be larger + // than 1. + GKO_ASSERT_EQ(ratio > 1.0, true); + + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + make_hermitian(mtx_host.get()); + // Construct strictly diagonally dominant matrix to ensure positive + // definite. In complex case, the diagonal is set as absolute value and is + // larger than 0, so it still gives positive definite.
+ make_diag_dominant(mtx_host.get(), ratio); } diff --git a/core/test/utils/matrix_utils_test.cpp b/core/test/utils/matrix_utils_test.cpp new file mode 100644 index 00000000000..dea1b4a55d6 --- /dev/null +++ b/core/test/utils/matrix_utils_test.cpp @@ -0,0 +1,201 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/test/utils/matrix_utils.hpp" + + +#include +#include +#include + + +#include + + +#include "core/test/utils.hpp" +#include "core/test/utils/matrix_generator.hpp" + + +namespace { + + +template +class MatrixUtils : public ::testing::Test { +protected: + using value_type = T; + using real_type = gko::remove_complex; + using mtx_type = gko::matrix::Dense; + + MatrixUtils() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::test::generate_random_matrix( + 500, 500, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), + exec)), + unsquare_mtx(mtx_type::create(exec, gko::dim<2>(500, 100))) + {} + + std::shared_ptr exec; + std::unique_ptr mtx; + std::unique_ptr unsquare_mtx; +}; + +TYPED_TEST_SUITE(MatrixUtils, gko::test::ValueTypes); + + +TYPED_TEST(MatrixUtils, MakeSymmetricThrowsError) +{ + ASSERT_THROW(gko::test::make_symmetric(gko::lend(this->unsquare_mtx)), + gko::DimensionMismatch); +} + +TYPED_TEST(MatrixUtils, MakeHermitianThrowsError) +{ + ASSERT_THROW(gko::test::make_hermitian(gko::lend(this->unsquare_mtx)), + gko::DimensionMismatch); +} + + +TYPED_TEST(MatrixUtils, MakeDiagDominantThrowsError) +{ + ASSERT_THROW(gko::test::make_diag_dominant(gko::lend(this->mtx), 0.9), + gko::ValueMismatch); +} + + +TYPED_TEST(MatrixUtils, MakeHpdMatrixThrowsError) +{ + ASSERT_THROW(gko::test::make_hpd(gko::lend(this->mtx), 1.0), + gko::ValueMismatch); +} + + +TYPED_TEST(MatrixUtils, MakeSymmetricCorrectly) +{ + gko::test::make_symmetric(gko::lend(this->mtx)); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + for (gko::size_type j = 0; j <= i; j++) { + ASSERT_EQ(this->mtx->at(i, j), this->mtx->at(j, i)); + } + } +} + + +TYPED_TEST(MatrixUtils, MakeHermitianCorrectly) +{ + gko::test::make_hermitian(gko::lend(this->mtx)); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + for (gko::size_type j = 0; j <= i; j++) { + 
ASSERT_EQ(this->mtx->at(i, j), gko::conj(this->mtx->at(j, i))); + } + } +} + + +TYPED_TEST(MatrixUtils, MakeDiagDominantCorrectly) +{ + using T = typename TestFixture::value_type; + // make_diag_dominant also consider diag value. + // To check the ratio easily, set the diag zeros + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + this->mtx->at(i, i) = 0; + } + + gko::test::make_diag_dominant(gko::lend(this->mtx)); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + gko::remove_complex off_diag_abs = 0; + for (gko::size_type j = 0; j < this->mtx->get_size()[1]; j++) { + if (j != i) { + off_diag_abs += std::abs(this->mtx->at(i, j)); + } + } + ASSERT_NEAR(gko::real(this->mtx->at(i, i)), off_diag_abs, r::value); + } +} + + +TYPED_TEST(MatrixUtils, MakeDiagDominantWithRatioCorrectly) +{ + using T = typename TestFixture::value_type; + gko::remove_complex ratio = 1.001; + // make_diag_dominant also consider diag value. + // To check the ratio easily, set the diag zeros + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + this->mtx->at(i, i) = 0; + } + + gko::test::make_diag_dominant(gko::lend(this->mtx), ratio); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + gko::remove_complex off_diag_abs = 0; + for (gko::size_type j = 0; j < this->mtx->get_size()[1]; j++) { + if (j != i) { + off_diag_abs += std::abs(this->mtx->at(i, j)); + } + } + ASSERT_NEAR(gko::real(this->mtx->at(i, i)), off_diag_abs * ratio, + r::value); + } +} + + +TYPED_TEST(MatrixUtils, MakeHpdMatrixCorrectly) +{ + using T = typename TestFixture::value_type; + auto cpy_mtx = this->mtx->clone(); + + gko::test::make_hpd(gko::lend(this->mtx)); + gko::test::make_hermitian(gko::lend(cpy_mtx)); + gko::test::make_diag_dominant(gko::lend(cpy_mtx), 1.001); + + GKO_ASSERT_MTX_NEAR(this->mtx, cpy_mtx, r::value); +} + + +TYPED_TEST(MatrixUtils, MakeHpdMatrixWithRatioCorrectly) +{ + using T = typename TestFixture::value_type; + gko::remove_complex ratio 
= 1.00001; + auto cpy_mtx = this->mtx->clone(); + + gko::test::make_hpd(gko::lend(this->mtx), ratio); + gko::test::make_hermitian(gko::lend(cpy_mtx)); + gko::test::make_diag_dominant(gko::lend(cpy_mtx), ratio); + + GKO_ASSERT_MTX_NEAR(this->mtx, cpy_mtx, r::value); +} + + +} // namespace diff --git a/core/test/utils/unsort_matrix_test.cpp b/core/test/utils/unsort_matrix_test.cpp index 0a242c7dc2c..90fbbc2ccf2 100644 --- a/core/test/utils/unsort_matrix_test.cpp +++ b/core/test/utils/unsort_matrix_test.cpp @@ -55,18 +55,19 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { +template class UnsortMatrix : public ::testing::Test { protected: - using value_type = double; - using index_type = gko::int32; + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; using Csr = gko::matrix::Csr; using Coo = gko::matrix::Coo; using Dense = gko::matrix::Dense; UnsortMatrix() : exec(gko::ReferenceExecutor::create()), rand_engine(42), - /* - */ csr_empty(Csr::create(exec, gko::dim<2>(0, 0))), coo_empty(Coo::create(exec, gko::dim<2>(0, 0))) {} @@ -148,54 +149,58 @@ class UnsortMatrix : public ::testing::Test { std::unique_ptr coo_empty; }; +TYPED_TEST_SUITE(UnsortMatrix, gko::test::ValueIndexTypes); -TEST_F(UnsortMatrix, CsrWorks) + +TYPED_TEST(UnsortMatrix, CsrWorks) { - auto csr = get_sorted_csr(); - const auto ref_mtx = get_sorted_csr(); - bool was_sorted = is_csr_matrix_sorted(gko::lend(csr)); + auto csr = this->get_sorted_csr(); + const auto ref_mtx = this->get_sorted_csr(); + bool was_sorted = this->is_csr_matrix_sorted(gko::lend(csr)); - gko::test::unsort_matrix(gko::lend(csr), rand_engine); + gko::test::unsort_matrix(gko::lend(csr), this->rand_engine); - ASSERT_FALSE(is_csr_matrix_sorted(gko::lend(csr))); + ASSERT_FALSE(this->is_csr_matrix_sorted(gko::lend(csr))); ASSERT_TRUE(was_sorted); GKO_ASSERT_MTX_NEAR(csr, ref_mtx, 
0.); } -TEST_F(UnsortMatrix, CsrWorksWithEmpty) +TYPED_TEST(UnsortMatrix, CsrWorksWithEmpty) { - const bool was_sorted = is_csr_matrix_sorted(gko::lend(csr_empty)); + const bool was_sorted = + this->is_csr_matrix_sorted(gko::lend(this->csr_empty)); - gko::test::unsort_matrix(gko::lend(csr_empty), rand_engine); + gko::test::unsort_matrix(gko::lend(this->csr_empty), this->rand_engine); ASSERT_TRUE(was_sorted); - ASSERT_EQ(csr_empty->get_num_stored_elements(), 0); + ASSERT_EQ(this->csr_empty->get_num_stored_elements(), 0); } -TEST_F(UnsortMatrix, CooWorks) +TYPED_TEST(UnsortMatrix, CooWorks) { - auto coo = get_sorted_coo(); - const auto ref_mtx = get_sorted_coo(); - const bool was_sorted = is_coo_matrix_sorted(gko::lend(coo)); + auto coo = this->get_sorted_coo(); + const auto ref_mtx = this->get_sorted_coo(); + const bool was_sorted = this->is_coo_matrix_sorted(gko::lend(coo)); - gko::test::unsort_matrix(gko::lend(coo), rand_engine); + gko::test::unsort_matrix(gko::lend(coo), this->rand_engine); - ASSERT_FALSE(is_coo_matrix_sorted(gko::lend(coo))); + ASSERT_FALSE(this->is_coo_matrix_sorted(gko::lend(coo))); ASSERT_TRUE(was_sorted); GKO_ASSERT_MTX_NEAR(coo, ref_mtx, 0.); } -TEST_F(UnsortMatrix, CooWorksWithEmpty) +TYPED_TEST(UnsortMatrix, CooWorksWithEmpty) { - const bool was_sorted = is_coo_matrix_sorted(gko::lend(coo_empty)); + const bool was_sorted = + this->is_coo_matrix_sorted(gko::lend(this->coo_empty)); - gko::test::unsort_matrix(gko::lend(coo_empty), rand_engine); + gko::test::unsort_matrix(gko::lend(this->coo_empty), this->rand_engine); ASSERT_TRUE(was_sorted); - ASSERT_EQ(coo_empty->get_num_stored_elements(), 0); + ASSERT_EQ(this->coo_empty->get_num_stored_elements(), 0); } diff --git a/core/test/utils/value_generator.hpp b/core/test/utils/value_generator.hpp index 8b82ea63d0a..8791bf6ce01 100644 --- a/core/test/utils/value_generator.hpp +++ b/core/test/utils/value_generator.hpp @@ -46,19 +46,35 @@ namespace test { namespace detail { -template +/** + * 
Generate a random value. + * + * @tparam ValueType valuetype of the value + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param value_dist distribution of array values + * @param engine a random engine + * + * @return ValueType + */ +template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(ValueDistribution &&value_dist, Engine &&gen) { - return dist(gen); + return value_dist(gen); } - -template +/** + * Specialization for complex types. + * + * @copydoc get_rand_value + */ +template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(ValueDistribution &&value_dist, Engine &&gen) { - return ValueType(dist(gen), dist(gen)); + return ValueType(value_dist(gen), value_dist(gen)); } diff --git a/core/test/utils/value_generator_test.cpp b/core/test/utils/value_generator_test.cpp new file mode 100644 index 00000000000..58f033404a9 --- /dev/null +++ b/core/test/utils/value_generator_test.cpp @@ -0,0 +1,118 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/test/utils/value_generator.hpp" + + +#include +#include +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class ValueGenerator : public ::testing::Test { +protected: + using value_type = T; + + ValueGenerator() {} + + template + ValueType get_nth_moment(int n, ValueType c, InputIterator sample_start, + InputIterator sample_end, Closure closure_op) + { + using std::pow; + ValueType res = 0; + ValueType num_elems = 0; + while (sample_start != sample_end) { + auto tmp = *(sample_start++); + res += pow(closure_op(tmp) - c, n); + num_elems += 1; + } + return res / num_elems; + } + + template + void check_average_and_deviation( + InputIterator sample_start, InputIterator sample_end, + gko::remove_complex average_ans, + gko::remove_complex deviation_ans, Closure closure_op) + { + auto average = + this->get_nth_moment(1, gko::zero>(), + sample_start, sample_end, closure_op); + auto deviation = sqrt(this->get_nth_moment(2, average, sample_start, + sample_end, closure_op)); + + // check that average & deviation is within 10% of the required amount + ASSERT_NEAR(average, average_ans, average_ans * 
0.1); + ASSERT_NEAR(deviation, deviation_ans, deviation_ans * 0.1); + } +}; + +TYPED_TEST_SUITE(ValueGenerator, gko::test::ValueTypes); + + +TYPED_TEST(ValueGenerator, OutputHasCorrectAverageAndDeviation) +{ + using T = typename TestFixture::value_type; + int num = 500; + std::vector values(num); + auto dist = std::normal_distribution(20.0, 5.0); + auto engine = std::ranlux48(42); + + for (int i = 0; i < num; i++) { + values.at(i) = gko::test::detail::get_rand_value(dist, engine); + } + + // check the real part + this->template check_average_and_deviation( + begin(values), end(values), 20.0, 5.0, + [](T &val) { return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + begin(values), end(values), 20.0, 5.0, + [](T &val) { return gko::imag(val); }); + } +} + + +} // namespace diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 2814f9177c0..524f4a09f3d 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -115,7 +115,7 @@ class AmgxPgm : public ::testing::Test { weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); auto system_dense = gen_mtx(m, m); - gko::test::make_spd(system_dense.get()); + gko::test::make_hpd(system_dense.get()); system_mtx = Csr::create(ref); system_dense->convert_to(system_mtx.get()); diff --git a/cuda/test/solver/bicg_kernels.cpp b/cuda/test/solver/bicg_kernels.cpp index d82cc0a28a0..fb62f702ccf 100644 --- a/cuda/test/solver/bicg_kernels.cpp +++ b/cuda/test/solver/bicg_kernels.cpp @@ -258,7 +258,7 @@ TEST_F(Bicg, CudaBicgStep2IsEquivalentToRef) TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(cuda); diff --git a/cuda/test/solver/cg_kernels.cpp 
b/cuda/test/solver/cg_kernels.cpp index 8333fc7d0a8..e6443543efb 100644 --- a/cuda/test/solver/cg_kernels.cpp +++ b/cuda/test/solver/cg_kernels.cpp @@ -207,7 +207,7 @@ TEST_F(Cg, CudaCgStep2IsEquivalentToRef) TEST_F(Cg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(cuda); diff --git a/cuda/test/solver/fcg_kernels.cpp b/cuda/test/solver/fcg_kernels.cpp index b2832945666..c18444ee850 100644 --- a/cuda/test/solver/fcg_kernels.cpp +++ b/cuda/test/solver/fcg_kernels.cpp @@ -220,7 +220,7 @@ TEST_F(Fcg, CudaFcgStep2IsEquivalentToRef) TEST_F(Fcg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(cuda); diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp index 39546df6cff..ce63db8a8dd 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -114,7 +114,7 @@ class AmgxPgm : public ::testing::Test { weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); auto system_dense = gen_mtx(m, m); - gko::test::make_spd(system_dense.get()); + gko::test::make_hpd(system_dense.get()); system_mtx = Csr::create(ref); system_dense->convert_to(system_mtx.get()); diff --git a/hip/test/solver/bicg_kernels.cpp b/hip/test/solver/bicg_kernels.cpp index c29bd74374c..d8510e46145 100644 --- a/hip/test/solver/bicg_kernels.cpp +++ b/hip/test/solver/bicg_kernels.cpp @@ -258,7 +258,7 @@ TEST_F(Bicg, HipBicgStep2IsEquivalentToRef) TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(hip); diff --git a/hip/test/solver/cg_kernels.cpp 
b/hip/test/solver/cg_kernels.cpp index cd4000171c0..a93c20a3287 100644 --- a/hip/test/solver/cg_kernels.cpp +++ b/hip/test/solver/cg_kernels.cpp @@ -207,7 +207,7 @@ TEST_F(Cg, HipCgStep2IsEquivalentToRef) TEST_F(Cg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(hip); diff --git a/hip/test/solver/fcg_kernels.cpp b/hip/test/solver/fcg_kernels.cpp index ca86775ee94..7a97115a998 100644 --- a/hip/test/solver/fcg_kernels.cpp +++ b/hip/test/solver/fcg_kernels.cpp @@ -220,7 +220,7 @@ TEST_F(Fcg, HipFcgStep2IsEquivalentToRef) TEST_F(Fcg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(hip); diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 3ce1b087fd8..5729d01785f 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -200,6 +200,7 @@ void assign_to_exist_agg(std::shared_ptr exec, ? 
intermediate_agg.get_data() : agg.get_data(); const auto diag_vals = diag->get_const_values(); +#pragma omp parallel for for (IndexType row = 0; row < agg.get_num_elems(); row++) { if (agg_const_val[row] != -1) { continue; diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index e0b063de0bd..a452c9783ce 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -105,7 +105,7 @@ class AmgxPgm : public ::testing::Test { weight->convert_to(weight_csr.get()); weight_diag = weight_csr->extract_diagonal(); auto system_dense = gen_mtx(m, m); - gko::test::make_spd(system_dense.get()); + gko::test::make_hpd(system_dense.get()); system_mtx = Csr::create(ref); system_dense->convert_to(system_mtx.get()); diff --git a/omp/test/solver/bicg_kernels.cpp b/omp/test/solver/bicg_kernels.cpp index 2018fa5f788..5049ab9ec10 100644 --- a/omp/test/solver/bicg_kernels.cpp +++ b/omp/test/solver/bicg_kernels.cpp @@ -238,7 +238,7 @@ TEST_F(Bicg, OmpBicgStep2IsEquivalentToRef) TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(omp); diff --git a/omp/test/solver/cg_kernels.cpp b/omp/test/solver/cg_kernels.cpp index 1e86956602a..0a5014aad64 100644 --- a/omp/test/solver/cg_kernels.cpp +++ b/omp/test/solver/cg_kernels.cpp @@ -206,7 +206,7 @@ TEST_F(Cg, OmpCgStep2IsEquivalentToRef) TEST_F(Cg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(omp); diff --git a/omp/test/solver/fcg_kernels.cpp b/omp/test/solver/fcg_kernels.cpp index 632a2c80808..85fd4cfebfd 100644 --- a/omp/test/solver/fcg_kernels.cpp +++ b/omp/test/solver/fcg_kernels.cpp @@ -219,7 +219,7 @@ TEST_F(Fcg, OmpFcgStep2IsEquivalentToRef) 
TEST_F(Fcg, ApplyIsEquivalentToRef) { auto mtx = gen_mtx(50, 50); - gko::test::make_spd(mtx.get()); + gko::test::make_hpd(mtx.get()); auto x = gen_mtx(50, 3); auto b = gen_mtx(50, 3); auto d_mtx = Mtx::create(omp); From b5145bb879b0f5527a05c5ce7962d2edc122b970 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 9 Apr 2021 02:34:18 +0800 Subject: [PATCH 14/16] Review update Co-authored-by: Pratik Nayak Co-authored-by: Tobias Ribizel --- common/multigrid/amgx_pgm_kernels.hpp.inc | 21 +++++++++++---------- core/test/utils/matrix_utils.hpp | 2 +- cuda/multigrid/amgx_pgm_kernels.cu | 1 + cuda/test/multigrid/amgx_pgm_kernels.cpp | 4 ++-- hip/multigrid/amgx_pgm_kernels.hip.cpp | 1 + hip/test/multigrid/amgx_pgm_kernels.cpp | 4 ++-- omp/multigrid/amgx_pgm_kernels.cpp | 14 +++++++------- omp/test/multigrid/amgx_pgm_kernels.cpp | 4 ++-- reference/multigrid/amgx_pgm_kernels.cpp | 14 +++++++------- 9 files changed, 34 insertions(+), 31 deletions(-) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index 9c2d434f508..623a8945a44 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -122,13 +122,13 @@ __global__ } auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); if (agg[col] == -1 && - (weight > max_weight_unagg || - (weight == max_weight_unagg && col > strongest_unagg))) { + thrust::tie(weight, col) > + thrust::tie(max_weight_unagg, strongest_unagg)) { max_weight_unagg = weight; strongest_unagg = col; } else if (agg[col] != -1 && - (weight > max_weight_agg || - (weight == max_weight_agg && col > strongest_agg))) { + thrust::tie(weight, col) > + thrust::tie(max_weight_agg, strongest_agg)) { max_weight_agg = weight; strongest_agg = col; } @@ -173,8 +173,8 @@ __global__ } auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); if (agg_const_val[col] != -1 && - (weight > max_weight_agg || - (weight == max_weight_agg && col > 
strongest_agg))) { + thrust::tie(weight, col) > + thrust::tie(max_weight_agg, strongest_agg)) { max_weight_agg = weight; strongest_agg = col; } @@ -209,8 +209,8 @@ __global__ } auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); if (agg_val[col] != -1 && - (weight > max_weight_agg || - (weight == max_weight_agg && col > strongest_agg))) { + thrust::tie(weight, col) > + thrust::tie(max_weight_agg, strongest_agg)) { max_weight_agg = weight; strongest_agg = col; } @@ -239,6 +239,7 @@ __global__ __launch_bounds__(default_block_size) void get_source_row_map_kernel( row_map[row] = atomic_add(result_row_ptrs + result_idx, num_elems); } + template __global__ __launch_bounds__(default_block_size) void move_row_kernel( const size_type source_nrows, const IndexType *__restrict__ agg_val, @@ -316,10 +317,10 @@ __global__ __launch_bounds__(default_block_size) void copy_to_coarse_kernel( return; } auto temp_i = temp_row_ptrs[row]; - for (auto i = coarse_row_ptrs[row]; i < coarse_row_ptrs[row + 1]; - i++, temp_i++) { + for (auto i = coarse_row_ptrs[row]; i < coarse_row_ptrs[row + 1]; i++) { coarse_col_idxs[i] = temp_col_idxs[temp_i]; coarse_values[i] = temp_values[temp_i]; + temp_i++; } } diff --git a/core/test/utils/matrix_utils.hpp b/core/test/utils/matrix_utils.hpp index 90aa3ea1d6e..dc6586f07b7 100644 --- a/core/test/utils/matrix_utils.hpp +++ b/core/test/utils/matrix_utils.hpp @@ -96,7 +96,7 @@ void make_hermitian(matrix::Dense *mtx) * the summation among the absoulue value of the row's elements. When ratio is * larger than 1, the result will be strictly diagonal dominant matrix except * for the empty row. When ratio is 1, the result will be diagonal dominant - * matirx. + * matrix. 
* * @tparam ValueType valuetype of Dense matrix to process * diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index 1560d7bf7f7..834f8d17f31 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 524f4a09f3d..6de292a8a7f 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -140,7 +140,7 @@ class AmgxPgm : public ::testing::Test { void make_weight(Mtx *mtx) { gko::test::make_symmetric(mtx); - // only works for realvalue cases + // only works for real value cases. mtx->compute_absolute_inplace(); gko::test::make_diag_dominant(mtx); } @@ -213,7 +213,7 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) ASSERT_EQ(d_num_agg, num_agg); GKO_ASSERT_ARRAY_EQ(d_agg, agg); - ASSERT_LE(num_agg, 300); + ASSERT_LE(num_agg, n); } diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index 2199f53e5c3..57498bca7df 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp index ce63db8a8dd..dd1ae6032c0 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -139,7 +139,7 @@ class AmgxPgm : public ::testing::Test { void make_weight(Mtx *mtx) { gko::test::make_symmetric(mtx); - // only works for realvalue cases + // only works for real value cases mtx->compute_absolute_inplace(); gko::test::make_diag_dominant(mtx); } @@ -212,7 +212,7 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) ASSERT_EQ(d_num_agg, num_agg); GKO_ASSERT_ARRAY_EQ(d_agg, agg); - ASSERT_LE(num_agg, 300); + ASSERT_LE(num_agg, n); } diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 5729d01785f..408478242dc 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -154,14 +155,13 @@ void find_strongest_neighbor( auto weight = vals[idx] / max(abs(diag_vals[row]), abs(diag_vals[col])); if (agg.get_const_data()[col] == -1 && - (weight > max_weight_unagg || - (weight == max_weight_unagg && col > strongest_unagg))) { + std::tie(weight, col) > + std::tie(max_weight_unagg, strongest_unagg)) { max_weight_unagg = weight; strongest_unagg = col; } else if (agg.get_const_data()[col] != -1 && - (weight > max_weight_agg || - (weight == max_weight_agg && - col > strongest_agg))) { + std::tie(weight, col) > + std::tie(max_weight_agg, strongest_agg)) { max_weight_agg = weight; strongest_agg = col; } @@ -215,8 +215,8 @@ void assign_to_exist_agg(std::shared_ptr exec, auto weight = vals[idx] / max(abs(diag_vals[row]), abs(diag_vals[col])); if (agg_const_val[col] != -1 && - (weight > max_weight_agg || - (weight == max_weight_agg && col > strongest_agg))) { + std::tie(weight, col) > + std::tie(max_weight_agg, strongest_agg)) { 
max_weight_agg = weight; strongest_agg = col; } diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index a452c9783ce..bd20b3749b1 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -130,7 +130,7 @@ class AmgxPgm : public ::testing::Test { void make_weight(Mtx *mtx) { gko::test::make_symmetric(mtx); - // it is only works for realvalue case. + // it is only works for real value case. mtx->compute_absolute_inplace(); gko::test::make_diag_dominant(mtx); } @@ -203,7 +203,7 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) ASSERT_EQ(d_num_agg, num_agg); GKO_ASSERT_ARRAY_EQ(d_agg, agg); - ASSERT_LE(num_agg, 300); + ASSERT_LE(num_agg, n); } diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp index d5b1ddbebb1..16002259fcf 100644 --- a/reference/multigrid/amgx_pgm_kernels.cpp +++ b/reference/multigrid/amgx_pgm_kernels.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include @@ -148,14 +149,13 @@ void find_strongest_neighbor( auto weight = vals[idx] / max(abs(diag_vals[row]), abs(diag_vals[col])); if (agg.get_const_data()[col] == -1 && - (weight > max_weight_unagg || - (weight == max_weight_unagg && col > strongest_unagg))) { + std::tie(weight, col) > + std::tie(max_weight_unagg, strongest_unagg)) { max_weight_unagg = weight; strongest_unagg = col; } else if (agg.get_const_data()[col] != -1 && - (weight > max_weight_agg || - (weight == max_weight_agg && - col > strongest_agg))) { + std::tie(weight, col) > + std::tie(max_weight_agg, strongest_agg)) { max_weight_agg = weight; strongest_agg = col; } @@ -208,8 +208,8 @@ void assign_to_exist_agg(std::shared_ptr exec, auto weight = vals[idx] / max(abs(diag_vals[row]), abs(diag_vals[col])); if (agg_const_val[col] != -1 && - (weight > max_weight_agg || - (weight == max_weight_agg && col > strongest_agg))) { + std::tie(weight, col) > + std::tie(max_weight_agg, strongest_agg)) { max_weight_agg = weight; strongest_agg = col; } From bef43736140b14b568129a4a2e83330700369ed8 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. 
Tsai" Date: Tue, 27 Apr 2021 02:46:32 +0800 Subject: [PATCH 15/16] improve renumber and pass rstr/prlg into generate Co-authored-by: Tobias Ribizel --- common/multigrid/amgx_pgm_kernels.hpp.inc | 4 +- core/multigrid/amgx_pgm.cpp | 23 +++---- core/multigrid/amgx_pgm_kernels.hpp | 14 ++-- cuda/multigrid/amgx_pgm_kernels.cu | 10 +-- cuda/test/multigrid/amgx_pgm_kernels.cpp | 64 +++++++++++++++---- dpcpp/multigrid/amgx_pgm_kernels.dp.cpp | 12 ++-- hip/multigrid/amgx_pgm_kernels.hip.cpp | 10 +-- hip/test/multigrid/amgx_pgm_kernels.cpp | 64 +++++++++++++++---- omp/multigrid/amgx_pgm_kernels.cpp | 13 ++-- omp/test/multigrid/amgx_pgm_kernels.cpp | 64 +++++++++++++++---- reference/multigrid/amgx_pgm_kernels.cpp | 8 ++- reference/test/multigrid/amgx_pgm_kernels.cpp | 12 ++-- 12 files changed, 218 insertions(+), 80 deletions(-) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index 623a8945a44..3ec6bf81a7b 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -77,7 +77,9 @@ __global__ __launch_bounds__(default_block_size) void fill_agg_kernel( if (tidx >= num) { return; } - result[index[tidx]] = 1; + // agg_vals[i] == i always holds in the aggregated group whose identifier is + // i because we use the index of element as the aggregated group identifier. 
+ result[tidx] = (index[tidx] == tidx); } diff --git a/core/multigrid/amgx_pgm.cpp b/core/multigrid/amgx_pgm.cpp index 1803c4252d2..d9fac590691 100644 --- a/core/multigrid/amgx_pgm.cpp +++ b/core/multigrid/amgx_pgm.cpp @@ -74,15 +74,17 @@ namespace { template std::unique_ptr amgx_pgm_generate( std::shared_ptr exec, - const matrix::Csr *source, const size_type num_agg, - const Array &agg) + const matrix::Csr *source, + const matrix::Csr *prolong_op, + const matrix::Csr *restrict_op) { + auto num_agg = prolong_op->get_size()[1]; auto coarse = matrix::Csr::create( exec, dim<2>{num_agg, num_agg}, 0, source->get_strategy()); auto temp = matrix::Csr::create( exec, dim<2>{num_agg, num_agg}, source->get_num_stored_elements()); - exec->run(amgx_pgm::make_amgx_pgm_generate(source, agg, coarse.get(), - temp.get())); + exec->run(amgx_pgm::make_amgx_pgm_generate(source, prolong_op, restrict_op, + coarse.get(), temp.get())); return std::move(coarse); } @@ -158,13 +160,8 @@ void AmgxPgm::generate() // Renumber the index exec->run(amgx_pgm::make_renumber(agg_, &num_agg)); - // Construct the coarse matrix - auto coarse_matrix = - share(amgx_pgm_generate(exec, amgxpgm_op, num_agg, agg_)); - // this->set_multigrid_level(system_matrix_, coarse_matrix); - auto coarse_dim = coarse_matrix->get_size()[0]; + auto coarse_dim = num_agg; auto fine_dim = system_matrix_->get_size()[0]; - // TODO: prolong_op can be done with lightway format auto prolong_op = share( matrix_type::create(exec, gko::dim<2>{fine_dim, coarse_dim}, fine_dim)); @@ -175,7 +172,11 @@ void AmgxPgm::generate() exec->run(amgx_pgm::make_fill_array(prolong_op->get_values(), fine_dim, one())); // TODO: implement the restrict_op from aggregation. 
- auto restrict_op = share(prolong_op->transpose()); + auto restrict_op = gko::as(share(prolong_op->transpose())); + + // Construct the coarse matrix + auto coarse_matrix = share(amgx_pgm_generate( + exec, amgxpgm_op, prolong_op.get(), restrict_op.get())); this->set_multigrid_level(prolong_op, coarse_matrix, restrict_op); } diff --git a/core/multigrid/amgx_pgm_kernels.hpp b/core/multigrid/amgx_pgm_kernels.hpp index e0006be63a7..5bbb48b9594 100644 --- a/core/multigrid/amgx_pgm_kernels.hpp +++ b/core/multigrid/amgx_pgm_kernels.hpp @@ -78,12 +78,14 @@ namespace amgx_pgm { const matrix::Diagonal *diag, Array &agg, \ Array &intermediate_agg) -#define GKO_DECLARE_AMGX_PGM_GENERATE(ValueType, IndexType) \ - void amgx_pgm_generate(std::shared_ptr exec, \ - const matrix::Csr *source, \ - const Array &agg, \ - matrix::Csr *coarse, \ - matrix::Csr *temp) +#define GKO_DECLARE_AMGX_PGM_GENERATE(ValueType, IndexType) \ + void amgx_pgm_generate( \ + std::shared_ptr exec, \ + const matrix::Csr *source, \ + const matrix::Csr *prolong_op, \ + const matrix::Csr *restrict_op, \ + matrix::Csr *coarse, \ + matrix::Csr *temp) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index 834f8d17f31..03f657573a7 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -111,8 +111,6 @@ void renumber(std::shared_ptr exec, Array &agg, { const auto num = agg.get_num_elems(); Array agg_map(exec, num + 1); - components::fill_array(exec, agg_map.get_data(), agg_map.get_num_elems(), - zero()); const dim3 grid(ceildiv(num, default_block_size)); kernel::fill_agg_kernel<<>>( num, agg.get_const_data(), agg_map.get_data()); @@ -177,10 +175,12 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void amgx_pgm_generate(std::shared_ptr exec, const matrix::Csr *source, - const Array &agg, + const matrix::Csr *prolong_op, + const matrix::Csr *restrict_op, matrix::Csr *coarse, 
matrix::Csr *temp) { + const auto agg_const_val = prolong_op->get_const_col_idxs(); const auto source_nrows = source->get_size()[0]; const auto source_nnz = source->get_num_stored_elements(); const auto coarse_nrows = coarse->get_size()[0]; @@ -192,13 +192,13 @@ void amgx_pgm_generate(std::shared_ptr exec, dim3 grid(ceildiv(source_nrows, default_block_size)); // agg source_row (for row size) coarse row source map kernel::get_source_row_map_kernel<<>>( - source_nrows, agg.get_const_data(), source->get_const_row_ptrs(), + source_nrows, agg_const_val, source->get_const_row_ptrs(), temp->get_row_ptrs(), row_map.get_data()); // prefix sum of temp_row_ptrs components::prefix_sum(exec, temp->get_row_ptrs(), coarse_nrows + 1); // copy source -> to coarse and change column index kernel::move_row_kernel<<>>( - source_nrows, agg.get_const_data(), row_map.get_const_data(), + source_nrows, agg_const_val, row_map.get_const_data(), source->get_const_row_ptrs(), source->get_const_col_idxs(), as_cuda_type(source->get_const_values()), temp->get_const_row_ptrs(), temp->get_col_idxs(), as_cuda_type(temp->get_values())); diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 6de292a8a7f..b92934ce089 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -98,13 +98,35 @@ class AmgxPgm : public ::testing::Test { ref); } + gko::Array gen_agg_array(gko::size_type num, + gko::size_type num_agg) + { + auto agg_array = gen_array(num, 0, num_agg - 1); + auto agg_array_val = agg_array.get_data(); + std::vector select_agg(num); + std::iota(select_agg.begin(), select_agg.end(), 0); + // use the first num_agg item as the aggregated index. 
+ std::shuffle(select_agg.begin(), select_agg.end(), rand_engine); + // the value of agg_array is the i-th of aggregate group + for (gko::size_type i = 0; i < num; i++) { + agg_array_val[i] = select_agg[agg_array_val[i]]; + } + // the aggregated group must contain the identifier-th element + // agg_val[i] == i holds in the aggregated group whose identifier is i + for (gko::size_type i = 0; i < num_agg; i++) { + auto agg_idx = select_agg[i]; + agg_array_val[agg_idx] = agg_idx; + } + return agg_array; + } + void initialize_data() { - int m = 597; + m = 597; n = 300; int nrhs = 3; - agg = gen_array(m, 0, n - 1); + agg = gen_agg_array(m, n); unfinished_agg = gen_array(m, -1, n - 1); strongest_neighbor = gen_array(m, 0, n - 1); coarse_vector = gen_mtx(n, nrhs); @@ -171,6 +193,7 @@ class AmgxPgm : public ::testing::Test { std::shared_ptr d_system_mtx; gko::size_type n; + gko::size_type m; }; @@ -193,8 +216,10 @@ TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) index_type num_unagg; index_type d_num_unagg; - gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); - gko::kernels::cuda::amgx_pgm::count_unagg(cuda, d_agg, &d_num_unagg); + gko::kernels::reference::amgx_pgm::count_unagg(ref, unfinished_agg, + &num_unagg); + gko::kernels::cuda::amgx_pgm::count_unagg(cuda, d_unfinished_agg, + &d_num_unagg); ASSERT_EQ(d_num_unagg, num_unagg); } @@ -203,8 +228,6 @@ TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) TEST_F(AmgxPgm, RenumberIsEquivalentToRef) { initialize_data(); - auto x = unfinished_agg; - auto d_x = d_unfinished_agg; index_type num_agg; index_type d_num_agg; @@ -213,7 +236,7 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) ASSERT_EQ(d_num_agg, num_agg); GKO_ASSERT_ARRAY_EQ(d_agg, agg); - ASSERT_LE(num_agg, n); + ASSERT_EQ(num_agg, n); } @@ -271,15 +294,34 @@ TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); auto d_csr_coarse = Csr::create(cuda, gko::dim<2>{n, n}, 0); auto csr_temp = Csr::create(ref, 
gko::dim<2>{n, n}, - weight_csr->get_num_stored_elements()); + system_mtx->get_num_stored_elements()); auto d_csr_temp = Csr::create(cuda, gko::dim<2>{n, n}, - d_weight_csr->get_num_stored_elements()); + d_system_mtx->get_num_stored_elements()); + index_type num_agg; + // renumber again + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + auto prolong_op = Csr::create(ref, gko::dim<2>{m, n}, m); + for (int i = 0; i < m; i++) { + prolong_op->get_col_idxs()[i] = agg.get_const_data()[i]; + } + std::iota(prolong_op->get_row_ptrs(), prolong_op->get_row_ptrs() + m + 1, + 0); + std::fill_n(prolong_op->get_values(), m, gko::one()); + auto restrict_op = gko::as(prolong_op->transpose()); + auto d_prolong_op = Csr::create(cuda); + auto d_restrict_op = Csr::create(cuda); + d_prolong_op->copy_from(prolong_op.get()); + d_restrict_op->copy_from(restrict_op.get()); gko::kernels::cuda::amgx_pgm::amgx_pgm_generate( - cuda, d_weight_csr.get(), d_agg, d_csr_coarse.get(), d_csr_temp.get()); + cuda, d_system_mtx.get(), d_prolong_op.get(), d_restrict_op.get(), + d_csr_coarse.get(), d_csr_temp.get()); gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - ref, weight_csr.get(), agg, csr_coarse.get(), csr_temp.get()); + ref, system_mtx.get(), prolong_op.get(), restrict_op.get(), + csr_coarse.get(), csr_temp.get()); + // it should be checked already in renumber + GKO_ASSERT_EQ(num_agg, n); GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); } diff --git a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp index 32817e5e834..283127ce6d5 100644 --- a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp +++ b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp @@ -101,11 +101,13 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template -void amgx_pgm_generate( - std::shared_ptr exec, - const matrix::Csr *source, - const Array &agg, matrix::Csr *coarse, - matrix::Csr *temp) GKO_NOT_IMPLEMENTED; +void amgx_pgm_generate(std::shared_ptr exec, + const 
matrix::Csr *source, + const matrix::Csr *prolong_op, + const matrix::Csr *restrict_op, + matrix::Csr *coarse, + matrix::Csr *temp) + GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index 57498bca7df..e2ed35d42e9 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -113,8 +113,6 @@ void renumber(std::shared_ptr exec, Array &agg, { const auto num = agg.get_num_elems(); Array agg_map(exec, num + 1); - components::fill_array(exec, agg_map.get_data(), agg_map.get_num_elems(), - zero()); const dim3 grid(ceildiv(num, default_block_size)); hipLaunchKernelGGL(kernel::fill_agg_kernel, dim3(grid), dim3(default_block_size), 0, 0, num, @@ -188,10 +186,12 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void amgx_pgm_generate(std::shared_ptr exec, const matrix::Csr *source, - const Array &agg, + const matrix::Csr *prolong_op, + const matrix::Csr *restrict_op, matrix::Csr *coarse, matrix::Csr *temp) { + const auto agg_const_val = prolong_op->get_const_col_idxs(); const auto source_nrows = source->get_size()[0]; const auto source_nnz = source->get_num_stored_elements(); const auto coarse_nrows = coarse->get_size()[0]; @@ -204,14 +204,14 @@ void amgx_pgm_generate(std::shared_ptr exec, // agg source_row (for row size) coarse row source map hipLaunchKernelGGL(kernel::get_source_row_map_kernel, dim3(grid), dim3(default_block_size), 0, 0, source_nrows, - agg.get_const_data(), source->get_const_row_ptrs(), + agg_const_val, source->get_const_row_ptrs(), temp->get_row_ptrs(), row_map.get_data()); // prefix sum of temp_row_ptrs components::prefix_sum(exec, temp->get_row_ptrs(), coarse_nrows + 1); // copy source -> to coarse and change column index hipLaunchKernelGGL( kernel::move_row_kernel, dim3(grid), dim3(default_block_size), 0, 0, - source_nrows, agg.get_const_data(), 
row_map.get_const_data(), + source_nrows, agg_const_val, row_map.get_const_data(), source->get_const_row_ptrs(), source->get_const_col_idxs(), as_hip_type(source->get_const_values()), temp->get_const_row_ptrs(), temp->get_col_idxs(), as_hip_type(temp->get_values())); diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp index dd1ae6032c0..73909e522aa 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -97,13 +97,35 @@ class AmgxPgm : public ::testing::Test { ref); } + gko::Array gen_agg_array(gko::size_type num, + gko::size_type num_agg) + { + auto agg_array = gen_array(num, 0, num_agg - 1); + auto agg_array_val = agg_array.get_data(); + std::vector select_agg(num); + std::iota(select_agg.begin(), select_agg.end(), 0); + // use the first num_agg item as the aggregated index. + std::shuffle(select_agg.begin(), select_agg.end(), rand_engine); + // the value of agg_array is the i-th of aggregate group + for (gko::size_type i = 0; i < num; i++) { + agg_array_val[i] = select_agg[agg_array_val[i]]; + } + // the aggregated group must contain the identifier-th element + // agg_val[i] == i holds in the aggregated group whose identifier is i + for (gko::size_type i = 0; i < num_agg; i++) { + auto agg_idx = select_agg[i]; + agg_array_val[agg_idx] = agg_idx; + } + return agg_array; + } + void initialize_data() { - int m = 597; + m = 597; n = 300; int nrhs = 3; - agg = gen_array(m, 0, n - 1); + agg = gen_agg_array(m, n); unfinished_agg = gen_array(m, -1, n - 1); strongest_neighbor = gen_array(m, 0, n - 1); coarse_vector = gen_mtx(n, nrhs); @@ -170,6 +192,7 @@ class AmgxPgm : public ::testing::Test { std::shared_ptr d_system_mtx; gko::size_type n; + gko::size_type m; }; @@ -192,8 +215,10 @@ TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) index_type num_unagg; index_type d_num_unagg; - gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); - 
gko::kernels::hip::amgx_pgm::count_unagg(hip, d_agg, &d_num_unagg); + gko::kernels::reference::amgx_pgm::count_unagg(ref, unfinished_agg, + &num_unagg); + gko::kernels::hip::amgx_pgm::count_unagg(hip, d_unfinished_agg, + &d_num_unagg); ASSERT_EQ(d_num_unagg, num_unagg); } @@ -202,8 +227,6 @@ TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) TEST_F(AmgxPgm, RenumberIsEquivalentToRef) { initialize_data(); - auto x = unfinished_agg; - auto d_x = d_unfinished_agg; index_type num_agg; index_type d_num_agg; @@ -212,7 +235,7 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) ASSERT_EQ(d_num_agg, num_agg); GKO_ASSERT_ARRAY_EQ(d_agg, agg); - ASSERT_LE(num_agg, n); + ASSERT_EQ(num_agg, n); } @@ -270,15 +293,34 @@ TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); auto d_csr_coarse = Csr::create(hip, gko::dim<2>{n, n}, 0); auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, - weight_csr->get_num_stored_elements()); + system_mtx->get_num_stored_elements()); auto d_csr_temp = Csr::create(hip, gko::dim<2>{n, n}, - d_weight_csr->get_num_stored_elements()); + d_system_mtx->get_num_stored_elements()); + index_type num_agg; + // renumber again + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + auto prolong_op = Csr::create(ref, gko::dim<2>{m, n}, m); + for (int i = 0; i < m; i++) { + prolong_op->get_col_idxs()[i] = agg.get_const_data()[i]; + } + std::iota(prolong_op->get_row_ptrs(), prolong_op->get_row_ptrs() + m + 1, + 0); + std::fill_n(prolong_op->get_values(), m, gko::one()); + auto restrict_op = gko::as(prolong_op->transpose()); + auto d_prolong_op = Csr::create(hip); + auto d_restrict_op = Csr::create(hip); + d_prolong_op->copy_from(prolong_op.get()); + d_restrict_op->copy_from(restrict_op.get()); gko::kernels::hip::amgx_pgm::amgx_pgm_generate( - hip, d_weight_csr.get(), d_agg, d_csr_coarse.get(), d_csr_temp.get()); + hip, d_system_mtx.get(), d_prolong_op.get(), d_restrict_op.get(), + d_csr_coarse.get(), 
d_csr_temp.get()); gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - ref, weight_csr.get(), agg, csr_coarse.get(), csr_temp.get()); + ref, system_mtx.get(), prolong_op.get(), restrict_op.get(), + csr_coarse.get(), csr_temp.get()); + // it should be checked already in renumber + GKO_ASSERT_EQ(num_agg, n); GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); } diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 408478242dc..dc6329d982c 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -113,10 +113,11 @@ void renumber(std::shared_ptr exec, Array &agg, Array agg_map(exec, num + 1); auto agg_vals = agg.get_data(); auto agg_map_vals = agg_map.get_data(); - components::fill_array(exec, agg_map_vals, num + 1, zero()); + // agg_vals[i] == i always holds in the aggregated group whose identifier is + // i because we use the index of element as the aggregated group identifier. #pragma omp parallel for for (size_type i = 0; i < num; i++) { - agg_map_vals[agg_vals[i]] = 1; + agg_map_vals[i] = (agg_vals[i] == i); } components::prefix_sum(exec, agg_map_vals, num + 1); #pragma omp parallel for @@ -241,11 +242,13 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void amgx_pgm_generate(std::shared_ptr exec, const matrix::Csr *source, - const Array &agg, + const matrix::Csr *prolong_op, + const matrix::Csr *restrict_op, matrix::Csr *coarse, matrix::Csr *temp) { // agg[i] -> I, agg[j] -> J + const auto agg_const_val = prolong_op->get_const_col_idxs(); const auto coarse_nrows = coarse->get_size()[0]; const auto source_nrows = source->get_size()[0]; const auto source_row_ptrs = source->get_const_row_ptrs(); @@ -254,9 +257,9 @@ void amgx_pgm_generate(std::shared_ptr exec, vector> row_list( source_nrows, map{exec}, exec); for (size_type i = 0; i < source_nrows; i++) { - IndexType row_idx = agg.get_const_data()[i]; + IndexType row_idx = agg_const_val[i]; for (auto j = 
source_row_ptrs[i]; j < source_row_ptrs[i + 1]; j++) { - const auto col = agg.get_const_data()[source_col_idxs[j]]; + const auto col = agg_const_val[source_col_idxs[j]]; const auto val = source_vals[j]; row_list[row_idx][col] += val; } diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index bd20b3749b1..91dc2c2fce8 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -88,13 +88,35 @@ class AmgxPgm : public ::testing::Test { ref); } + gko::Array gen_agg_array(gko::size_type num, + gko::size_type num_agg) + { + auto agg_array = gen_array(num, 0, num_agg - 1); + auto agg_array_val = agg_array.get_data(); + std::vector select_agg(num); + std::iota(select_agg.begin(), select_agg.end(), 0); + // use the first num_agg item as the aggregated index. + std::shuffle(select_agg.begin(), select_agg.end(), rand_engine); + // the value of agg_array is the i-th of aggregate group + for (gko::size_type i = 0; i < num; i++) { + agg_array_val[i] = select_agg[agg_array_val[i]]; + } + // the aggregated group must contain the identifier-th element + // agg_val[i] == i holds in the aggregated group whose identifier is i + for (gko::size_type i = 0; i < num_agg; i++) { + auto agg_idx = select_agg[i]; + agg_array_val[agg_idx] = agg_idx; + } + return agg_array; + } + void initialize_data() { - int m = 597; + m = 597; n = 300; int nrhs = 3; - agg = gen_array(m, 0, n - 1); + agg = gen_agg_array(m, n); unfinished_agg = gen_array(m, -1, n - 1); strongest_neighbor = gen_array(m, 0, n - 1); coarse_vector = gen_mtx(n, nrhs); @@ -161,6 +183,7 @@ class AmgxPgm : public ::testing::Test { std::shared_ptr d_system_mtx; gko::size_type n; + gko::size_type m; }; @@ -183,8 +206,10 @@ TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) index_type num_unagg; index_type d_num_unagg; - gko::kernels::reference::amgx_pgm::count_unagg(ref, agg, &num_unagg); - gko::kernels::omp::amgx_pgm::count_unagg(omp, d_agg, &d_num_unagg); + 
gko::kernels::reference::amgx_pgm::count_unagg(ref, unfinished_agg, + &num_unagg); + gko::kernels::omp::amgx_pgm::count_unagg(omp, d_unfinished_agg, + &d_num_unagg); ASSERT_EQ(d_num_unagg, num_unagg); } @@ -193,8 +218,6 @@ TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) TEST_F(AmgxPgm, RenumberIsEquivalentToRef) { initialize_data(); - auto x = unfinished_agg; - auto d_x = d_unfinished_agg; index_type num_agg; index_type d_num_agg; @@ -203,7 +226,7 @@ TEST_F(AmgxPgm, RenumberIsEquivalentToRef) ASSERT_EQ(d_num_agg, num_agg); GKO_ASSERT_ARRAY_EQ(d_agg, agg); - ASSERT_LE(num_agg, n); + ASSERT_EQ(num_agg, n); } @@ -261,15 +284,34 @@ TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); auto d_csr_coarse = Csr::create(omp, gko::dim<2>{n, n}, 0); auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, - weight_csr->get_num_stored_elements()); + system_mtx->get_num_stored_elements()); auto d_csr_temp = Csr::create(omp, gko::dim<2>{n, n}, - d_weight_csr->get_num_stored_elements()); + d_system_mtx->get_num_stored_elements()); + index_type num_agg; + // renumber again + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + auto prolong_op = Csr::create(ref, gko::dim<2>{m, n}, m); + for (int i = 0; i < m; i++) { + prolong_op->get_col_idxs()[i] = agg.get_const_data()[i]; + } + std::iota(prolong_op->get_row_ptrs(), prolong_op->get_row_ptrs() + m + 1, + 0); + std::fill_n(prolong_op->get_values(), m, gko::one()); + auto restrict_op = gko::as(prolong_op->transpose()); + auto d_prolong_op = Csr::create(omp); + auto d_restrict_op = Csr::create(omp); + d_prolong_op->copy_from(prolong_op.get()); + d_restrict_op->copy_from(restrict_op.get()); gko::kernels::omp::amgx_pgm::amgx_pgm_generate( - omp, d_weight_csr.get(), d_agg, d_csr_coarse.get(), d_csr_temp.get()); + omp, d_system_mtx.get(), d_prolong_op.get(), d_restrict_op.get(), + d_csr_coarse.get(), d_csr_temp.get()); gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - ref, 
weight_csr.get(), agg, csr_coarse.get(), csr_temp.get()); + ref, system_mtx.get(), prolong_op.get(), restrict_op.get(), + csr_coarse.get(), csr_temp.get()); + // it should be checked already in renumber + GKO_ASSERT_EQ(num_agg, n); GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); } diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp index 16002259fcf..7d03fdff9f0 100644 --- a/reference/multigrid/amgx_pgm_kernels.cpp +++ b/reference/multigrid/amgx_pgm_kernels.cpp @@ -234,11 +234,13 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void amgx_pgm_generate(std::shared_ptr exec, const matrix::Csr *source, - const Array &agg, + const matrix::Csr *prolong_op, + const matrix::Csr *restrict_op, matrix::Csr *coarse, matrix::Csr *temp) { // agg[i] -> I, agg[j] -> J + const auto agg_const_val = prolong_op->get_const_col_idxs(); const auto coarse_nrows = coarse->get_size()[0]; const auto source_nrows = source->get_size()[0]; const auto source_row_ptrs = source->get_const_row_ptrs(); @@ -247,9 +249,9 @@ void amgx_pgm_generate(std::shared_ptr exec, gko::vector> row_list( source_nrows, gko::map{exec}, exec); for (size_type i = 0; i < source_nrows; i++) { - IndexType row_idx = agg.get_const_data()[i]; + IndexType row_idx = agg_const_val[i]; for (auto j = source_row_ptrs[i]; j < source_row_ptrs[i + 1]; j++) { - const auto col = agg.get_const_data()[source_col_idxs[j]]; + const auto col = agg_const_val[source_col_idxs[j]]; const auto val = source_vals[j]; row_list[row_idx][col] += val; } diff --git a/reference/test/multigrid/amgx_pgm_kernels.cpp b/reference/test/multigrid/amgx_pgm_kernels.cpp index 09918310823..2cff6c8537b 100644 --- a/reference/test/multigrid/amgx_pgm_kernels.cpp +++ b/reference/test/multigrid/amgx_pgm_kernels.cpp @@ -508,11 +508,10 @@ TYPED_TEST(AmgxPgm, GenerateMtx) gko::Array agg(this->exec, 5); auto agg_vals = agg.get_data(); // 0 - 2, 1 - 3, 4 - agg_vals[0] = 0; - agg_vals[1] = 1; - 
agg_vals[2] = 0; - agg_vals[3] = 1; - agg_vals[4] = 2; + auto prolong_op = mtx_type::create(this->exec, gko::dim<2>{5, 3}, 0); + prolong_op->read( + {{5, 3}, {{0, 0, 1}, {1, 1, 1}, {2, 0, 1}, {3, 1, 1}, {4, 2, 1}}}); + auto restrict_op = gko::as(prolong_op->transpose()); auto coarse_ans = mtx_type::create(this->exec, gko::dim<2>{3, 3}, 0); coarse_ans->read({{3, 3}, {{0, 0, 4}, @@ -528,7 +527,8 @@ TYPED_TEST(AmgxPgm, GenerateMtx) auto empty = gko::matrix::Csr::create(this->exec); gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - this->exec, this->mtx.get(), agg, csr_coarse.get(), empty.get()); + this->exec, this->mtx.get(), prolong_op.get(), restrict_op.get(), + csr_coarse.get(), empty.get()); GKO_ASSERT_MTX_NEAR(csr_coarse, coarse_ans, r::value); } From 8b5901bd64bf1417f2b1bc4191fa3c93d1f4ed33 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 30 Apr 2021 20:56:15 +0800 Subject: [PATCH 16/16] use two csr multiplication to generate coarse need to improve it with less memory footprint --- common/multigrid/amgx_pgm_kernels.hpp.inc | 102 ------------------ core/device_hooks/common_kernels.inc.cpp | 5 - core/multigrid/amgx_pgm.cpp | 36 ++----- core/multigrid/amgx_pgm_kernels.hpp | 13 +-- cuda/multigrid/amgx_pgm_kernels.cu | 59 ---------- cuda/test/multigrid/amgx_pgm_kernels.cpp | 38 ------- dpcpp/multigrid/amgx_pgm_kernels.dp.cpp | 12 --- hip/multigrid/amgx_pgm_kernels.hip.cpp | 63 ----------- hip/test/multigrid/amgx_pgm_kernels.cpp | 38 ------- omp/multigrid/amgx_pgm_kernels.cpp | 54 ---------- omp/test/multigrid/amgx_pgm_kernels.cpp | 38 ------- reference/multigrid/amgx_pgm_kernels.cpp | 53 --------- reference/test/multigrid/amgx_pgm_kernels.cpp | 43 +++----- 13 files changed, 24 insertions(+), 530 deletions(-) diff --git a/common/multigrid/amgx_pgm_kernels.hpp.inc b/common/multigrid/amgx_pgm_kernels.hpp.inc index 3ec6bf81a7b..540478ba427 100644 --- a/common/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/multigrid/amgx_pgm_kernels.hpp.inc @@ -225,106 
+225,4 @@ __global__ } -template -__global__ __launch_bounds__(default_block_size) void get_source_row_map_kernel( - const size_type source_nrows, const IndexType *__restrict__ agg_val, - const IndexType *__restrict__ source_row_ptrs, - IndexType *__restrict__ result_row_ptrs, IndexType *__restrict__ row_map) -{ - auto row = thread::get_thread_id_flat(); - if (row >= source_nrows) { - return; - } - const auto num_elems = source_row_ptrs[row + 1] - source_row_ptrs[row]; - const auto result_idx = agg_val[row]; - // atomic_add returns the old value, so it can be the starting point. - row_map[row] = atomic_add(result_row_ptrs + result_idx, num_elems); -} - - -template -__global__ __launch_bounds__(default_block_size) void move_row_kernel( - const size_type source_nrows, const IndexType *__restrict__ agg_val, - const IndexType *__restrict__ row_map, - const IndexType *__restrict__ source_row_ptrs, - const IndexType *__restrict__ source_col_idxs, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ result_row_ptrs, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values) -{ - auto row = thread::get_thread_id_flat(); - if (row >= source_nrows) { - return; - } - auto result_i = result_row_ptrs[agg_val[row]] + row_map[row]; - for (auto i = source_row_ptrs[row]; i < source_row_ptrs[row + 1]; - i++, result_i++) { - result_col_idxs[result_i] = agg_val[source_col_idxs[i]]; - result_values[result_i] = source_values[i]; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void merge_col_kernel( - const size_type nrows, const IndexType *__restrict__ temp_row_ptrs, - IndexType *__restrict__ temp_col_idxs, ValueType *__restrict__ temp_values, - IndexType *__restrict__ coarse_row_ptrs) -{ - auto row = thread::get_thread_id_flat(); - if (row >= nrows) { - return; - } - - IndexType num_elems = zero(); - const auto start = temp_row_ptrs[row]; - const auto end = temp_row_ptrs[row + 1]; - IndexType col = 
temp_col_idxs[start]; - ValueType value = temp_values[start]; - for (auto i = start + 1; i < end; i++) { - const auto current_col = temp_col_idxs[i]; - if (current_col != col) { - // apply to the original data. It is sorted, so the writing position - // is before read position - temp_col_idxs[start + num_elems] = col; - temp_values[start + num_elems] = value; - value = zero(); - col = current_col; - num_elems++; - } - value += temp_values[i]; - } - // If start != end, need to process the final column - if (start != end) { - temp_col_idxs[start + num_elems] = col; - temp_values[start + num_elems] = value; - num_elems++; - } - coarse_row_ptrs[row] = num_elems; -} - - -template -__global__ __launch_bounds__(default_block_size) void copy_to_coarse_kernel( - const size_type nrows, const IndexType *__restrict__ temp_row_ptrs, - const IndexType *__restrict__ temp_col_idxs, - const ValueType *__restrict__ temp_values, - const IndexType *__restrict__ coarse_row_ptrs, - IndexType *__restrict__ coarse_col_idxs, - ValueType *__restrict__ coarse_values) -{ - auto row = thread::get_thread_id_flat(); - if (row >= nrows) { - return; - } - auto temp_i = temp_row_ptrs[row]; - for (auto i = coarse_row_ptrs[row]; i < coarse_row_ptrs[row + 1]; i++) { - coarse_col_idxs[i] = temp_col_idxs[temp_i]; - coarse_values[i] = temp_values[temp_i]; - temp_i++; - } -} - - } // namespace kernel diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index cbf1e2d9737..877051c3e04 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -1328,11 +1328,6 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); -template -GKO_DECLARE_AMGX_PGM_GENERATE(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); - } // namespace amgx_pgm diff --git 
a/core/multigrid/amgx_pgm.cpp b/core/multigrid/amgx_pgm.cpp index d9fac590691..83421932016 100644 --- a/core/multigrid/amgx_pgm.cpp +++ b/core/multigrid/amgx_pgm.cpp @@ -60,7 +60,6 @@ GKO_REGISTER_OPERATION(renumber, amgx_pgm::renumber); GKO_REGISTER_OPERATION(find_strongest_neighbor, amgx_pgm::find_strongest_neighbor); GKO_REGISTER_OPERATION(assign_to_exist_agg, amgx_pgm::assign_to_exist_agg); -GKO_REGISTER_OPERATION(amgx_pgm_generate, amgx_pgm::amgx_pgm_generate); GKO_REGISTER_OPERATION(fill_array, components::fill_array); GKO_REGISTER_OPERATION(fill_seq_array, components::fill_seq_array); @@ -68,30 +67,6 @@ GKO_REGISTER_OPERATION(fill_seq_array, components::fill_seq_array); } // namespace amgx_pgm -namespace { - - -template -std::unique_ptr amgx_pgm_generate( - std::shared_ptr exec, - const matrix::Csr *source, - const matrix::Csr *prolong_op, - const matrix::Csr *restrict_op) -{ - auto num_agg = prolong_op->get_size()[1]; - auto coarse = matrix::Csr::create( - exec, dim<2>{num_agg, num_agg}, 0, source->get_strategy()); - auto temp = matrix::Csr::create( - exec, dim<2>{num_agg, num_agg}, source->get_num_stored_elements()); - exec->run(amgx_pgm::make_amgx_pgm_generate(source, prolong_op, restrict_op, - coarse.get(), temp.get())); - return std::move(coarse); -} - - -} // namespace - - template void AmgxPgm::generate() { @@ -160,7 +135,7 @@ void AmgxPgm::generate() // Renumber the index exec->run(amgx_pgm::make_renumber(agg_, &num_agg)); - auto coarse_dim = num_agg; + gko::dim<2>::dimension_type coarse_dim = num_agg; auto fine_dim = system_matrix_->get_size()[0]; // TODO: prolong_op can be done with lightway format auto prolong_op = share( @@ -175,8 +150,13 @@ void AmgxPgm::generate() auto restrict_op = gko::as(share(prolong_op->transpose())); // Construct the coarse matrix - auto coarse_matrix = share(amgx_pgm_generate( - exec, amgxpgm_op, prolong_op.get(), restrict_op.get())); + // TODO: use less memory footprint to improve it + auto coarse_matrix = + 
share(matrix_type::create(exec, gko::dim<2>{coarse_dim, coarse_dim})); + auto tmp = matrix_type::create(exec, gko::dim<2>{coarse_dim, fine_dim}); + restrict_op->apply(amgxpgm_op, tmp.get()); + tmp->apply(prolong_op.get(), coarse_matrix.get()); + this->set_multigrid_level(prolong_op, coarse_matrix, restrict_op); } diff --git a/core/multigrid/amgx_pgm_kernels.hpp b/core/multigrid/amgx_pgm_kernels.hpp index 5bbb48b9594..793780ae505 100644 --- a/core/multigrid/amgx_pgm_kernels.hpp +++ b/core/multigrid/amgx_pgm_kernels.hpp @@ -78,15 +78,6 @@ namespace amgx_pgm { const matrix::Diagonal *diag, Array &agg, \ Array &intermediate_agg) -#define GKO_DECLARE_AMGX_PGM_GENERATE(ValueType, IndexType) \ - void amgx_pgm_generate( \ - std::shared_ptr exec, \ - const matrix::Csr *source, \ - const matrix::Csr *prolong_op, \ - const matrix::Csr *restrict_op, \ - matrix::Csr *coarse, \ - matrix::Csr *temp) - #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL(IndexType); \ @@ -97,9 +88,7 @@ namespace amgx_pgm { template \ GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR(ValueType, IndexType); \ template \ - GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG(ValueType, IndexType); \ - template \ - GKO_DECLARE_AMGX_PGM_GENERATE(ValueType, IndexType) + GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG(ValueType, IndexType) } // namespace amgx_pgm diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index 03f657573a7..15d9940b22e 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -172,65 +172,6 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); -template -void amgx_pgm_generate(std::shared_ptr exec, - const matrix::Csr *source, - const matrix::Csr *prolong_op, - const matrix::Csr *restrict_op, - matrix::Csr *coarse, - matrix::Csr *temp) -{ - const auto agg_const_val = prolong_op->get_const_col_idxs(); - const auto source_nrows = source->get_size()[0]; 
- const auto source_nnz = source->get_num_stored_elements(); - const auto coarse_nrows = coarse->get_size()[0]; - Array row_map(exec, source_nrows); - // fill coarse row pointer as zero - components::fill_array(exec, temp->get_row_ptrs(), coarse_nrows + 1, - zero()); - // compute each source row should be moved and also change column index - dim3 grid(ceildiv(source_nrows, default_block_size)); - // agg source_row (for row size) coarse row source map - kernel::get_source_row_map_kernel<<>>( - source_nrows, agg_const_val, source->get_const_row_ptrs(), - temp->get_row_ptrs(), row_map.get_data()); - // prefix sum of temp_row_ptrs - components::prefix_sum(exec, temp->get_row_ptrs(), coarse_nrows + 1); - // copy source -> to coarse and change column index - kernel::move_row_kernel<<>>( - source_nrows, agg_const_val, row_map.get_const_data(), - source->get_const_row_ptrs(), source->get_const_col_idxs(), - as_cuda_type(source->get_const_values()), temp->get_const_row_ptrs(), - temp->get_col_idxs(), as_cuda_type(temp->get_values())); - // sort csr - csr::sort_by_column_index(exec, temp); - // summation of the elements with same position - grid = ceildiv(coarse_nrows, default_block_size); - kernel::merge_col_kernel<<>>( - coarse_nrows, temp->get_const_row_ptrs(), temp->get_col_idxs(), - as_cuda_type(temp->get_values()), coarse->get_row_ptrs()); - // build the coarse matrix - components::prefix_sum(exec, coarse->get_row_ptrs(), coarse_nrows + 1); - // prefix sum of coarse->get_row_ptrs - const auto coarse_nnz = - exec->copy_val_to_host(coarse->get_row_ptrs() + coarse_nrows); - // reallocate size of column and values - matrix::CsrBuilder coarse_builder{coarse}; - auto &coarse_col_idxs_array = coarse_builder.get_col_idx_array(); - auto &coarse_vals_array = coarse_builder.get_value_array(); - coarse_col_idxs_array.resize_and_reset(coarse_nnz); - coarse_vals_array.resize_and_reset(coarse_nnz); - // copy the result - kernel::copy_to_coarse_kernel<<>>( - coarse_nrows, 
temp->get_const_row_ptrs(), temp->get_const_col_idxs(), - as_cuda_type(temp->get_const_values()), coarse->get_const_row_ptrs(), - coarse_col_idxs_array.get_data(), - as_cuda_type(coarse_vals_array.get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); - - } // namespace amgx_pgm } // namespace cuda } // namespace kernels diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index b92934ce089..07672359615 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -288,44 +288,6 @@ TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) } -TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) -{ - initialize_data(); - auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); - auto d_csr_coarse = Csr::create(cuda, gko::dim<2>{n, n}, 0); - auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, - system_mtx->get_num_stored_elements()); - auto d_csr_temp = Csr::create(cuda, gko::dim<2>{n, n}, - d_system_mtx->get_num_stored_elements()); - index_type num_agg; - // renumber again - gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); - auto prolong_op = Csr::create(ref, gko::dim<2>{m, n}, m); - for (int i = 0; i < m; i++) { - prolong_op->get_col_idxs()[i] = agg.get_const_data()[i]; - } - std::iota(prolong_op->get_row_ptrs(), prolong_op->get_row_ptrs() + m + 1, - 0); - std::fill_n(prolong_op->get_values(), m, gko::one()); - auto restrict_op = gko::as(prolong_op->transpose()); - auto d_prolong_op = Csr::create(cuda); - auto d_restrict_op = Csr::create(cuda); - d_prolong_op->copy_from(prolong_op.get()); - d_restrict_op->copy_from(restrict_op.get()); - - gko::kernels::cuda::amgx_pgm::amgx_pgm_generate( - cuda, d_system_mtx.get(), d_prolong_op.get(), d_restrict_op.get(), - d_csr_coarse.get(), d_csr_temp.get()); - gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - ref, system_mtx.get(), prolong_op.get(), restrict_op.get(), - 
csr_coarse.get(), csr_temp.get()); - - // it should be checked already in renumber - GKO_ASSERT_EQ(num_agg, n); - GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); -} - - TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) { initialize_data(); diff --git a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp index 283127ce6d5..14baa306bca 100644 --- a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp +++ b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp @@ -100,18 +100,6 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); -template -void amgx_pgm_generate(std::shared_ptr exec, - const matrix::Csr *source, - const matrix::Csr *prolong_op, - const matrix::Csr *restrict_op, - matrix::Csr *coarse, - matrix::Csr *temp) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); - - } // namespace amgx_pgm } // namespace dpcpp } // namespace kernels diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index e2ed35d42e9..d7f4685b785 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -183,69 +183,6 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); -template -void amgx_pgm_generate(std::shared_ptr exec, - const matrix::Csr *source, - const matrix::Csr *prolong_op, - const matrix::Csr *restrict_op, - matrix::Csr *coarse, - matrix::Csr *temp) -{ - const auto agg_const_val = prolong_op->get_const_col_idxs(); - const auto source_nrows = source->get_size()[0]; - const auto source_nnz = source->get_num_stored_elements(); - const auto coarse_nrows = coarse->get_size()[0]; - Array row_map(exec, source_nrows); - // fill coarse row pointer as zero - components::fill_array(exec, temp->get_row_ptrs(), coarse_nrows + 1, - zero()); - // compute each source row should be moved and also change column index - dim3 grid(ceildiv(source_nrows, 
default_block_size)); - // agg source_row (for row size) coarse row source map - hipLaunchKernelGGL(kernel::get_source_row_map_kernel, dim3(grid), - dim3(default_block_size), 0, 0, source_nrows, - agg_const_val, source->get_const_row_ptrs(), - temp->get_row_ptrs(), row_map.get_data()); - // prefix sum of temp_row_ptrs - components::prefix_sum(exec, temp->get_row_ptrs(), coarse_nrows + 1); - // copy source -> to coarse and change column index - hipLaunchKernelGGL( - kernel::move_row_kernel, dim3(grid), dim3(default_block_size), 0, 0, - source_nrows, agg_const_val, row_map.get_const_data(), - source->get_const_row_ptrs(), source->get_const_col_idxs(), - as_hip_type(source->get_const_values()), temp->get_const_row_ptrs(), - temp->get_col_idxs(), as_hip_type(temp->get_values())); - // sort csr - csr::sort_by_column_index(exec, temp); - // summation of the elements with same position - grid = ceildiv(coarse_nrows, default_block_size); - hipLaunchKernelGGL(kernel::merge_col_kernel, dim3(grid), - dim3(default_block_size), 0, 0, coarse_nrows, - temp->get_const_row_ptrs(), temp->get_col_idxs(), - as_hip_type(temp->get_values()), coarse->get_row_ptrs()); - // build the coarse matrix - components::prefix_sum(exec, coarse->get_row_ptrs(), coarse_nrows + 1); - // prefix sum of coarse->get_row_ptrs - const auto coarse_nnz = - exec->copy_val_to_host(coarse->get_row_ptrs() + coarse_nrows); - // reallocate size of column and values - matrix::CsrBuilder coarse_builder{coarse}; - auto &coarse_col_idxs_array = coarse_builder.get_col_idx_array(); - auto &coarse_vals_array = coarse_builder.get_value_array(); - coarse_col_idxs_array.resize_and_reset(coarse_nnz); - coarse_vals_array.resize_and_reset(coarse_nnz); - // copy the result - hipLaunchKernelGGL( - kernel::copy_to_coarse_kernel, dim3(grid), dim3(default_block_size), 0, - 0, coarse_nrows, temp->get_const_row_ptrs(), temp->get_const_col_idxs(), - as_hip_type(temp->get_const_values()), coarse->get_const_row_ptrs(), - 
coarse_col_idxs_array.get_data(), - as_hip_type(coarse_vals_array.get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); - - } // namespace amgx_pgm } // namespace hip } // namespace kernels diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp index 73909e522aa..879eae1876e 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -287,44 +287,6 @@ TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) } -TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) -{ - initialize_data(); - auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); - auto d_csr_coarse = Csr::create(hip, gko::dim<2>{n, n}, 0); - auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, - system_mtx->get_num_stored_elements()); - auto d_csr_temp = Csr::create(hip, gko::dim<2>{n, n}, - d_system_mtx->get_num_stored_elements()); - index_type num_agg; - // renumber again - gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); - auto prolong_op = Csr::create(ref, gko::dim<2>{m, n}, m); - for (int i = 0; i < m; i++) { - prolong_op->get_col_idxs()[i] = agg.get_const_data()[i]; - } - std::iota(prolong_op->get_row_ptrs(), prolong_op->get_row_ptrs() + m + 1, - 0); - std::fill_n(prolong_op->get_values(), m, gko::one()); - auto restrict_op = gko::as(prolong_op->transpose()); - auto d_prolong_op = Csr::create(hip); - auto d_restrict_op = Csr::create(hip); - d_prolong_op->copy_from(prolong_op.get()); - d_restrict_op->copy_from(restrict_op.get()); - - gko::kernels::hip::amgx_pgm::amgx_pgm_generate( - hip, d_system_mtx.get(), d_prolong_op.get(), d_restrict_op.get(), - d_csr_coarse.get(), d_csr_temp.get()); - gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - ref, system_mtx.get(), prolong_op.get(), restrict_op.get(), - csr_coarse.get(), csr_temp.get()); - - // it should be checked already in renumber - GKO_ASSERT_EQ(num_agg, n); - GKO_ASSERT_MTX_NEAR(d_csr_coarse, 
csr_coarse, 1e-14); -} - - TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) { initialize_data(); diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index dc6329d982c..07e84630d85 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -239,60 +239,6 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); -template -void amgx_pgm_generate(std::shared_ptr exec, - const matrix::Csr *source, - const matrix::Csr *prolong_op, - const matrix::Csr *restrict_op, - matrix::Csr *coarse, - matrix::Csr *temp) -{ - // agg[i] -> I, agg[j] -> J - const auto agg_const_val = prolong_op->get_const_col_idxs(); - const auto coarse_nrows = coarse->get_size()[0]; - const auto source_nrows = source->get_size()[0]; - const auto source_row_ptrs = source->get_const_row_ptrs(); - const auto source_col_idxs = source->get_const_col_idxs(); - const auto source_vals = source->get_const_values(); - vector> row_list( - source_nrows, map{exec}, exec); - for (size_type i = 0; i < source_nrows; i++) { - IndexType row_idx = agg_const_val[i]; - for (auto j = source_row_ptrs[i]; j < source_row_ptrs[i + 1]; j++) { - const auto col = agg_const_val[source_col_idxs[j]]; - const auto val = source_vals[j]; - row_list[row_idx][col] += val; - } - } - auto coarse_row_ptrs = coarse->get_row_ptrs(); -#pragma omp parallel for - for (size_type i = 0; i < coarse_nrows; i++) { - coarse_row_ptrs[i] = row_list[i].size(); - } - components::prefix_sum(exec, coarse_row_ptrs, coarse_nrows + 1); - - auto nnz = coarse_row_ptrs[coarse_nrows]; - matrix::CsrBuilder coarse_builder{coarse}; - auto &coarse_col_idxs_array = coarse_builder.get_col_idx_array(); - auto &coarse_vals_array = coarse_builder.get_value_array(); - coarse_col_idxs_array.resize_and_reset(nnz); - coarse_vals_array.resize_and_reset(nnz); - auto coarse_col_idxs = coarse_col_idxs_array.get_data(); - auto coarse_vals = coarse_vals_array.get_data(); 
-#pragma omp parallel for - for (size_type i = 0; i < coarse_nrows; i++) { - auto ind = coarse_row_ptrs[i]; - for (auto pair : row_list[i]) { - coarse_col_idxs[ind] = pair.first; - coarse_vals[ind] = pair.second; - ind++; - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); - - } // namespace amgx_pgm } // namespace omp } // namespace kernels diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index 91dc2c2fce8..59b40657703 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -278,44 +278,6 @@ TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) } -TEST_F(AmgxPgm, GenerateMtxIsEquivalentToRef) -{ - initialize_data(); - auto csr_coarse = Csr::create(ref, gko::dim<2>{n, n}, 0); - auto d_csr_coarse = Csr::create(omp, gko::dim<2>{n, n}, 0); - auto csr_temp = Csr::create(ref, gko::dim<2>{n, n}, - system_mtx->get_num_stored_elements()); - auto d_csr_temp = Csr::create(omp, gko::dim<2>{n, n}, - d_system_mtx->get_num_stored_elements()); - index_type num_agg; - // renumber again - gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); - auto prolong_op = Csr::create(ref, gko::dim<2>{m, n}, m); - for (int i = 0; i < m; i++) { - prolong_op->get_col_idxs()[i] = agg.get_const_data()[i]; - } - std::iota(prolong_op->get_row_ptrs(), prolong_op->get_row_ptrs() + m + 1, - 0); - std::fill_n(prolong_op->get_values(), m, gko::one()); - auto restrict_op = gko::as(prolong_op->transpose()); - auto d_prolong_op = Csr::create(omp); - auto d_restrict_op = Csr::create(omp); - d_prolong_op->copy_from(prolong_op.get()); - d_restrict_op->copy_from(restrict_op.get()); - - gko::kernels::omp::amgx_pgm::amgx_pgm_generate( - omp, d_system_mtx.get(), d_prolong_op.get(), d_restrict_op.get(), - d_csr_coarse.get(), d_csr_temp.get()); - gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - ref, system_mtx.get(), prolong_op.get(), restrict_op.get(), - 
csr_coarse.get(), csr_temp.get()); - - // it should be checked already in renumber - GKO_ASSERT_EQ(num_agg, n); - GKO_ASSERT_MTX_NEAR(d_csr_coarse, csr_coarse, 1e-14); -} - - TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) { initialize_data(); diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp index 7d03fdff9f0..5ad1ff8d108 100644 --- a/reference/multigrid/amgx_pgm_kernels.cpp +++ b/reference/multigrid/amgx_pgm_kernels.cpp @@ -231,59 +231,6 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); -template -void amgx_pgm_generate(std::shared_ptr exec, - const matrix::Csr *source, - const matrix::Csr *prolong_op, - const matrix::Csr *restrict_op, - matrix::Csr *coarse, - matrix::Csr *temp) -{ - // agg[i] -> I, agg[j] -> J - const auto agg_const_val = prolong_op->get_const_col_idxs(); - const auto coarse_nrows = coarse->get_size()[0]; - const auto source_nrows = source->get_size()[0]; - const auto source_row_ptrs = source->get_const_row_ptrs(); - const auto source_col_idxs = source->get_const_col_idxs(); - const auto source_vals = source->get_const_values(); - gko::vector> row_list( - source_nrows, gko::map{exec}, exec); - for (size_type i = 0; i < source_nrows; i++) { - IndexType row_idx = agg_const_val[i]; - for (auto j = source_row_ptrs[i]; j < source_row_ptrs[i + 1]; j++) { - const auto col = agg_const_val[source_col_idxs[j]]; - const auto val = source_vals[j]; - row_list[row_idx][col] += val; - } - } - auto coarse_row_ptrs = coarse->get_row_ptrs(); - for (size_type i = 0; i < coarse_nrows; i++) { - coarse_row_ptrs[i] = row_list[i].size(); - } - components::prefix_sum(exec, coarse_row_ptrs, coarse_nrows + 1); - - auto nnz = coarse_row_ptrs[coarse_nrows]; - matrix::CsrBuilder coarse_builder{coarse}; - auto &coarse_col_idxs_array = coarse_builder.get_col_idx_array(); - auto &coarse_vals_array = coarse_builder.get_value_array(); - 
coarse_col_idxs_array.resize_and_reset(nnz); - coarse_vals_array.resize_and_reset(nnz); - auto coarse_col_idxs = coarse_col_idxs_array.get_data(); - auto coarse_vals = coarse_vals_array.get_data(); - - for (size_type i = 0; i < coarse_nrows; i++) { - auto ind = coarse_row_ptrs[i]; - for (auto pair : row_list[i]) { - coarse_col_idxs[ind] = pair.first; - coarse_vals[ind] = pair.second; - ind++; - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_GENERATE); - - } // namespace amgx_pgm } // namespace reference } // namespace kernels diff --git a/reference/test/multigrid/amgx_pgm_kernels.cpp b/reference/test/multigrid/amgx_pgm_kernels.cpp index 2cff6c8537b..c0270312d6c 100644 --- a/reference/test/multigrid/amgx_pgm_kernels.cpp +++ b/reference/test/multigrid/amgx_pgm_kernels.cpp @@ -500,37 +500,24 @@ TYPED_TEST(AmgxPgm, AssignToExistAgg) } -TYPED_TEST(AmgxPgm, GenerateMtx) +TYPED_TEST(AmgxPgm, GenerateMgLevel) { - using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; - using mtx_type = typename TestFixture::Mtx; - gko::Array agg(this->exec, 5); - auto agg_vals = agg.get_data(); - // 0 - 2, 1 - 3, 4 - auto prolong_op = mtx_type::create(this->exec, gko::dim<2>{5, 3}, 0); + using Mtx = typename TestFixture::Mtx; + auto prolong_op = gko::share(Mtx::create(this->exec, gko::dim<2>{5, 2}, 0)); + // 0-2-4, 1-3 prolong_op->read( - {{5, 3}, {{0, 0, 1}, {1, 1, 1}, {2, 0, 1}, {3, 1, 1}, {4, 2, 1}}}); - auto restrict_op = gko::as(prolong_op->transpose()); - auto coarse_ans = mtx_type::create(this->exec, gko::dim<2>{3, 3}, 0); - coarse_ans->read({{3, 3}, - {{0, 0, 4}, - {0, 1, -3}, - {0, 2, -1}, - {1, 0, -3}, - {1, 1, 5}, - {1, 2, -1}, - {2, 0, -2}, - {2, 1, -2}, - {2, 2, 5}}}); - auto csr_coarse = mtx_type::create(this->exec, gko::dim<2>{3, 3}, 0); - auto empty = gko::matrix::Csr::create(this->exec); - - gko::kernels::reference::amgx_pgm::amgx_pgm_generate( - this->exec, this->mtx.get(), 
prolong_op.get(), restrict_op.get(), - csr_coarse.get(), empty.get()); - - GKO_ASSERT_MTX_NEAR(csr_coarse, coarse_ans, r::value); + {{5, 2}, {{0, 0, 1}, {1, 1, 1}, {2, 0, 1}, {3, 1, 1}, {4, 0, 1}}}); + auto restrict_op = gko::share(gko::as(prolong_op->transpose())); + + auto coarse_fine = this->amgxpgm_factory->generate(this->mtx); + + GKO_ASSERT_MTX_NEAR(gko::as(coarse_fine->get_restrict_op()), + restrict_op, r::value); + GKO_ASSERT_MTX_NEAR(gko::as(coarse_fine->get_coarse_op()), + this->coarse, r::value); + GKO_ASSERT_MTX_NEAR(gko::as(coarse_fine->get_prolong_op()), prolong_op, + r::value); }