From 4f882e17842e52c063503f1ecd344bf482a8bbb9 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 8 Oct 2023 12:48:39 +0200 Subject: [PATCH] add new interface for RCM --- core/reorder/rcm.cpp | 184 +++++++++++++++++++++++-- core/test/reorder/rcm.cpp | 22 +++ cuda/test/reorder/rcm_kernels.cpp | 29 ++-- include/ginkgo/core/reorder/rcm.hpp | 135 +++++++++--------- omp/test/reorder/rcm_kernels.cpp | 56 +++++--- reference/test/reorder/rcm_kernels.cpp | 39 +++++- 6 files changed, 355 insertions(+), 110 deletions(-) diff --git a/core/reorder/rcm.cpp b/core/reorder/rcm.cpp index ce4c26225a1..1139deb7359 100644 --- a/core/reorder/rcm.cpp +++ b/core/reorder/rcm.cpp @@ -66,22 +66,85 @@ GKO_REGISTER_OPERATION(get_degree_of_nodes, rcm::get_degree_of_nodes); template -void Rcm::generate( - std::shared_ptr& exec, - std::unique_ptr adjacency_matrix) const +void rcm_reorder(matrix::SparsityCsr* mtx, + IndexType* permutation, IndexType* inv_permutation, + starting_strategy strategy) { - const IndexType num_rows = adjacency_matrix->get_size()[0]; - const auto mtx = adjacency_matrix.get(); - auto degrees = array(exec, num_rows); - // RCM is only valid for symmetric matrices. Need to add an expensive check - // for symmetricity here ? + const auto exec = mtx->get_executor(); + const IndexType num_rows = mtx->get_size()[0]; + array degrees{exec, mtx->get_size()[0]}; exec->run(rcm::make_get_degree_of_nodes(num_rows, mtx->get_const_row_ptrs(), degrees.get_data())); exec->run(rcm::make_get_permutation( num_rows, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - degrees.get_const_data(), permutation_->get_permutation(), - inv_permutation_.get() ? inv_permutation_->get_permutation() : nullptr, - parameters_.strategy)); + degrees.get_const_data(), permutation, inv_permutation, strategy)); +} + + +template +Rcm::Rcm(std::shared_ptr exec) + : EnablePolymorphicObject>(std::move(exec)) +{} + + +template +Rcm::Rcm(const Factory* factory, + const ReorderingBaseArgs& args) + : EnablePolymorphicObject>( + factory->get_executor()), + parameters_{factory->get_parameters()} +{ + // Always execute the reordering on the cpu. + const auto is_gpu_executor = + this->get_executor() != this->get_executor()->get_master(); + auto cpu_exec = is_gpu_executor ? this->get_executor()->get_master() + : this->get_executor(); + + auto adjacency_matrix = SparsityMatrix::create(cpu_exec); + array degrees; + + // The adjacency matrix has to be square. + GKO_ASSERT_IS_SQUARE_MATRIX(args.system_matrix); + // This is needed because it does not make sense to call the copy and + // convert if the existing matrix is empty. + if (args.system_matrix->get_size()) { + auto tmp = + copy_and_convert_to(cpu_exec, args.system_matrix); + // This function provided within the Sparsity matrix format removes + // the diagonal elements and outputs an adjacency matrix. + adjacency_matrix = tmp->to_adjacency_matrix(); + } + + auto const dim = adjacency_matrix->get_size(); + permutation_ = PermutationMatrix::create(cpu_exec, dim); + + // To make it explicit. + inv_permutation_ = nullptr; + if (parameters_.construct_inverse_permutation) { + inv_permutation_ = PermutationMatrix::create(cpu_exec, dim); + } + + rcm_reorder( + adjacency_matrix.get(), permutation_->get_permutation(), + inv_permutation_ ? inv_permutation_->get_permutation() : nullptr, + parameters_.strategy); + + // Copy back results to gpu if necessary. + if (is_gpu_executor) { + const auto gpu_exec = this->get_executor(); + auto gpu_perm = share(PermutationMatrix::create(gpu_exec, dim)); + gpu_perm->copy_from(permutation_); + permutation_ = gpu_perm; + if (inv_permutation_) { + auto gpu_inv_perm = share(PermutationMatrix::create(gpu_exec, dim)); + gpu_inv_perm->copy_from(inv_permutation_); + inv_permutation_ = gpu_inv_perm; + } + } + auto permutation_array = + make_array_view(this->get_executor(), permutation_->get_size()[0], + permutation_->get_permutation()); + this->set_permutation_array(permutation_array); } @@ -90,4 +153,103 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_RCM); } // namespace reorder + + +namespace experimental { +namespace reorder { + + +template +Rcm::Rcm(std::shared_ptr exec, + const parameters_type& params) + : EnablePolymorphicObject(std::move(exec)), + parameters_{params} +{} + + +template +std::unique_ptr> Rcm::generate( + std::shared_ptr system_matrix) const +{ + auto product = + std::unique_ptr(static_cast( + this->LinOpFactory::generate(std::move(system_matrix)).release())); + return product; +} + + +template +std::unique_ptr Rcm::generate_impl( + std::shared_ptr system_matrix) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); + const auto exec = this->get_executor(); + const auto host_exec = exec->get_master(); + const auto num_rows = system_matrix->get_size()[0]; + using complex_scalar = matrix::Dense>; + using real_scalar = matrix::Dense; + using complex_identity = matrix::Identity>; + using real_identity = matrix::Identity; + using complex_mtx = matrix::Csr, IndexType>; + using real_mtx = matrix::Csr; + using sparsity_mtx = matrix::SparsityCsr; + std::unique_ptr converted; + // extract row pointers and column indices + IndexType* d_row_ptrs{}; + IndexType* d_col_idxs{}; + size_type d_nnz{}; + if (auto convertible = dynamic_cast*>( + system_matrix.get())) { + auto conv_csr = complex_mtx::create(exec); + convertible->convert_to(conv_csr); + if (!parameters_.skip_symmetrize) { + auto scalar = + initialize({one>()}, exec); + auto id = complex_identity::create(exec, conv_csr->get_size()[0]); + // compute A^T + A + conv_csr->transpose()->apply(scalar, id, scalar, conv_csr); + } + d_nnz = conv_csr->get_num_stored_elements(); + d_row_ptrs = conv_csr->get_row_ptrs(); + d_col_idxs = conv_csr->get_col_idxs(); + converted = std::move(conv_csr); + } else { + auto conv_csr = real_mtx::create(exec); + as>(system_matrix)->convert_to(conv_csr); + if (!parameters_.skip_symmetrize) { + auto scalar = initialize({one()}, exec); + auto id = real_identity::create(exec, conv_csr->get_size()[0]); + // compute A^T + A + conv_csr->transpose()->apply(scalar, id, scalar, conv_csr); + } + d_nnz = conv_csr->get_num_stored_elements(); + d_row_ptrs = conv_csr->get_row_ptrs(); + d_col_idxs = conv_csr->get_col_idxs(); + converted = std::move(conv_csr); + } + + array permutation(host_exec, num_rows); + + // remove diagonal entries + auto pattern = + sparsity_mtx::create(exec, gko::dim<2>{num_rows, num_rows}, + make_array_view(exec, d_nnz, d_col_idxs), + make_array_view(exec, num_rows + 1, d_row_ptrs)); + pattern = pattern->to_adjacency_matrix(); + rcm_reorder(pattern.get(), permutation.get_data(), + static_cast(nullptr), parameters_.strategy); + + // permutation gets copied to device via gko::array constructor + return permutation_type::create(exec, dim<2>{num_rows, num_rows}, + std::move(permutation)); +} + + +#undef GKO_DECLARE_RCM +#define GKO_DECLARE_RCM(IndexType) class Rcm +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM); + + +} // namespace reorder +} // namespace experimental } // namespace gko diff --git a/core/test/reorder/rcm.cpp b/core/test/reorder/rcm.cpp index dfd90fe137a..d25944733ce 100644 --- a/core/test/reorder/rcm.cpp +++ b/core/test/reorder/rcm.cpp @@ -52,6 +52,7 @@ class Rcm : public ::testing::Test { using v_type = double; using i_type = int; using reorder_type = gko::reorder::Rcm; + using new_reorder_type = gko::experimental::reorder::Rcm; Rcm() : exec(gko::ReferenceExecutor::create()), @@ -67,4 +68,25 @@ TEST_F(Rcm, RcmFactoryKnowsItsExecutor) ASSERT_EQ(this->rcm_factory->get_executor(), this->exec); } + +TEST_F(Rcm, NewInterfaceDefaults) +{ + auto param = new_reorder_type::build(); + + ASSERT_EQ(param.skip_symmetrize, false); + ASSERT_EQ(param.strategy, + gko::reorder::starting_strategy::pseudo_peripheral); +} + + +TEST_F(Rcm, NewInterfaceSetParameters) +{ + auto param = + new_reorder_type::build().with_skip_symmetrize(true).with_strategy( + gko::reorder::starting_strategy::minimum_degree); + + ASSERT_EQ(param.skip_symmetrize, false); + ASSERT_EQ(param.strategy, gko::reorder::starting_strategy::minimum_degree); +} + } // namespace diff --git a/cuda/test/reorder/rcm_kernels.cpp b/cuda/test/reorder/rcm_kernels.cpp index 169f26e208e..828c2f8f5bb 100644 --- a/cuda/test/reorder/rcm_kernels.cpp +++ b/cuda/test/reorder/rcm_kernels.cpp @@ -49,34 +49,39 @@ class Rcm : public CudaTestFixture { using i_type = int; using CsrMtx = gko::matrix::Csr; using reorder_type = gko::reorder::Rcm; + using new_reorder_type = gko::experimental::reorder::Rcm; using perm_type = gko::matrix::Permutation; Rcm() - : // clang-format off - p_mtx(gko::initialize({{1.0, 2.0, 0.0, -1.3, 2.1}, + : p_mtx(gko::initialize({{1.0, 2.0, 0.0, -1.3, 2.1}, {2.0, 5.0, 1.5, 0.0, 0.0}, {0.0, 1.5, 1.5, 1.1, 0.0}, {-1.3, 0.0, 1.1, 2.0, 0.0}, {2.1, 0.0, 0.0, 0.0, 1.0}}, - exec)), - // clang-format on - rcm_factory(reorder_type::build().on(exec)), - reorder_op(rcm_factory->generate(p_mtx)) + exec)) {} - std::unique_ptr rcm_factory; std::shared_ptr p_mtx; - std::unique_ptr reorder_op; }; -TEST_F(Rcm, IsExecutedOnCpuExecutor) +TEST_F(Rcm, IsEquivalentToRef) { - // This only executes successfully if computed on cpu executor. - auto p = reorder_op->get_permutation(); + auto reorder_op = reorder_type::build().on(ref)->generate(p_mtx); + auto dreorder_op = reorder_type::build().on(exec)->generate(p_mtx); - ASSERT_TRUE(true); + GKO_ASSERT_ARRAY_EQ(dreorder_op->get_permutation_array(), + reorder_op->get_permutation_array()); +} + + +TEST_F(Rcm, IsEquivalentToRefNewInterface) +{ + auto reorder_op = new_reorder_type::build().on(ref)->generate(p_mtx); + auto dreorder_op = new_reorder_type::build().on(exec)->generate(p_mtx); + + GKO_ASSERT_MTX_EQ_SPARSITY(dreorder_op, reorder_op); } diff --git a/include/ginkgo/core/reorder/rcm.hpp b/include/ginkgo/core/reorder/rcm.hpp index 72ba6827f2b..27e246c8f16 100644 --- a/include/ginkgo/core/reorder/rcm.hpp +++ b/include/ginkgo/core/reorder/rcm.hpp @@ -149,73 +149,9 @@ class Rcm : public EnablePolymorphicObject, GKO_ENABLE_BUILD_METHOD(Factory); protected: - /** - * Generates the permutation matrix and if required the inverse permutation - * matrix. - */ - void generate(std::shared_ptr& exec, - std::unique_ptr adjacency_matrix) const; - - explicit Rcm(std::shared_ptr exec) - : EnablePolymorphicObject>( - std::move(exec)) - {} - - explicit Rcm(const Factory* factory, const ReorderingBaseArgs& args) - : EnablePolymorphicObject>( - factory->get_executor()), - parameters_{factory->get_parameters()} - { - // Always execute the reordering on the cpu. - const auto is_gpu_executor = - this->get_executor() != this->get_executor()->get_master(); - auto cpu_exec = is_gpu_executor ? this->get_executor()->get_master() - : this->get_executor(); - - auto adjacency_matrix = SparsityMatrix::create(cpu_exec); - array degrees; - - // The adjacency matrix has to be square. - GKO_ASSERT_IS_SQUARE_MATRIX(args.system_matrix); - // This is needed because it does not make sense to call the copy and - // convert if the existing matrix is empty. - if (args.system_matrix->get_size()) { - auto tmp = copy_and_convert_to(cpu_exec, - args.system_matrix); - // This function provided within the Sparsity matrix format removes - // the diagonal elements and outputs an adjacency matrix. - adjacency_matrix = tmp->to_adjacency_matrix(); - } - - auto const dim = adjacency_matrix->get_size(); - permutation_ = PermutationMatrix::create(cpu_exec, dim); - - // To make it explicit. - inv_permutation_ = nullptr; - if (parameters_.construct_inverse_permutation) { - inv_permutation_ = PermutationMatrix::create(cpu_exec, dim); - } - - this->generate(cpu_exec, std::move(adjacency_matrix)); - - // Copy back results to gpu if necessary. - if (is_gpu_executor) { - const auto gpu_exec = this->get_executor(); - auto gpu_perm = share(PermutationMatrix::create(gpu_exec, dim)); - gpu_perm->copy_from(permutation_); - permutation_ = gpu_perm; - if (inv_permutation_) { - auto gpu_inv_perm = - share(PermutationMatrix::create(gpu_exec, dim)); - gpu_inv_perm->copy_from(inv_permutation_); - inv_permutation_ = gpu_inv_perm; - } - } - auto permutation_array = - make_array_view(this->get_executor(), permutation_->get_size()[0], - permutation_->get_permutation()); - this->set_permutation_array(permutation_array); - } + explicit Rcm(std::shared_ptr exec); + + explicit Rcm(const Factory* factory, const ReorderingBaseArgs& args); private: std::shared_ptr permutation_; @@ -224,6 +160,71 @@ class Rcm : public EnablePolymorphicObject, } // namespace reorder + + +namespace experimental { +namespace reorder { + + +using rcm_starting_strategy = gko::reorder::starting_strategy; + + +/** + * @copydoc gko::reorder::Rcm + */ +template +class Rcm : public EnablePolymorphicObject, LinOpFactory>, + public EnablePolymorphicAssignment> { +public: + struct parameters_type; + friend class EnablePolymorphicObject, LinOpFactory>; + friend class enable_parameters_type>; + + using index_type = IndexType; + using permutation_type = matrix::Permutation; + + struct parameters_type + : public enable_parameters_type> { + /** + * If set to false, computes the RCM reordering on A + A^T, otherwise + * assumes that A is symmetric and uses it directly. + */ + bool GKO_FACTORY_PARAMETER_SCALAR(skip_symmetrize, false); + + /** + * This parameter controls the strategy used to determine a starting + * vertex. + */ + rcm_starting_strategy GKO_FACTORY_PARAMETER_SCALAR( + strategy, rcm_starting_strategy::pseudo_peripheral); + }; + + /** + * @copydoc LinOpFactory::generate + * @note This function overrides the default LinOpFactory::generate to + * return a Permutation instead of a generic LinOp, which would + * need to be cast to Permutation again to access its indices. + * It is only necessary because smart pointers aren't covariant. + */ + std::unique_ptr generate( + std::shared_ptr system_matrix) const; + + /** Creates a new parameter_type to set up the factory. */ + static parameters_type build() { return {}; } + +protected: + explicit Rcm(std::shared_ptr exec, + const parameters_type& params = {}); + + std::unique_ptr generate_impl( + std::shared_ptr system_matrix) const override; + + parameters_type parameters_; +}; + + +} // namespace reorder +} // namespace experimental } // namespace gko diff --git a/omp/test/reorder/rcm_kernels.cpp b/omp/test/reorder/rcm_kernels.cpp index 48698ac1b49..9155ecb1a4c 100644 --- a/omp/test/reorder/rcm_kernels.cpp +++ b/omp/test/reorder/rcm_kernels.cpp @@ -62,6 +62,7 @@ class Rcm : public ::testing::Test { using Mtx = gko::matrix::Dense; using CsrMtx = gko::matrix::Csr; using reorder_type = gko::reorder::Rcm; + using new_reorder_type = gko::experimental::reorder::Rcm; using strategy = gko::reorder::starting_strategy; using perm_type = gko::matrix::Permutation; @@ -110,22 +111,22 @@ class Rcm : public ::testing::Test { } static bool is_valid_start_node(std::shared_ptr mtx, - std::shared_ptr reorder, - i_type start, - std::vector& already_visited) + const i_type* permutation, i_type start, + std::vector& already_visited, + gko::reorder::starting_strategy strategy) { if (already_visited[start]) { return false; } - const auto n = reorder->get_permutation()->get_size()[0]; + const auto n = mtx->get_size()[0]; auto degrees = std::vector(n); for (gko::size_type i = 0; i < n; ++i) { degrees[i] = mtx->get_const_row_ptrs()[i + 1] - mtx->get_const_row_ptrs()[i]; } - switch (reorder->get_parameters().strategy) { + switch (strategy) { case strategy::minimum_degree: { auto min_degree = std::numeric_limits::max(); for (gko::size_type i = 0; i < n; ++i) { @@ -195,10 +196,10 @@ class Rcm : public ::testing::Test { } static bool is_rcm_ordered(std::shared_ptr mtx, - std::shared_ptr reorder) + const i_type* permutation, + gko::reorder::starting_strategy strategy) { - const auto n = - gko::as(reorder->get_permutation())->get_size()[0]; + const auto n = mtx->get_size()[0]; const auto row_ptrs = mtx->get_const_row_ptrs(); const auto col_idxs = mtx->get_const_col_idxs(); auto degrees = std::vector(n); @@ -209,14 +210,8 @@ class Rcm : public ::testing::Test { // Following checks for cm ordering, therefore create a reversed perm. auto perm = std::vector(n); - std::copy_n(gko::as(reorder->get_permutation()) - ->get_const_permutation(), - n, perm.begin()); - for (gko::size_type i = 0; i < n / 2; ++i) { - const auto tmp = perm[i]; - perm[i] = perm[n - i - 1]; - perm[n - i - 1] = tmp; - } + std::copy_n(permutation, n, perm.begin()); + std::reverse(perm.begin(), perm.end()); // Now check for cm ordering. @@ -224,8 +219,8 @@ class Rcm : public ::testing::Test { std::vector already_visited(n); while (base_offset != n) { // Assert valid start node. - if (!is_valid_start_node(mtx, reorder, perm[base_offset], - already_visited)) { + if (!is_valid_start_node(mtx, permutation, perm[base_offset], + already_visited, strategy)) { return false; } @@ -330,7 +325,30 @@ TEST_F(Rcm, OmpPermutationIsRcmOrdered) auto perm = d_reorder_op->get_permutation(); - ASSERT_PRED2(is_rcm_ordered, d_1138_bus_mtx, d_reorder_op); + ASSERT_PRED3(is_rcm_ordered, d_1138_bus_mtx, perm->get_const_permutation(), + d_reorder_op->get_parameters().strategy); +} + +TEST_F(Rcm, OmpPermutationIsRcmOrderedMinDegree) +{ + d_reorder_op = + reorder_type::build() + .with_strategy(gko::reorder::starting_strategy::minimum_degree) + .on(omp) + ->generate(d_1138_bus_mtx); + + auto perm = d_reorder_op->get_permutation(); + + ASSERT_PRED3(is_rcm_ordered, d_1138_bus_mtx, perm->get_const_permutation(), + d_reorder_op->get_parameters().strategy); +} + +TEST_F(Rcm, OmpPermutationIsRcmOrderedNewInterface) +{ + auto perm = new_reorder_type::build().on(omp)->generate(d_1138_bus_mtx); + + ASSERT_PRED3(is_rcm_ordered, d_1138_bus_mtx, perm->get_const_permutation(), + gko::reorder::starting_strategy::pseudo_peripheral); } } // namespace diff --git a/reference/test/reorder/rcm_kernels.cpp b/reference/test/reorder/rcm_kernels.cpp index b23e8bec097..ec43de5e1f6 100644 --- a/reference/test/reorder/rcm_kernels.cpp +++ b/reference/test/reorder/rcm_kernels.cpp @@ -59,9 +59,9 @@ class Rcm : public ::testing::Test { using i_type = int; using CsrMtx = gko::matrix::Csr; using reorder_type = gko::reorder::Rcm; + using new_reorder_type = gko::experimental::reorder::Rcm; using perm_type = gko::matrix::Permutation; - Rcm() : exec(gko::ReferenceExecutor::create()), // clang-format off @@ -83,6 +83,17 @@ class Rcm : public ::testing::Test { {1., 0., 0., 0., 1., 0., 0., 1., 1.}, {1., 0., 0., 1., 1., 0., 0., 1., 1.}}, exec)), + p_mtx_1_lower(gko::initialize( + {{1., 0., 0., 0., 0., 0., 0., 0., 0.}, + {0., 1., 0., 0., 0., 0., 0., 0., 0.}, + {0., 1., 1., 0., 0., 0., 0., 0., 0.}, + {1., 1., 0., 1., 0., 0., 0., 0., 0.}, + {1., 0., 0., 1., 1., 0., 0., 0., 0.}, + {1., 1., 1., 1., 1., 1., 0., 0., 0.}, + {0., 1., 1., 1., 1., 1., 1., 0., 0.}, + {1., 0., 0., 0., 1., 0., 0., 1., 0.}, + {1., 0., 0., 1., 1., 0., 0., 1., 1.}}, + exec)), // clang-format on rcm_factory(reorder_type::build().on(exec)), reorder_op_0(rcm_factory->generate(p_mtx_0)), @@ -95,6 +106,7 @@ class Rcm : public ::testing::Test { std::unique_ptr reorder_op_0; std::shared_ptr p_mtx_1; std::unique_ptr reorder_op_1; + std::shared_ptr p_mtx_1_lower; static bool is_permutation(const perm_type* input_perm) { @@ -140,4 +152,29 @@ TEST_F(Rcm, PermutesPerfectFullBand) } +TEST_F(Rcm, NewInterfaceWorksOnSymmetric) +{ + std::vector correct = {7, 8, 0, 4, 3, 5, 6, 1, 2}; + + auto permutation = + new_reorder_type::build().with_skip_symmetrize(true).on(exec)->generate( + p_mtx_1); + + auto p = permutation->get_const_permutation(); + ASSERT_TRUE(std::equal(p, p + correct.size(), correct.begin())); +} + + +TEST_F(Rcm, NewInterfaceWorksOnNonsymmetric) +{ + std::vector correct = {7, 8, 0, 4, 3, 5, 6, 1, 2}; + + auto permutation = + new_reorder_type::build().on(exec)->generate(p_mtx_1_lower); + + auto p = permutation->get_const_permutation(); + ASSERT_TRUE(std::equal(p, p + correct.size(), correct.begin())); +} + + } // namespace