diff --git a/core/base/index_set.cpp b/core/base/index_set.cpp index 0c9d7416e57..60bbbb6cda0 100644 --- a/core/base/index_set.cpp +++ b/core/base/index_set.cpp @@ -105,23 +105,6 @@ IndexType IndexSet::get_local_index(const IndexType index) const } -template -IndexType IndexSet::get_subset_id(const IndexType index) const -{ - auto ss_end_host = make_temporary_clone>( - this->get_executor()->get_master(), &this->subsets_end_); - auto ss_begin_host = make_temporary_clone>( - this->get_executor()->get_master(), &this->subsets_begin_); - for (size_type id = 0; id < this->get_num_subsets(); ++id) { - if (index < ss_end_host->get_const_data()[id] && - index >= ss_begin_host->get_const_data()[id]) { - return id; - } - } - return -1; -} - - template Array IndexSet::to_global_indices() const { diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index a6c3a2a0431..1faf5c68ddf 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -625,6 +625,9 @@ Csr::create_submatrix( { using Mat = Csr; auto exec = this->get_executor(); + if (!row_index_set.get_num_elems() || !col_index_set.get_num_elems()) { + return Mat::create(exec); + } if (row_index_set.is_contiguous() && col_index_set.is_contiguous()) { auto row_st = row_index_set.get_executor()->copy_val_to_host( row_index_set.get_subsets_begin()); @@ -643,7 +646,7 @@ Csr::create_submatrix( auto sub_mat_size = gko::dim<2>(submat_num_rows, submat_num_cols); Array row_ptrs(exec, submat_num_rows + 1); exec->run(csr::make_calculate_nonzeros_per_row_in_index_set( - this, row_index_set, col_index_set, &row_ptrs)); + this, row_index_set, col_index_set, row_ptrs.get_data())); exec->run( csr::make_prefix_sum(row_ptrs.get_data(), submat_num_rows + 1)); auto num_nnz = diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 14f7f491f24..5f3fd4d9f3c 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -172,7 +172,7 @@ namespace kernels { std::shared_ptr exec, \ const matrix::Csr* source, \ const IndexSet& row_index_set, \ - const IndexSet& col_index_set, Array* row_nnz) + const IndexSet& col_index_set, IndexType* row_nnz) #define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \ void compute_submatrix(std::shared_ptr exec, \ diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 7ab812b50fc..35f2f8341e4 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1164,7 +1164,7 @@ void calculate_nonzeros_per_row_in_index_set( const matrix::Csr* source, const IndexSet& row_index_set, const IndexSet& col_index_set, - Array* row_nnz) GKO_NOT_IMPLEMENTED; + IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 835ba943a26..d55cb29e395 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1391,7 +1391,7 @@ void calculate_nonzeros_per_row_in_index_set( const matrix::Csr* source, const IndexSet& row_index_set, const IndexSet& col_index_set, - Array* row_nnz) GKO_NOT_IMPLEMENTED; + IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index f1d09c1a163..3bedaaa3e15 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -951,7 +951,7 @@ void calculate_nonzeros_per_row_in_index_set( const matrix::Csr* source, const IndexSet& row_index_set, const IndexSet& col_index_set, - Array* row_nnz) GKO_NOT_IMPLEMENTED; + IndexType* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 98d19fda78c..072a1585e70 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -82,10 +82,17 @@ namespace gko { * @ingroup IndexSet */ template -class IndexSet : public EnablePolymorphicObject> { +class IndexSet : public EnablePolymorphicObject>, + public EnablePolymorphicAssignment>, + public EnableCreateMethod> { friend class EnablePolymorphicObject; + friend class EnableCreateMethod; public: + using EnableCreateMethod::create; + using EnablePolymorphicAssignment::convert_to; + using EnablePolymorphicAssignment::move_to; + /** * The type of elements stored in the index set. */ @@ -97,7 +104,9 @@ class IndexSet : public EnablePolymorphicObject> { * @param exec the Executor where the IndexSet data is allocated */ IndexSet(std::shared_ptr exec) - : EnablePolymorphicObject(std::move(exec)) + : EnablePolymorphicObject(std::move(exec)), + index_space_size_{0}, + num_stored_indices_{0} {} /** @@ -109,14 +118,18 @@ class IndexSet : public EnablePolymorphicObject> { * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ - IndexSet(std::shared_ptr executor, - std::initializer_list init_list, - const bool is_sorted = false) + explicit IndexSet(std::shared_ptr executor, + std::initializer_list init_list, + const bool is_sorted = false) : EnablePolymorphicObject(std::move(executor)), - index_space_size_( - *(std::max_element(std::begin(init_list), std::end(init_list))) + - 1) + index_space_size_(init_list.size() > 0 + ? *(std::max_element(std::begin(init_list), + std::end(init_list))) + + 1 + : 0), + num_stored_indices_{init_list.size()} { + GKO_ASSERT(index_space_size_ > 0); this->populate_subsets( Array(this->get_executor(), init_list), is_sorted); } @@ -131,9 +144,10 @@ class IndexSet : public EnablePolymorphicObject> { * @param is_sorted a parameter that specifies if the indices array is * sorted or not. `true` if sorted. */ - IndexSet(std::shared_ptr executor, - const index_type size, const gko::Array& indices, - const bool is_sorted = false) + explicit IndexSet(std::shared_ptr executor, + const index_type size, + const gko::Array& indices, + const bool is_sorted = false) : EnablePolymorphicObject(std::move(executor)), index_space_size_(size) { @@ -217,28 +231,6 @@ class IndexSet : public EnablePolymorphicObject> { */ index_type get_local_index(index_type global_index) const; - /** - * Return which set the global index belongs to. - * - * Consider the set idx_set = (0, 1, 2, 4, 6, 7, 8, 9). This function - * returns the subset id in the index set of the input global index. For - * example, `idx_set.get_subset_id(0) == 0` `idx_set.get_subset_id(4) - * == 1` and `idx_set.get_subset_id(6) == 2`. - * - * @note This function returns a scalar value and needs a scalar value. - * For repeated queries, it is more efficient to use the Array - * functions that take and return arrays which allow for more - * throughput. - * - * @param global_index the global index. - * - * @return the local index of the element in the index set. - * - * @warning This single entry query can have significant kernel launch - * overheads and should be avoided if possible. - */ - index_type get_subset_id(index_type global_index) const; - /** * This is an array version of the scalar function above. * diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 9bfa9335945..7065deccba0 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -793,13 +793,6 @@ class Csr : public EnableLinOp>, std::unique_ptr> extract_diagonal() const override; - std::unique_ptr> create_submatrix( - const gko::IndexSet& row_index_set, - const gko::IndexSet& column_index_set) const; - - std::unique_ptr> create_submatrix( - const gko::span& row_span, const gko::span& column_span) const; - std::unique_ptr compute_absolute() const override; void compute_absolute_inplace() override; @@ -959,7 +952,7 @@ class Csr : public EnableLinOp>, this->inv_scale_impl(make_temporary_clone(exec, alpha).get()); } - /* + /** * Creates a constant (immutable) Csr matrix from a set of constant arrays. * * @param exec the executor to create the matrix on @@ -987,7 +980,7 @@ class Csr : public EnableLinOp>, gko::detail::array_const_cast(std::move(row_ptrs)), strategy}); } - /* + /** * This is version of create_const with a default strategy. */ static std::unique_ptr create_const( @@ -1001,6 +994,36 @@ class Csr : public EnableLinOp>, Csr::make_default_strategy(exec)); } + /** + * Creates a submatrix from this Csr matrix given row and column IndexSet + * objects. + * + * @param row_index_set the row index set containing the set of rows to be + * in the submatrix. + * @param column_index_set the col index set containing the set of columns + * to be in the submatrix. + * @return A new CSR matrix with the elements that belong to the row and + * columns of this matrix as specified by the index sets. + * @note This is not a view but creates a new, separate CSR matrix. + */ + std::unique_ptr> create_submatrix( + const gko::IndexSet& row_index_set, + const gko::IndexSet& column_index_set) const; + + /** + * Creates a submatrix from this Csr matrix given row and column spans + * + * @param row_span the row span containing the contiguous set of rows to be + * in the submatrix. + * @param column_span the column span containing the contiguous set of + * columns to be in the submatrix. + * @return A new CSR matrix with the elements that belong to the row and + * columns of this matrix as specified by the index sets. + * @note This is not a view but creates a new, separate CSR matrix. + */ + std::unique_ptr> create_submatrix( + const gko::span& row_span, const gko::span& column_span) const; + protected: /** * Creates an uninitialized CSR matrix of the specified size. diff --git a/omp/base/index_set_kernels.cpp b/omp/base/index_set_kernels.cpp index de81f30a991..223373053fe 100644 --- a/omp/base/index_set_kernels.cpp +++ b/omp/base/index_set_kernels.cpp @@ -73,10 +73,11 @@ void to_global_indices(std::shared_ptr exec, { #pragma omp parallel for for (size_type subset = 0; subset < num_subsets; ++subset) { - for (size_type i = 0; - i < superset_indices[subset + 1] - superset_indices[subset]; ++i) { - decomp_indices[superset_indices[subset] + i] = - subset_begin[subset] + i; + IndexType local_i{}; + for (auto i = superset_indices[subset]; + i < superset_indices[subset + 1]; ++i) { + decomp_indices[i] = local_i + subset_begin[subset]; + local_i++; } } } @@ -157,20 +158,19 @@ void global_to_local(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < num_indices; ++i) { auto index = global_indices[i]; - if (index >= index_space_size) { + if (index < 0 || index >= index_space_size) { local_indices[i] = invalid_index(); continue; } const auto bucket = std::distance( - subset_begin, - std::upper_bound(subset_begin, subset_begin + num_subsets, index)); - auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); - if (subset_end[shifted_bucket] <= index || - index < subset_begin[shifted_bucket]) { + subset_begin + 1, + std::upper_bound(subset_begin + 1, subset_begin + num_subsets + 1, + index)); + if (index >= subset_end[bucket] || index < subset_begin[bucket]) { local_indices[i] = invalid_index(); } else { - local_indices[i] = index - subset_begin[shifted_bucket] + - superset_indices[shifted_bucket]; + local_indices[i] = + index - subset_begin[bucket] + superset_indices[bucket]; } } } @@ -190,17 +190,16 @@ void local_to_global(std::shared_ptr exec, #pragma omp parallel for for (size_type i = 0; i < num_indices; ++i) { auto index = local_indices[i]; - if (index >= superset_indices[num_subsets]) { + if (index < 0 || index >= superset_indices[num_subsets]) { global_indices[i] = invalid_index(); continue; } const auto bucket = std::distance( - superset_indices, - std::upper_bound(superset_indices, + superset_indices + 1, + std::upper_bound(superset_indices + 1, superset_indices + num_subsets + 1, index)); - auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); - global_indices[i] = subset_begin[shifted_bucket] + index - - superset_indices[shifted_bucket]; + global_indices[i] = + subset_begin[bucket] + index - superset_indices[bucket]; } } diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 30e1604805a..8fae745f757 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -749,7 +749,7 @@ void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, const IndexSet& row_index_set, - const IndexSet& col_index_set, Array* row_nnz) + const IndexSet& col_index_set, IndexType* row_nnz) { auto num_row_subsets = row_index_set.get_num_subsets(); auto num_col_subsets = col_index_set.get_num_subsets(); @@ -765,7 +765,7 @@ void calculate_nonzeros_per_row_in_index_set( size_type res_row = row_superset_indices[set]; for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - row_nnz->get_data()[res_row] = zero(); + row_nnz[res_row] = zero(); for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { auto index = source->get_const_col_idxs()[i]; if (index >= col_index_set.get_size()) { @@ -777,11 +777,9 @@ void calculate_nonzeros_per_row_in_index_set( col_subset_begin + num_col_subsets, index)); auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); - if (col_subset_end[shifted_bucket] <= index || - (index < col_subset_begin[shifted_bucket])) { - continue; - } else { - row_nnz->get_data()[res_row]++; + if (index < col_subset_end[shifted_bucket] && + index >= col_subset_begin[shifted_bucket]) { + row_nnz[res_row]++; } } res_row++; @@ -869,10 +867,8 @@ void compute_submatrix_from_index_set( col_subset_begin + num_col_subsets, index)); auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1); - if (col_subset_end[shifted_bucket] <= index || - (index < col_subset_begin[shifted_bucket])) { - continue; - } else { + if (index < col_subset_end[shifted_bucket] && + (index >= col_subset_begin[shifted_bucket])) { res_col_idxs[res_nnz] = index - col_subset_begin[shifted_bucket] + col_superset_indices[shifted_bucket]; diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 86a2992e015..7a61c72ac7b 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -773,9 +773,9 @@ TEST_F(Csr, CalculateNnzPerRowInIndexSetIsEquivalentToRef) auto drow_nnz = gko::Array(this->omp, row_nnz); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_index_set( - this->ref, this->mtx2.get(), rset, cset, &row_nnz); + this->ref, this->mtx2.get(), rset, cset, row_nnz.get_data()); gko::kernels::omp::csr::calculate_nonzeros_per_row_in_index_set( - this->omp, this->dmtx2.get(), drset, dcset, &drow_nnz); + this->omp, this->dmtx2.get(), drset, dcset, drow_nnz.get_data()); GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); } @@ -797,7 +797,7 @@ TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef) auto row_nnz = gko::Array(this->ref, rset.get_num_elems() + 1); row_nnz.fill(gko::zero()); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_index_set( - this->ref, this->mtx2.get(), rset, cset, &row_nnz); + this->ref, this->mtx2.get(), rset, cset, row_nnz.get_data()); gko::kernels::reference::components::prefix_sum( this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); auto num_nnz = row_nnz.get_data()[rset.get_num_elems()]; diff --git a/reference/base/index_set_kernels.cpp b/reference/base/index_set_kernels.cpp index 25602e55322..acdffa3fb0e 100644 --- a/reference/base/index_set_kernels.cpp +++ b/reference/base/index_set_kernels.cpp @@ -181,7 +181,7 @@ void global_to_local(std::shared_ptr exec, shifted_bucket = 0; } auto index = global_indices[i]; - if (index >= index_space_size) { + if (index < 0 || index >= index_space_size) { local_indices[i] = invalid_index(); continue; } @@ -221,7 +221,7 @@ void local_to_global(std::shared_ptr exec, shifted_bucket = 0; } auto index = local_indices[i]; - if (index >= superset_indices[num_subsets]) { + if (index < 0 || index >= superset_indices[num_subsets]) { global_indices[i] = invalid_index(); continue; } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 365847ef592..f7021c5224a 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -630,7 +630,7 @@ void calculate_nonzeros_per_row_in_index_set( std::shared_ptr exec, const matrix::Csr* source, const IndexSet& row_index_set, - const IndexSet& col_index_set, Array* row_nnz) + const IndexSet& col_index_set, IndexType* row_nnz) { auto num_row_subsets = row_index_set.get_num_subsets(); auto row_subset_begin = row_index_set.get_subsets_begin(); @@ -644,7 +644,7 @@ void calculate_nonzeros_per_row_in_index_set( size_type res_row = row_superset_indices[set]; for (auto row = row_subset_begin[set]; row < row_subset_end[set]; ++row) { - row_nnz->get_data()[res_row] = zero(); + row_nnz[res_row] = zero(); for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) { auto index = source->get_const_col_idxs()[i]; if (index >= col_index_set.get_size()) { @@ -660,7 +660,7 @@ void calculate_nonzeros_per_row_in_index_set( (index < col_subset_begin[shifted_bucket])) { continue; } else { - row_nnz->get_data()[res_row]++; + row_nnz[res_row]++; } } res_row++; diff --git a/reference/test/base/index_set.cpp b/reference/test/base/index_set.cpp index 1c954c8ce1d..026201106c1 100644 --- a/reference/test/base/index_set.cpp +++ b/reference/test/base/index_set.cpp @@ -321,20 +321,6 @@ TYPED_TEST(IndexSet, CanGetLocalIndex) } -TYPED_TEST(IndexSet, CanGetSubsetId) -{ - auto idx_arr = gko::Array{this->exec, {0, 1, 2, 4, 6, 7, 8, 9}}; - auto idx_set = gko::IndexSet{this->exec, 10, idx_arr}; - - ASSERT_EQ(idx_set.get_num_elems(), 8); - EXPECT_EQ(idx_set.get_subset_id(6), 2); - EXPECT_EQ(idx_set.get_subset_id(7), 2); - EXPECT_EQ(idx_set.get_subset_id(0), 0); - EXPECT_EQ(idx_set.get_subset_id(8), 2); - EXPECT_EQ(idx_set.get_subset_id(4), 1); -} - - TYPED_TEST(IndexSet, CanDetectNonExistentIndices) { auto idx_arr = gko::Array{ diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index d8d5fefd2f5..62422489a7f 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1784,6 +1784,47 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) ASSERT_EQ(mat->get_num_stored_elements(), 23); + { + SCOPED_TRACE("Both empty index sets"); + auto row_set = gko::IndexSet(this->exec); + auto col_set = gko::IndexSet(this->exec); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = Mtx::create(this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + + { + SCOPED_TRACE("One empty index set"); + auto row_set = gko::IndexSet(this->exec); + auto col_set = gko::IndexSet(this->exec, {0}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = Mtx::create(this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + + { + SCOPED_TRACE("Full index set"); + auto row_set = + gko::IndexSet(this->exec, {0, 1, 2, 3, 4, 5, 6}); + auto col_set = gko::IndexSet(this->exec, {0, 1, 2, 3, 4}); + auto sub_mat1 = mat->create_submatrix(row_set, col_set); + auto ref1 = gko::initialize( + { + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0, -1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0, -1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + }, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + { SCOPED_TRACE("Small square 2x2"); auto row_set = gko::IndexSet(this->exec, {0, 1}); @@ -1822,6 +1863,8 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet) { SCOPED_TRACE("Square 4x4"); auto row_set = gko::IndexSet(this->exec, {1, 4, 5, 6}); + // This is unsorted to make sure that the output is correct (sorted) + // even when the input is sorted. auto col_set = gko::IndexSet(this->exec, {4, 3, 0, 1}); auto sub_mat1 = mat->create_submatrix(row_set, col_set); auto ref1 = gko::initialize({I{1.0, 0.0, 7.5, 3.0}, // 1