diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 7c8cb7f7a5f..853a694ab9c 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1046,3 +1046,60 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel( out_vals[out_begin + i] = in_vals[in_begin + i]; } } + + +namespace kernel { + + +template +__global__ + __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals( + const size_type num_rows, const size_type num_cols, + const size_type num_nnz, const size_type row_offset, + const size_type col_offset, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values) +{ + const auto res_row = thread::get_thread_id_flat(); + if (res_row < num_rows) { + const auto src_row = res_row + row_offset; + auto res_nnz = result_row_ptrs[res_row]; + for (auto nnz = source_row_ptrs[src_row]; + nnz < source_row_ptrs[src_row + 1]; ++nnz) { + const auto res_col = source_col_idxs[nnz] - col_offset; + if (res_col < num_cols && res_col >= 0) { + result_col_idxs[res_nnz] = res_col; + result_values[res_nnz] = source_values[nnz]; + res_nnz++; + } + } + } +} + + +template +__global__ + __launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span( + const span row_span, const span col_span, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + IndexType* __restrict__ nnz_per_row) +{ + const auto src_row = thread::get_thread_id_flat() + row_span.begin; + if (src_row < row_span.end) { + IndexType nnz{}; + for (auto i = row_ptrs[src_row]; i < row_ptrs[src_row + 1]; ++i) { + if (col_idxs[i] >= col_span.begin && col_idxs[i] < col_span.end) { + nnz++; + } + } + nnz_per_row[src_row - row_span.begin] = nnz; + } +} + + +} // namespace kernel diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 5d55806c8ee..df8cfd05346 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -433,6 +433,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE( GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); template GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType) diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 1c8ab720900..ae683e11813 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" @@ -69,6 +70,9 @@ GKO_REGISTER_OPERATION(convert_to_sellp, csr::convert_to_sellp); GKO_REGISTER_OPERATION(calculate_total_cols, csr::calculate_total_cols); GKO_REGISTER_OPERATION(convert_to_ell, csr::convert_to_ell); GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid); +GKO_REGISTER_OPERATION(calculate_nonzeros_per_row_in_span, + csr::calculate_nonzeros_per_row_in_span); +GKO_REGISTER_OPERATION(compute_submatrix, csr::compute_submatrix); GKO_REGISTER_OPERATION(transpose, csr::transpose); GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose); GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute); @@ -85,6 +89,7 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index, csr::is_sorted_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum); GKO_REGISTER_OPERATION(inplace_absolute_array, components::inplace_absolute_array); GKO_REGISTER_OPERATION(outplace_absolute_array, @@ -536,6 +541,31 @@ bool Csr::is_sorted_by_column_index() const } +template +std::unique_ptr> +Csr::create_submatrix(const gko::span& row_span, + const gko::span& column_span) const +{ + using Mat = Csr; + auto exec = this->get_executor(); + auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length()); + Array row_ptrs(exec, row_span.length() + 1); + exec->run(csr::make_calculate_nonzeros_per_row_in_span( + this, row_span, column_span, &row_ptrs)); + exec->run(csr::make_prefix_sum(row_ptrs.get_data(), row_span.length() + 1)); + auto num_nnz = + exec->copy_val_to_host(row_ptrs.get_data() + sub_mat_size[0]); + auto sub_mat = Mat::create(exec, sub_mat_size, + std::move(Array(exec, num_nnz)), + std::move(Array(exec, num_nnz)), + std::move(row_ptrs), this->get_strategy()); + exec->run(csr::make_compute_submatrix(this, row_span, column_span, + sub_mat.get())); + sub_mat->make_srow(); + return sub_mat; +} + + template std::unique_ptr> Csr::extract_diagonal() const diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 0e6a7346b7c..13f238e58a7 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -174,6 +174,18 @@ namespace kernels { const matrix::Csr* source, \ Array* result) +#define GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType) \ + void calculate_nonzeros_per_row_in_span( \ + std::shared_ptr exec, \ + const matrix::Csr* source, const span& row_span, \ + const span& col_span, Array* row_nnz) + +#define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \ + void compute_submatrix(std::shared_ptr exec, \ + const matrix::Csr* source, \ + gko::span row_span, gko::span col_span, \ + matrix::Csr* result) + #define GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ void sort_by_column_index(std::shared_ptr exec, \ matrix::Csr* to_sort) @@ -240,6 +252,10 @@ namespace kernels { template \ GKO_DECLARE_CSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ template \ + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType); \ + template \ GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ template \ GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 00e6c15039d..f7c3d7c385c 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1240,6 +1240,55 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(row_span.length(), default_block_size); + + kernel::calculate_nnz_per_row_in_span<<>>( + row_span, col_span, as_cuda_type(row_ptrs), as_cuda_type(col_idxs), + as_cuda_type(row_nnz->get_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto row_ptrs = source->get_const_row_ptrs(); + auto grid_dim = ceildiv(num_rows, default_block_size); + + auto num_nnz = source->get_num_stored_elements(); + grid_dim = ceildiv(num_nnz, default_block_size); + kernel::compute_submatrix_idxs_and_vals<<>>( + num_rows, num_cols, num_nnz, row_offset, col_offset, + as_cuda_type(source->get_const_row_ptrs()), + as_cuda_type(source->get_const_col_idxs()), + as_cuda_type(source->get_const_values()), + as_cuda_type(result->get_const_row_ptrs()), + as_cuda_type(result->get_col_idxs()), + as_cuda_type(result->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index 0847ac2235b..26ee15a0ae0 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils/unsort_matrix.hpp" #include "cuda/test/utils.hpp" @@ -100,6 +101,14 @@ class Csr : public ::testing::Test { std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } + void set_up_apply_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(cuda); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(std::shared_ptr strategy, int num_vectors = 1) { @@ -147,6 +156,7 @@ class Csr : public ::testing::Test { dmtx->copy_from(mtx.get()); } + std::shared_ptr ref; std::shared_ptr cuda; @@ -154,6 +164,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -162,6 +173,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -926,4 +938,74 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + auto drow_nnz = gko::Array(this->cuda, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::cuda::csr::calculate_nonzeros_per_row_in_span( + this->cuda, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; + auto drow_nnz = gko::Array(this->cuda, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->cuda, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->cuda, num_nnz)), + std::move(gko::Array(this->cuda, num_nnz)), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::cuda::csr::compute_submatrix(this->cuda, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; + + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 7435ff35221..e8df9d6c675 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1625,6 +1625,113 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +namespace kernel { + + +template +void calc_nnz_in_span(const span row_span, const span col_span, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + IndexType* __restrict__ nnz_per_row, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1) + row_span.begin; + if (tidx < row_span.end) { + nnz_per_row[tidx - row_span.begin] = zero(); + for (size_type col = row_ptrs[tidx]; col < row_ptrs[tidx + 1]; ++col) { + if (col_idxs[col] >= col_span.begin && + col_idxs[col] < col_span.end) { + nnz_per_row[tidx - row_span.begin]++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(calc_nnz_in_span, calc_nnz_in_span); + + +template +void compute_submatrix_idxs_and_vals(size_type num_rows, size_type num_cols, + size_type num_nnz, size_type row_offset, + size_type col_offset, + const IndexType* __restrict__ src_row_ptrs, + const IndexType* __restrict__ src_col_idxs, + const ValueType* __restrict__ src_values, + const IndexType* __restrict__ res_row_ptrs, + IndexType* __restrict__ res_col_idxs, + ValueType* __restrict__ res_values, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + size_type res_nnz = res_row_ptrs[tidx]; + for (size_type nnz = src_row_ptrs[row_offset + tidx]; + nnz < src_row_ptrs[row_offset + tidx + 1]; ++nnz) { + if ((src_col_idxs[nnz] < (col_offset + num_cols) && + src_col_idxs[nnz] >= col_offset)) { + res_col_idxs[res_nnz] = src_col_idxs[nnz] - col_offset; + res_values[res_nnz] = src_values[nnz]; + res_nnz++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(compute_submatrix_idxs_and_vals, + compute_submatrix_idxs_and_vals); + + +} // namespace kernel + + +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(row_span.length(), default_block_size); + auto block_dim = default_block_size; + + kernel::calc_nnz_in_span(grid_dim, block_dim, 0, exec->get_queue(), + row_span, col_span, row_ptrs, col_idxs, + row_nnz->get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) +{ + const auto row_offset = row_span.begin; + const auto col_offset = col_span.begin; + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + const auto row_ptrs = source->get_const_row_ptrs(); + + const auto num_nnz = source->get_num_stored_elements(); + auto grid_dim = ceildiv(num_rows, default_block_size); + auto block_dim = default_block_size; + kernel::compute_submatrix_idxs_and_vals( + grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, num_nnz, + row_offset, col_offset, source->get_const_row_ptrs(), + source->get_const_col_idxs(), source->get_const_values(), + result->get_const_row_ptrs(), result->get_col_idxs(), + result->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + namespace { diff --git a/dpcpp/test/matrix/csr_kernels.cpp b/dpcpp/test/matrix/csr_kernels.cpp index b746d752db6..0ddcf4d84ca 100644 --- a/dpcpp/test/matrix/csr_kernels.cpp +++ b/dpcpp/test/matrix/csr_kernels.cpp @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/unsort_matrix.hpp" @@ -102,6 +103,14 @@ class Csr : public ::testing::Test { std::normal_distribution(-1.0, 1.0), rand_engine, ref); } + void set_up_apply_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(dpcpp); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(std::shared_ptr strategy, int num_vectors = 1) { @@ -156,6 +165,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -164,6 +174,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -930,4 +941,75 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + auto drow_nnz = gko::Array(this->dpcpp, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::dpcpp::csr::calculate_nonzeros_per_row_in_span( + this->dpcpp, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + using IndexType = int; + using ValueType = vtype; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; + auto drow_nnz = gko::Array(this->dpcpp, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->dpcpp, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->dpcpp, num_nnz)), + std::move(gko::Array(this->dpcpp, num_nnz)), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::dpcpp::csr::compute_submatrix(this->dpcpp, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + set_up_apply_data(); + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; + + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index be28c3a471b..2fab9bf4e74 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -1082,6 +1082,56 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(row_span.length(), default_block_size); + + hipLaunchKernelGGL(kernel::calculate_nnz_per_row_in_span, dim3(grid_dim), + dim3(default_block_size), 0, 0, row_span, col_span, + as_hip_type(row_ptrs), as_hip_type(col_idxs), + as_hip_type(row_nnz->get_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto row_ptrs = source->get_const_row_ptrs(); + auto grid_dim = ceildiv(num_rows, default_block_size); + + auto num_nnz = source->get_num_stored_elements(); + grid_dim = ceildiv(num_nnz, default_block_size); + hipLaunchKernelGGL( + kernel::compute_submatrix_idxs_and_vals, dim3(grid_dim), + dim3(default_block_size), 0, 0, num_rows, num_cols, num_nnz, row_offset, + col_offset, as_hip_type(source->get_const_row_ptrs()), + as_hip_type(source->get_const_col_idxs()), + as_hip_type(source->get_const_values()), + as_hip_type(result->get_const_row_ptrs()), + as_hip_type(result->get_col_idxs()), as_hip_type(result->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 6948d2b7320..311aa01cafd 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils/unsort_matrix.hpp" #include "hip/test/utils.hip.hpp" @@ -100,6 +101,14 @@ class Csr : public ::testing::Test { std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } + void set_up_apply_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(hip); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(std::shared_ptr strategy, int num_vectors = 1) { @@ -154,6 +163,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -162,6 +172,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -937,4 +948,74 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + auto drow_nnz = gko::Array(this->hip, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::hip::csr::calculate_nonzeros_per_row_in_span( + this->hip, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; + auto drow_nnz = gko::Array(this->hip, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->hip, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->hip, num_nnz)), + std::move(gko::Array(this->hip, num_nnz)), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::hip::csr::compute_submatrix(this->hip, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; + + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index a9c9cc53255..02969dcc94d 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -763,6 +763,9 @@ class Csr : public EnableLinOp>, std::unique_ptr> extract_diagonal() const override; + std::unique_ptr> create_submatrix( + const gko::span& row_span, const gko::span& column_span) const; + std::unique_ptr compute_absolute() const override; void compute_absolute_inplace() override; diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 785a934c7c6..19f16c8b543 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -713,6 +713,63 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + const auto row_ptrs = source->get_const_row_ptrs(); + const auto col_idxs = source->get_const_col_idxs(); +#pragma omp parallel for + for (size_type row = row_span.begin; row < row_span.end; ++row) { + row_nnz->get_data()[row - row_span.begin] = zero(); + for (size_type nnz = row_ptrs[row]; nnz < row_ptrs[row + 1]; ++nnz) { + if (col_idxs[nnz] >= col_span.begin && + col_idxs[nnz] < col_span.end) { + row_nnz->get_data()[row - row_span.begin]++; + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + const auto row_ptrs = source->get_const_row_ptrs(); + const auto col_idxs = source->get_const_col_idxs(); + const auto values = source->get_const_values(); + auto res_row_ptrs = result->get_row_ptrs(); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + size_type res_nnz = res_row_ptrs[row]; + for (size_type nnz = row_ptrs[row_offset + row]; + nnz < row_ptrs[row_offset + row + 1]; ++nnz) { + const auto local_col = col_idxs[nnz] - col_offset; + if (local_col >= 0 && local_col < num_cols) { + result->get_col_idxs()[res_nnz] = local_col; + result->get_values()[res_nnz] = values[nnz]; + res_nnz++; + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index cea56e5e91f..d6712cfa7d6 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/unsort_matrix.hpp" @@ -106,6 +107,14 @@ class Csr : public ::testing::Test { return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); } + void set_up_mat_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(omp); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(int num_vectors = 1) { mtx = gen_mtx(mtx_size[0], mtx_size[1], 1); @@ -158,6 +167,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -166,6 +176,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -700,4 +711,75 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_mat_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + auto drow_nnz = gko::Array(this->omp, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::omp::csr::calculate_nonzeros_per_row_in_span( + this->omp, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; + set_up_mat_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; + auto drow_nnz = gko::Array(this->omp, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->omp, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->omp, num_nnz)), + std::move(gko::Array(this->omp, num_nnz)), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::omp::csr::compute_submatrix(this->omp, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + set_up_mat_data(); + + gko::span rspan{36, 98}; + gko::span cspan{26, 104}; + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 9538cdae4af..a8dbefa3c11 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -633,6 +633,64 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + size_type res_row = 0; + for (size_type row = row_span.begin; row < row_span.end; ++row) { + row_nnz->get_data()[res_row] = zero(); + for (size_type nnz = source->get_const_row_ptrs()[row]; + nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { + if (source->get_const_col_idxs()[nnz] < col_span.end && + source->get_const_col_idxs()[nnz] >= col_span.begin) { + row_nnz->get_data()[res_row]++; + } + } + res_row++; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto res_row_ptrs = result->get_row_ptrs(); + auto res_col_idxs = result->get_col_idxs(); + auto res_values = result->get_values(); + const auto src_row_ptrs = source->get_const_row_ptrs(); + const auto src_col_idxs = source->get_const_col_idxs(); + const auto src_values = source->get_const_values(); + + size_type res_nnz = 0; + for (size_type nnz = 0; nnz < source->get_num_stored_elements(); ++nnz) { + if (nnz >= src_row_ptrs[row_offset] && + nnz < src_row_ptrs[row_offset + num_rows] && + (src_col_idxs[nnz] < (col_offset + num_cols) && + src_col_idxs[nnz] >= col_offset)) { + res_col_idxs[res_nnz] = src_col_idxs[nnz] - col_offset; + res_values[res_nnz] = src_values[nnz]; + res_nnz++; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 1d4a3957084..216a4048e64 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1624,4 +1624,101 @@ TYPED_TEST(CsrComplex, OutplaceAbsolute) } +TYPED_TEST(Csr, CanGetSubmatrix) +{ + using Vec = typename TestFixture::Vec; + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + /* this->mtx + * 1 3 2 + * 0 5 0 + */ + auto sub_mat = + this->mtx->create_submatrix(gko::span(0, 2), gko::span(0, 2)); + auto ref = + gko::initialize({I{1.0, 3.0}, I{0.0, 5.0}}, this->exec); + + GKO_ASSERT_MTX_NEAR(sub_mat.get(), ref.get(), 0.0); +} + + +TYPED_TEST(Csr, CanGetSubmatrix2) +{ + using Vec = typename TestFixture::Vec; + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto mat = gko::initialize( + { + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0, -1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0, -1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + }, + this->exec); + ASSERT_EQ(mat->get_num_stored_elements(), 23); + { + auto sub_mat1 = mat->create_submatrix(gko::span(0, 2), gko::span(0, 2)); + auto ref1 = + gko::initialize({I{1.0, 3.0}, I{1.0, 0.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + { + auto sub_mat2 = mat->create_submatrix(gko::span(2, 4), gko::span(0, 2)); + auto ref2 = + gko::initialize({I{0.0, 3.0}, I{0.0, -1.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat2.get(), ref2.get(), 0.0); + } + { + auto sub_mat3 = mat->create_submatrix(gko::span(0, 2), gko::span(3, 5)); + auto ref3 = + gko::initialize({I{0.0, 2.0}, I{7.5, 3.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat3.get(), ref3.get(), 0.0); + } + { + auto sub_mat4 = mat->create_submatrix(gko::span(1, 6), gko::span(2, 4)); + /* + 4.5, 7.5 + 4.5, 0.0 + 2.5, 0.0 + -1.0, 3.5 + 0.0, 0.0 + */ + auto ref4 = gko::initialize( + {I{4.5, 7.5}, I{4.5, 0.0}, I{2.5, 0.0}, I{-1.0, 3.5}, + I{0.0, 0.0}}, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat4.get(), ref4.get(), 0.0); + } + { + auto sub_mat5 = mat->create_submatrix(gko::span(0, 7), gko::span(0, 5)); + auto ref5 = gko::initialize( + { + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0, -1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0, -1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + }, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat5.get(), ref5.get(), 0.0); + } + { + auto sub_mat7 = mat->create_submatrix(gko::span(0, 1), gko::span(0, 1)); + auto ref7 = gko::initialize({I{1.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat7.get(), ref7.get(), 0.0); + } +} + + } // namespace