From c3c0351d95b54a46c924ba6a9155b45c9a733982 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 9 Nov 2021 18:43:58 +0100 Subject: [PATCH] Add dpcpp kernels + tests. --- dpcpp/matrix/csr_kernels.dp.cpp | 88 ++++++++++++++++++++++++++++++- dpcpp/test/matrix/csr_kernels.cpp | 82 ++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 2 deletions(-) diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 237545e2110..f33282b47b3 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1625,11 +1625,79 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +namespace kernel { + + +template +void calc_nnz_in_span(const span row_span, const span col_span, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + IndexType* __restrict__ nnz_per_row, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < row_span.end) { + nnz_per_row[tidx - row_span.begin] = zero(); + for (size_type col = row_ptrs[tidx]; col < row_ptrs[tidx + 1]; ++col) { + if (col_idxs[col] >= col_span.begin && + col_idxs[col] < col_span.end) { + nnz_per_row[tidx - row_span.begin]++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(calc_nnz_in_span, calc_nnz_in_span); + + +template +void compute_submat(size_type num_rows, size_type num_cols, size_type num_nnz, + size_type row_offset, size_type col_offset, + const IndexType* __restrict__ src_row_ptrs, + const IndexType* __restrict__ src_col_idxs, + const ValueType* __restrict__ src_values, + const IndexType* __restrict__ res_row_ptrs, + IndexType* __restrict__ res_col_idxs, + ValueType* __restrict__ res_values, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + size_type res_nnz = res_row_ptrs[tidx]; + for (size_type nnz = src_row_ptrs[row_offset + tidx]; + nnz < src_row_ptrs[row_offset + tidx + 1]; ++nnz) { + if ((src_col_idxs[nnz] < (col_offset + num_cols) && + src_col_idxs[nnz] >= col_offset)) { + res_col_idxs[res_nnz] = src_col_idxs[nnz] - col_offset; + res_values[res_nnz] = src_values[nnz]; + res_nnz++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(compute_submat, compute_submat); + + +} // namespace kernel + + template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; + const span& col_span, Array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(row_span.length(), default_block_size); + auto block_dim = default_block_size; + + kernel::calc_nnz_in_span(grid_dim, block_dim, 0, exec->get_queue(), + row_span, col_span, row_ptrs, col_idxs, + row_nnz->get_data()); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); @@ -1640,7 +1708,23 @@ void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, gko::span row_span, gko::span col_span, matrix::Csr* result) - GKO_NOT_IMPLEMENTED; +{ + const auto row_offset = row_span.begin; + const auto col_offset = col_span.begin; + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + const auto row_ptrs = source->get_const_row_ptrs(); + + const auto num_nnz = source->get_num_stored_elements(); + auto grid_dim = ceildiv(num_rows, default_block_size); + auto block_dim = default_block_size; + kernel::compute_submat( + grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, num_nnz, + row_offset, col_offset, source->get_const_row_ptrs(), + source->get_const_col_idxs(), source->get_const_values(), + result->get_const_row_ptrs(), result->get_col_idxs(), + result->get_values()); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); diff --git a/dpcpp/test/matrix/csr_kernels.cpp b/dpcpp/test/matrix/csr_kernels.cpp index b746d752db6..0ddcf4d84ca 100644 --- a/dpcpp/test/matrix/csr_kernels.cpp +++ b/dpcpp/test/matrix/csr_kernels.cpp @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/unsort_matrix.hpp" @@ -102,6 +103,14 @@ class Csr : public ::testing::Test { std::normal_distribution(-1.0, 1.0), rand_engine, ref); } + void set_up_apply_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(dpcpp); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(std::shared_ptr strategy, int num_vectors = 1) { @@ -156,6 +165,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -164,6 +174,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -930,4 +941,75 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + auto drow_nnz = gko::Array(this->dpcpp, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::dpcpp::csr::calculate_nonzeros_per_row_in_span( + this->dpcpp, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + using IndexType = int; + using ValueType = vtype; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; + auto drow_nnz = gko::Array(this->dpcpp, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->dpcpp, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->dpcpp, num_nnz)), + std::move(gko::Array(this->dpcpp, num_nnz)), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::dpcpp::csr::compute_submatrix(this->dpcpp, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + set_up_apply_data(); + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; + + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace