From 77ec16f77d54caa20db5f696e8a6b5aa7bc4092f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 13 Sep 2021 12:12:56 +0200 Subject: [PATCH 01/14] Add a SubMatrixExtractable interface. --- include/ginkgo/core/base/lin_op.hpp | 14 ++++++++++++++ include/ginkgo/core/matrix/csr.hpp | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index 540fd6ebad3..a43580c8d81 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -654,6 +654,20 @@ class Preconditionable { }; +/** + * A submatrix of the LinOp implementing this interface can be extracted. + * The row and column spans must be in range of the size of the matrix. + * + * @ingroup LinOp + */ +template +class SubMatrixExtractable { +public: + virtual std::unique_ptr create_submatrix( + const gko::span& row_span, const gko::span& column_span) const = 0; +}; + + /** * The diagonal of a LinOp can be extracted. It will be implemented by * DiagonalExtractable, so the class does not need to implement it. diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index a9c9cc53255..d0ea4a9bfa7 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -127,6 +127,7 @@ class Csr : public EnableLinOp>, public ConvertibleTo>, public ConvertibleTo>, public DiagonalExtractable, + public SubMatrixExtractable>, public ReadableFromMatrixData, public WritableToMatrixData, public Transposable, @@ -763,6 +764,9 @@ class Csr : public EnableLinOp>, std::unique_ptr> extract_diagonal() const override; + std::unique_ptr> create_submatrix( + const gko::span& row_span, const gko::span& column_span) const override; + std::unique_ptr compute_absolute() const override; void compute_absolute_inplace() override; From 30bde4547a1a1b35026d7be7749b2d6003501949 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 13 Sep 2021 12:13:47 +0200 Subject: [PATCH 02/14] Add core code and declare kernels --- core/device_hooks/common_kernels.inc.cpp | 2 ++ core/matrix/csr.cpp | 27 ++++++++++++++++++++++++ core/matrix/csr_kernels.hpp | 17 +++++++++++++++ dpcpp/matrix/csr_kernels.dp.cpp | 21 ++++++++++++++++++ hip/matrix/csr_kernels.hip.cpp | 21 ++++++++++++++++++ 5 files changed, 88 insertions(+) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 5d55806c8ee..df8cfd05346 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -433,6 +433,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE( GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); template GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType) diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 1c8ab720900..4e1f712805f 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" +#include "core/components/reduce_array.hpp" #include "core/matrix/csr_kernels.hpp" @@ -69,6 +70,9 @@ GKO_REGISTER_OPERATION(convert_to_sellp, csr::convert_to_sellp); GKO_REGISTER_OPERATION(calculate_total_cols, csr::calculate_total_cols); GKO_REGISTER_OPERATION(convert_to_ell, csr::convert_to_ell); GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid); +GKO_REGISTER_OPERATION(calculate_nonzeros_per_row_in_span, + csr::calculate_nonzeros_per_row_in_span); +GKO_REGISTER_OPERATION(compute_submatrix, csr::compute_submatrix); GKO_REGISTER_OPERATION(transpose, csr::transpose); GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose); GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute); @@ -85,6 +89,7 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index, csr::is_sorted_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(reduce_array, components::reduce_array); GKO_REGISTER_OPERATION(inplace_absolute_array, components::inplace_absolute_array); GKO_REGISTER_OPERATION(outplace_absolute_array, @@ -536,6 +541,28 @@ bool Csr::is_sorted_by_column_index() const } +template +std::unique_ptr> +Csr::create_submatrix(const gko::span& row_span, + const gko::span& column_span) const +{ + using Mat = Csr; + auto exec = this->get_executor(); + auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length()); + Array row_nnz(exec, row_span.length()); + exec->run(csr::make_fill_array(row_nnz.get_data(), row_nnz.get_num_elems(), + zero())); + exec->run(csr::make_calculate_nonzeros_per_row_in_span( + this, row_span, column_span, &row_nnz)); + auto sub_mat_nnz = reduce(row_nnz); + auto sub_mat = + Mat::create(exec, sub_mat_size, sub_mat_nnz, this->get_strategy()); + exec->run(csr::make_compute_submatrix(this, &row_nnz, row_span, column_span, + sub_mat.get())); + return sub_mat; +} + + template std::unique_ptr> Csr::extract_diagonal() const diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 0e6a7346b7c..c3428c4bbf3 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -174,6 +174,19 @@ namespace kernels { const matrix::Csr* source, \ Array* result) +#define GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType) \ + void calculate_nonzeros_per_row_in_span( \ + std::shared_ptr exec, \ + const matrix::Csr* source, const span& row_span, \ + const span& col_span, Array* row_nnz) + +#define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \ + void compute_submatrix(std::shared_ptr exec, \ + const matrix::Csr* source, \ + const Array* row_nnz, \ + gko::span row_span, gko::span col_span, \ + matrix::Csr* result) + #define GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ void sort_by_column_index(std::shared_ptr exec, \ matrix::Csr* to_sort) @@ -240,6 +253,10 @@ namespace kernels { template \ GKO_DECLARE_CSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ template \ + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType); \ + template \ GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ template \ GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 7435ff35221..bb4d554387f 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -795,6 +795,27 @@ void calculate_nnz_per_row(size_type num_rows, GKO_ENABLE_DEFAULT_HOST(calculate_nnz_per_row, calculate_nnz_per_row); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix( + std::shared_ptr exec, + const matrix::Csr* source, + const Array* row_nnz, gko::span row_span, gko::span col_span, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + void calculate_slice_lengths(size_type num_rows, size_type slice_size, size_type stride_factor, const size_type* __restrict__ nnz_per_row, diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index be28c3a471b..3646a40b2bc 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -1082,6 +1082,27 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix( + std::shared_ptr exec, + const matrix::Csr* source, + const Array* row_nnz, gko::span row_span, gko::span col_span, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, From 9cc20303f96a9e97382829bb31bb822f9573087b Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 13 Sep 2021 12:15:53 +0200 Subject: [PATCH 03/14] Add reference, omp kernels, tests --- omp/matrix/csr_kernels.cpp | 62 ++++++++ omp/test/matrix/CMakeLists.txt | 1 + omp/test/matrix/csr_kernels2.cpp | 140 ++++++++++++++++++ reference/matrix/csr_kernels.cpp | 56 +++++++ reference/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/csr_kernels2.cpp | 195 +++++++++++++++++++++++++ 6 files changed, 455 insertions(+) create mode 100644 omp/test/matrix/csr_kernels2.cpp create mode 100644 reference/test/matrix/csr_kernels2.cpp diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 785a934c7c6..5eccbaeb803 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -713,6 +713,68 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + const auto row_ptrs = source->get_const_row_ptrs(); + const auto col_idxs = source->get_const_col_idxs(); +#pragma omp parallel for + for (size_type row = row_span.begin; row < row_span.end; ++row) { + for (size_type col = row_ptrs[row]; col < row_ptrs[row + 1]; ++col) { + if (col_idxs[col] >= col_span.begin && + col_idxs[col] < col_span.end) { + row_nnz->get_data()[row - row_span.begin]++; + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + const Array* row_nnz, gko::span row_span, + gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + const auto row_ptrs = source->get_const_row_ptrs(); + const auto col_idxs = source->get_const_col_idxs(); + const auto values = source->get_const_values(); + auto res_row_ptrs = result->get_row_ptrs(); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + res_row_ptrs[row] = row_nnz->get_const_data()[row]; + } + components::prefix_sum(exec, res_row_ptrs, num_rows + 1); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + size_type res_nnz = res_row_ptrs[row]; + for (size_type nnz = row_ptrs[row_offset + row]; + nnz < row_ptrs[row_offset + row + 1]; ++nnz) { + if ((col_idxs[nnz] < (col_offset + num_cols) && + col_idxs[nnz] >= col_offset)) { + result->get_col_idxs()[res_nnz] = col_idxs[nnz] - col_offset; + result->get_values()[res_nnz] = values[nnz]; + res_nnz++; + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/omp/test/matrix/CMakeLists.txt b/omp/test/matrix/CMakeLists.txt index d2843b7e459..7198ddf87ea 100644 --- a/omp/test/matrix/CMakeLists.txt +++ b/omp/test/matrix/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) +ginkgo_create_test(csr_kernels2) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) diff --git a/omp/test/matrix/csr_kernels2.cpp b/omp/test/matrix/csr_kernels2.cpp new file mode 100644 index 00000000000..e6b8bb999ce --- /dev/null +++ b/omp/test/matrix/csr_kernels2.cpp @@ -0,0 +1,140 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/csr_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/unsort_matrix.hpp" + + +namespace { + + +class Csr : public ::testing::Test { +protected: + using Arr = gko::Array; + using Mtx = gko::matrix::Csr<>; + using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; + using ComplexMtx = gko::matrix::Csr>; + + Csr() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 185), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + omp = gko::OmpExecutor::create(); + } + + void TearDown() + { + if (omp != nullptr) { + ASSERT_NO_THROW(omp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row, int max_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, max_nnz_row), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); + } + + void set_up_apply_data(int num_vectors = 1) + { + mtx = Mtx::create(ref); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + dmtx = Mtx::create(omp); + dmtx->copy_from(mtx.get()); + } + + std::shared_ptr ref; + std::shared_ptr omp; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + std::unique_ptr mtx; + std::unique_ptr dmtx; +}; + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + gko::span rspan{36, 98}; + gko::span cspan{26, 104}; + auto smat1 = this->mtx->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 1e-14); +} + + +} // namespace diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 9538cdae4af..3fe3148ff5d 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -633,6 +633,62 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + size_type res_row = 0; + for (size_type row = row_span.begin; row < row_span.end; ++row) { + for (size_type col = source->get_const_row_ptrs()[row]; + col < source->get_const_row_ptrs()[row + 1]; ++col) { + if (source->get_const_col_idxs()[col] >= col_span.begin && + source->get_const_col_idxs()[col] < col_span.end) { + row_nnz->get_data()[res_row]++; + } + } + res_row++; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + const Array* row_nnz, gko::span row_span, + gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + for (size_type row = 0; row < num_rows; ++row) { + result->get_row_ptrs()[row] = row_nnz->get_const_data()[row]; + } + components::prefix_sum(exec, result->get_row_ptrs(), num_rows + 1); + size_type res_nnz = 0; + for (size_type nnz = 0; nnz < source->get_num_stored_elements(); ++nnz) { + if (nnz >= source->get_const_row_ptrs()[row_offset] && + nnz < source->get_const_row_ptrs()[row_offset + num_rows] && + (source->get_const_col_idxs()[nnz] < (col_offset + num_cols) && + source->get_const_col_idxs()[nnz] >= col_offset)) { + result->get_col_idxs()[res_nnz] = + source->get_const_col_idxs()[nnz] - col_offset; + result->get_values()[res_nnz] = source->get_const_values()[nnz]; + res_nnz++; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 9670a5df80c..977146062a7 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) +ginkgo_create_test(csr_kernels2) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) diff --git a/reference/test/matrix/csr_kernels2.cpp b/reference/test/matrix/csr_kernels2.cpp new file mode 100644 index 00000000000..3206ffcde4c --- /dev/null +++ b/reference/test/matrix/csr_kernels2.cpp @@ -0,0 +1,195 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/csr_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +template +class Csr : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Coo = gko::matrix::Coo; + using Mtx = gko::matrix::Csr; + using Sellp = gko::matrix::Sellp; + using SparsityCsr = gko::matrix::SparsityCsr; + using Ell = gko::matrix::Ell; + using Hybrid = gko::matrix::Hybrid; + using Vec = gko::matrix::Dense; + using MixedVec = gko::matrix::Dense>; + + Csr() + : exec(gko::ReferenceExecutor::create()), + mtx(Mtx::create(exec, gko::dim<2>{2, 3}, 4, + std::make_shared(2))) + { + this->create_mtx(mtx.get()); + } + + void create_mtx(Mtx* m) + { + value_type* v = m->get_values(); + index_type* c = m->get_col_idxs(); + index_type* r = m->get_row_ptrs(); + auto* s = m->get_srow(); + /* + * 1 3 2 + * 0 5 0 + */ + r[0] = 0; + r[1] = 3; + r[2] = 4; + c[0] = 0; + c[1] = 1; + c[2] = 2; + c[3] = 1; + v[0] = 1.0; + v[1] = 3.0; + v[2] = 2.0; + v[3] = 5.0; + s[0] = 0; + } + + std::shared_ptr exec; + std::unique_ptr mtx; +}; + +TYPED_TEST_SUITE(Csr, gko::test::ValueIndexTypes); + + +TYPED_TEST(Csr, CanGetSubmatrix) +{ + using Vec = typename TestFixture::Vec; + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + /* this->mtx + * 1 3 2 + * 0 5 0 + */ + auto sub_mat = + this->mtx->create_submatrix(gko::span(0, 2), gko::span(0, 2)); + auto ref = + gko::initialize({I{1.0, 3.0}, I{0.0, 5.0}}, this->exec); + + GKO_ASSERT_MTX_NEAR(sub_mat.get(), ref.get(), 0.0); +} + + +TYPED_TEST(Csr, CanGetSubmatrix2) +{ + using Vec = typename TestFixture::Vec; + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto mat = gko::initialize( + { + // clang-format off + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0,-1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0,-1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + // clang-format on + }, + this->exec); + ASSERT_EQ(mat->get_num_stored_elements(), 23); + { + auto sub_mat1 = mat->create_submatrix(gko::span(0, 2), gko::span(0, 2)); + auto ref1 = + gko::initialize({I{1.0, 3.0}, I{1.0, 0.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + { + auto sub_mat2 = mat->create_submatrix(gko::span(2, 4), gko::span(0, 2)); + auto ref2 = + gko::initialize({I{0.0, 3.0}, I{0.0, -1.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat2.get(), ref2.get(), 0.0); + } + { + auto sub_mat3 = mat->create_submatrix(gko::span(0, 2), gko::span(3, 5)); + auto ref3 = + gko::initialize({I{0.0, 2.0}, I{7.5, 3.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat3.get(), ref3.get(), 0.0); + } + { + auto sub_mat4 = mat->create_submatrix(gko::span(1, 6), gko::span(2, 4)); + /* + 4.5, 7.5 + 4.5, 0.0 + 2.5, 0.0 + 1.0, 3.5 + 0.0, 0.0 + */ + auto ref4 = gko::initialize( + {I{4.5, 7.5}, I{4.5, 0.0}, I{2.5, 0.0}, I{-1.0, 3.5}, + I{0.0, 0.0}}, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat4.get(), ref4.get(), 0.0); + } +} + + +} // namespace From be0a777007bb8af07f45f823e41e192047bf93a8 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 13 Sep 2021 12:18:36 +0200 Subject: [PATCH 04/14] Add CUDA kernels, tests --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 67 ++++++++++ cuda/matrix/csr_kernels.cu | 54 ++++++++ cuda/test/matrix/CMakeLists.txt | 1 + cuda/test/matrix/csr_kernels2.cpp | 139 +++++++++++++++++++++ dpcpp/matrix/csr_kernels.dp.cpp | 42 +++---- omp/test/matrix/csr_kernels2.cpp | 4 +- reference/test/matrix/csr_kernels2.cpp | 4 +- 7 files changed, 284 insertions(+), 27 deletions(-) create mode 100644 cuda/test/matrix/csr_kernels2.cpp diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 7c8cb7f7a5f..35a18d36aa7 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1046,3 +1046,70 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel( out_vals[out_begin + i] = in_vals[in_begin + i]; } } + + +namespace kernel { + + +template +__global__ __launch_bounds__(default_block_size) void get_row_nnz_data( + const size_type num_rows, const size_type* __restrict__ nnz_per_row, + IndexType* __restrict__ row_ptrs) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < num_rows) { + row_ptrs[tidx] = nnz_per_row[tidx]; + } +} + + +template +__global__ + __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals( + const size_type num_rows, const size_type num_cols, + const size_type num_nnz, const size_type row_offset, + const size_type col_offset, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < num_rows) { + size_type res_nnz = result_row_ptrs[tidx]; + for (size_type nnz = source_row_ptrs[row_offset + tidx]; + nnz < source_row_ptrs[row_offset + tidx + 1]; ++nnz) { + if ((source_col_idxs[nnz] < (col_offset + num_cols) && + source_col_idxs[nnz] >= col_offset)) { + result_col_idxs[res_nnz] = source_col_idxs[nnz] - col_offset; + result_values[res_nnz] = source_values[nnz]; + res_nnz++; + } + } + } +} + + +template +__global__ + __launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span( + const span row_span, const span col_span, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + size_type* __restrict__ nnz_per_row) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx >= row_span.begin && tidx < row_span.end) { + for (size_type col = row_ptrs[tidx]; col < row_ptrs[tidx + 1]; ++col) { + if (col_idxs[col] >= col_span.begin && + col_idxs[col] < col_span.end) { + nnz_per_row[tidx - row_span.begin]++; + } + } + } +} + + +} // namespace kernel diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 00e6c15039d..42807b50926 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1240,6 +1240,60 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(num_rows, default_block_size); + + kernel::calculate_nnz_per_row_in_span<<>>( + row_span, col_span, as_cuda_type(row_ptrs), as_cuda_type(col_idxs), + as_cuda_type(row_nnz->get_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + const Array* row_nnz, gko::span row_span, + gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto row_ptrs = source->get_const_row_ptrs(); + auto grid_dim = ceildiv(num_rows, default_block_size); + kernel::get_row_nnz_data<<>>( + num_rows, as_cuda_type(row_nnz->get_const_data()), + as_cuda_type(result->get_row_ptrs())); + components::prefix_sum(exec, result->get_row_ptrs(), num_rows + 1); + + auto num_nnz = source->get_num_stored_elements(); + grid_dim = ceildiv(num_nnz, default_block_size); + kernel::compute_submatrix_idxs_and_vals<<>>( + num_rows, num_cols, num_nnz, row_offset, col_offset, + as_cuda_type(source->get_const_row_ptrs()), + as_cuda_type(source->get_const_col_idxs()), + as_cuda_type(source->get_const_values()), + as_cuda_type(result->get_const_row_ptrs()), + as_cuda_type(result->get_col_idxs()), + as_cuda_type(result->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + template void convert_to_hybrid(std::shared_ptr exec, const matrix::Csr* source, diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index be2ebd7ef03..fa0e20c1b74 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) +ginkgo_create_test(csr_kernels2) ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) diff --git a/cuda/test/matrix/csr_kernels2.cpp b/cuda/test/matrix/csr_kernels2.cpp new file mode 100644 index 00000000000..dde23eaaa2d --- /dev/null +++ b/cuda/test/matrix/csr_kernels2.cpp @@ -0,0 +1,139 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/csr_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/unsort_matrix.hpp" + + +namespace { + + +class Csr : public ::testing::Test { +protected: + using Arr = gko::Array; + using Mtx = gko::matrix::Csr<>; + using Vec = gko::matrix::Dense<>; + + Csr() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 185), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row, int max_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, max_nnz_row), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); + } + + void set_up_apply_data(int num_vectors = 1) + { + mtx = Mtx::create(ref); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + dmtx = Mtx::create(cuda); + dmtx->copy_from(mtx.get()); + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + std::unique_ptr mtx; + std::unique_ptr dmtx; +}; + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{36, 98}; + gko::span cspan{26, 104}; + auto smat1 = this->mtx->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 1e-14); +} + + +} // namespace diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index bb4d554387f..83dda90a9c4 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -795,27 +795,6 @@ void calculate_nnz_per_row(size_type num_rows, GKO_ENABLE_DEFAULT_HOST(calculate_nnz_per_row, calculate_nnz_per_row); -template -void calculate_nonzeros_per_row_in_span( - std::shared_ptr exec, - const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); - - -template -void compute_submatrix( - std::shared_ptr exec, - const matrix::Csr* source, - const Array* row_nnz, gko::span row_span, gko::span col_span, - matrix::Csr* result) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); - - void calculate_slice_lengths(size_type num_rows, size_type slice_size, size_type stride_factor, const size_type* __restrict__ nnz_per_row, @@ -1646,6 +1625,27 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); + + +template +void compute_submatrix( + std::shared_ptr exec, + const matrix::Csr* source, + const Array* row_nnz, gko::span row_span, gko::span col_span, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); + + namespace { diff --git a/omp/test/matrix/csr_kernels2.cpp b/omp/test/matrix/csr_kernels2.cpp index e6b8bb999ce..d3dfa65067a 100644 --- a/omp/test/matrix/csr_kernels2.cpp +++ b/omp/test/matrix/csr_kernels2.cpp @@ -30,9 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include - - #include #include #include @@ -45,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include diff --git a/reference/test/matrix/csr_kernels2.cpp b/reference/test/matrix/csr_kernels2.cpp index 3206ffcde4c..84d1a5ca307 100644 --- a/reference/test/matrix/csr_kernels2.cpp +++ b/reference/test/matrix/csr_kernels2.cpp @@ -30,9 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include - - #include @@ -44,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include From f7ae6d06a2f536d0060e1fbd911a5bc77ce3bdaf Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 4 Oct 2021 17:44:13 +0200 Subject: [PATCH 05/14] Review udpate Co-authored-by: Tobias Ribizel --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 4 +-- core/matrix/csr.cpp | 21 +++++++++------- core/matrix/csr_kernels.hpp | 3 +-- cuda/matrix/csr_kernels.cu | 11 +++----- cuda/test/matrix/csr_kernels2.cpp | 2 +- dpcpp/matrix/csr_kernels.dp.cpp | 12 ++++----- omp/matrix/csr_kernels.cpp | 16 ++++-------- omp/test/matrix/csr_kernels2.cpp | 2 +- reference/matrix/csr_kernels.cpp | 29 +++++++++++----------- 9 files changed, 46 insertions(+), 54 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 35a18d36aa7..6f9501f01a7 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1100,8 +1100,8 @@ __global__ const IndexType* __restrict__ col_idxs, size_type* __restrict__ nnz_per_row) { - const auto tidx = thread::get_thread_id_flat(); - if (tidx >= row_span.begin && tidx < row_span.end) { + const auto tidx = thread::get_thread_id_flat() + row_span.begin; + if (tidx < row_span.end) { for (size_type col = row_ptrs[tidx]; col < row_ptrs[tidx + 1]; ++col) { if (col_idxs[col] >= col_span.begin && col_idxs[col] < col_span.end) { diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 4e1f712805f..22d918d9833 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" #include "core/components/reduce_array.hpp" #include "core/matrix/csr_kernels.hpp" @@ -89,7 +90,8 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index, csr::is_sorted_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(reduce_array, components::reduce_array); +GKO_REGISTER_OPERATION(reduce_add_array, components::reduce_add_array); +GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum); GKO_REGISTER_OPERATION(inplace_absolute_array, components::inplace_absolute_array); GKO_REGISTER_OPERATION(outplace_absolute_array, @@ -549,16 +551,17 @@ Csr::create_submatrix(const gko::span& row_span, using Mat = Csr; auto exec = this->get_executor(); auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length()); - Array row_nnz(exec, row_span.length()); - exec->run(csr::make_fill_array(row_nnz.get_data(), row_nnz.get_num_elems(), - zero())); + Array row_ptrs(exec, row_span.length() + 1); + exec->run(csr::make_fill_array( + row_ptrs.get_data(), row_ptrs.get_num_elems() + 1, zero())); exec->run(csr::make_calculate_nonzeros_per_row_in_span( - this, row_span, column_span, &row_nnz)); - auto sub_mat_nnz = reduce(row_nnz); - auto sub_mat = - Mat::create(exec, sub_mat_size, sub_mat_nnz, this->get_strategy()); - exec->run(csr::make_compute_submatrix(this, &row_nnz, row_span, column_span, + this, row_span, column_span, &row_ptrs)); + exec->run(csr::make_prefix_sum(row_ptrs.get_data(), row_span.length() + 1)); + auto sub_mat = Mat::create(exec, sub_mat_size, std::move(row_ptrs), + this->get_strategy()); + exec->run(csr::make_compute_submatrix(this, row_span, column_span, sub_mat.get())); + sub_mat->make_srow(); return sub_mat; } diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index c3428c4bbf3..13f238e58a7 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -178,12 +178,11 @@ namespace kernels { void calculate_nonzeros_per_row_in_span( \ std::shared_ptr exec, \ const matrix::Csr* source, const span& row_span, \ - const span& col_span, Array* row_nnz) + const span& col_span, Array* row_nnz) #define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \ void compute_submatrix(std::shared_ptr exec, \ const matrix::Csr* source, \ - const Array* row_nnz, \ gko::span row_span, gko::span col_span, \ matrix::Csr* result) diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 42807b50926..f7c3d7c385c 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1244,12 +1244,12 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) + const span& col_span, Array* row_nnz) { const auto num_rows = source->get_size()[0]; auto row_ptrs = source->get_const_row_ptrs(); auto col_idxs = source->get_const_col_idxs(); - auto grid_dim = ceildiv(num_rows, default_block_size); + auto grid_dim = ceildiv(row_span.length(), default_block_size); kernel::calculate_nnz_per_row_in_span<<>>( row_span, col_span, as_cuda_type(row_ptrs), as_cuda_type(col_idxs), @@ -1263,8 +1263,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, - const Array* row_nnz, gko::span row_span, - gko::span col_span, + gko::span row_span, gko::span col_span, matrix::Csr* result) { auto row_offset = row_span.begin; @@ -1273,10 +1272,6 @@ void compute_submatrix(std::shared_ptr exec, auto num_cols = result->get_size()[1]; auto row_ptrs = source->get_const_row_ptrs(); auto grid_dim = ceildiv(num_rows, default_block_size); - kernel::get_row_nnz_data<<>>( - num_rows, as_cuda_type(row_nnz->get_const_data()), - as_cuda_type(result->get_row_ptrs())); - components::prefix_sum(exec, result->get_row_ptrs(), num_rows + 1); auto num_nnz = source->get_num_stored_elements(); grid_dim = ceildiv(num_nnz, default_block_size); diff --git a/cuda/test/matrix/csr_kernels2.cpp b/cuda/test/matrix/csr_kernels2.cpp index dde23eaaa2d..6e9673ea79c 100644 --- a/cuda/test/matrix/csr_kernels2.cpp +++ b/cuda/test/matrix/csr_kernels2.cpp @@ -132,7 +132,7 @@ TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) auto hmat = Mtx::create(this->ref); hmat->copy_from(sdmat1.get()); - GKO_ASSERT_MTX_NEAR(hmat, smat1, 1e-14); + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); } diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 83dda90a9c4..237545e2110 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1629,18 +1629,18 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; + const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); template -void compute_submatrix( - std::shared_ptr exec, - const matrix::Csr* source, - const Array* row_nnz, gko::span row_span, gko::span col_span, - matrix::Csr* result) GKO_NOT_IMPLEMENTED; +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) + GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 5eccbaeb803..64f10a63f04 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -717,7 +717,7 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) + const span& col_span, Array* row_nnz) { const auto row_ptrs = source->get_const_row_ptrs(); const auto col_idxs = source->get_const_col_idxs(); @@ -739,8 +739,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, - const Array* row_nnz, gko::span row_span, - gko::span col_span, + gko::span row_span, gko::span col_span, matrix::Csr* result) { auto row_offset = row_span.begin; @@ -751,19 +750,14 @@ void compute_submatrix(std::shared_ptr exec, const auto col_idxs = source->get_const_col_idxs(); const auto values = source->get_const_values(); auto res_row_ptrs = result->get_row_ptrs(); -#pragma omp parallel for - for (size_type row = 0; row < num_rows; ++row) { - res_row_ptrs[row] = row_nnz->get_const_data()[row]; - } - components::prefix_sum(exec, res_row_ptrs, num_rows + 1); #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { size_type res_nnz = res_row_ptrs[row]; for (size_type nnz = row_ptrs[row_offset + row]; nnz < row_ptrs[row_offset + row + 1]; ++nnz) { - if ((col_idxs[nnz] < (col_offset + num_cols) && - col_idxs[nnz] >= col_offset)) { - result->get_col_idxs()[res_nnz] = col_idxs[nnz] - col_offset; + const auto local_col = col_idxs[nnz] - col_offset; + if (local_col >= 0 && local_col < num_cols) { + result->get_col_idxs()[res_nnz] = local_col; result->get_values()[res_nnz] = values[nnz]; res_nnz++; } diff --git a/omp/test/matrix/csr_kernels2.cpp b/omp/test/matrix/csr_kernels2.cpp index d3dfa65067a..c3dd39f2fb9 100644 --- a/omp/test/matrix/csr_kernels2.cpp +++ b/omp/test/matrix/csr_kernels2.cpp @@ -131,7 +131,7 @@ TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) auto smat1 = this->mtx->create_submatrix(rspan, cspan); auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); - GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 1e-14); + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 3fe3148ff5d..72420a5fa18 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -637,7 +637,7 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) + const span& col_span, Array* row_nnz) { size_type res_row = 0; for (size_type row = row_span.begin; row < row_span.end; ++row) { @@ -659,27 +659,28 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, - const Array* row_nnz, gko::span row_span, - gko::span col_span, + gko::span row_span, gko::span col_span, matrix::Csr* result) { auto row_offset = row_span.begin; auto col_offset = col_span.begin; auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; - for (size_type row = 0; row < num_rows; ++row) { - result->get_row_ptrs()[row] = row_nnz->get_const_data()[row]; - } - components::prefix_sum(exec, result->get_row_ptrs(), num_rows + 1); + auto res_row_ptrs = result->get_row_ptrs(); + auto res_col_idxs = result->get_col_idxs(); + auto res_values = result->get_values(); + const auto src_row_ptrs = source->get_const_row_ptrs(); + const auto src_col_idxs = source->get_const_col_idxs(); + const auto src_values = source->get_const_values(); + size_type res_nnz = 0; for (size_type nnz = 0; nnz < source->get_num_stored_elements(); ++nnz) { - if (nnz >= source->get_const_row_ptrs()[row_offset] && - nnz < source->get_const_row_ptrs()[row_offset + num_rows] && - (source->get_const_col_idxs()[nnz] < (col_offset + num_cols) && - source->get_const_col_idxs()[nnz] >= col_offset)) { - result->get_col_idxs()[res_nnz] = - source->get_const_col_idxs()[nnz] - col_offset; - result->get_values()[res_nnz] = source->get_const_values()[nnz]; + if (nnz >= src_row_ptrs[row_offset] && + nnz < src_row_ptrs[row_offset + num_rows] && + (src_col_idxs[nnz] < (col_offset + num_cols) && + src_col_idxs[nnz] >= col_offset)) { + res_col_idxs[res_nnz] = src_col_idxs[nnz] - col_offset; + res_values[res_nnz] = src_values[nnz]; res_nnz++; } } From 18958b1e62fcfdae125b583994ec7b1f25bcea73 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 19 Oct 2021 16:42:25 +0200 Subject: [PATCH 06/14] Add a CSR constructor with only row_ptrs --- include/ginkgo/core/matrix/csr.hpp | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index d0ea4a9bfa7..72882f18469 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -985,6 +985,37 @@ class Csr : public EnableLinOp>, strategy_(strategy->copy()) {} + /** + * Creates a CSR matrix from already allocated (and initialized) row + * pointer array. + * + * @tparam RowPtrsArray type of `row_ptrs` array + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param row_ptrs array of row pointers + * + * @note If `row_ptrs`, is not an rvalue, not + * an array of IndexType, IndexType and ValueType, respectively, or + * is on the wrong executor, an internal copy of that array will be + * created, and the original array data will not be used in the + * matrix. + */ + explicit Csr( + std::shared_ptr exec, const dim<2>& size, + Array&& row_ptrs, + std::shared_ptr strategy = std::make_shared()) + : EnableLinOp(exec, size), + row_ptrs_{exec, std::forward>(row_ptrs)}, + srow_(exec), + strategy_(strategy->copy()) + { + auto num_nnz = exec->copy_val_to_host(row_ptrs_.get_data() + size[0]); + values_ = Array(exec, num_nnz); + col_idxs_ = Array(exec, num_nnz); + this->make_srow(); + } + /** * Creates a CSR matrix from already allocated (and initialized) row * pointer, column index and value arrays. From b2ab9936d4108e3578c7fa2dfde43761db3eef18 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 19 Oct 2021 16:42:56 +0200 Subject: [PATCH 07/14] Update CUDA/HIP kernels and add hip tests --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 4 +- core/matrix/csr.cpp | 2 +- hip/matrix/csr_kernels.hip.cpp | 41 +++++- hip/test/matrix/CMakeLists.txt | 1 + hip/test/matrix/csr_kernels2.hip.cpp | 139 +++++++++++++++++++++ include/ginkgo/core/matrix/csr.hpp | 10 +- reference/test/matrix/csr_kernels2.cpp | 2 +- 7 files changed, 181 insertions(+), 18 deletions(-) create mode 100644 hip/test/matrix/csr_kernels2.hip.cpp diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 6f9501f01a7..d01c9cc4f09 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1053,7 +1053,7 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void get_row_nnz_data( - const size_type num_rows, const size_type* __restrict__ nnz_per_row, + const size_type num_rows, const IndexType* __restrict__ nnz_per_row, IndexType* __restrict__ row_ptrs) { const auto tidx = thread::get_thread_id_flat(); @@ -1098,7 +1098,7 @@ __global__ const span row_span, const span col_span, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ col_idxs, - size_type* __restrict__ nnz_per_row) + IndexType* __restrict__ nnz_per_row) { const auto tidx = thread::get_thread_id_flat() + row_span.begin; if (tidx < row_span.end) { diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 22d918d9833..0e015baabcb 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -553,7 +553,7 @@ Csr::create_submatrix(const gko::span& row_span, auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length()); Array row_ptrs(exec, row_span.length() + 1); exec->run(csr::make_fill_array( - row_ptrs.get_data(), row_ptrs.get_num_elems() + 1, zero())); + row_ptrs.get_data(), row_ptrs.get_num_elems(), zero())); exec->run(csr::make_calculate_nonzeros_per_row_in_span( this, row_span, column_span, &row_ptrs)); exec->run(csr::make_prefix_sum(row_ptrs.get_data(), row_span.length() + 1)); diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index 3646a40b2bc..2fab9bf4e74 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -1086,18 +1086,47 @@ template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; + const span& col_span, Array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(row_span.length(), default_block_size); + + hipLaunchKernelGGL(kernel::calculate_nnz_per_row_in_span, dim3(grid_dim), + dim3(default_block_size), 0, 0, row_span, col_span, + as_hip_type(row_ptrs), as_hip_type(col_idxs), + as_hip_type(row_nnz->get_data())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); template -void compute_submatrix( - std::shared_ptr exec, - const matrix::Csr* source, - const Array* row_nnz, gko::span row_span, gko::span col_span, - matrix::Csr* result) GKO_NOT_IMPLEMENTED; +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto row_ptrs = source->get_const_row_ptrs(); + auto grid_dim = ceildiv(num_rows, default_block_size); + + auto num_nnz = source->get_num_stored_elements(); + grid_dim = ceildiv(num_nnz, default_block_size); + hipLaunchKernelGGL( + kernel::compute_submatrix_idxs_and_vals, dim3(grid_dim), + dim3(default_block_size), 0, 0, num_rows, num_cols, num_nnz, row_offset, + col_offset, as_hip_type(source->get_const_row_ptrs()), + as_hip_type(source->get_const_col_idxs()), + as_hip_type(source->get_const_values()), + as_hip_type(result->get_const_row_ptrs()), + as_hip_type(result->get_col_idxs()), as_hip_type(result->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); diff --git a/hip/test/matrix/CMakeLists.txt b/hip/test/matrix/CMakeLists.txt index 1759cd1ed28..ecc6df25b17 100644 --- a/hip/test/matrix/CMakeLists.txt +++ b/hip/test/matrix/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_hip_test(coo_kernels) ginkgo_create_hip_test(csr_kernels) +ginkgo_create_hip_test(csr_kernels2) ginkgo_create_hip_test(dense_kernels) ginkgo_create_hip_test(diagonal_kernels) ginkgo_create_hip_test(ell_kernels) diff --git a/hip/test/matrix/csr_kernels2.hip.cpp b/hip/test/matrix/csr_kernels2.hip.cpp new file mode 100644 index 00000000000..d12759936a4 --- /dev/null +++ b/hip/test/matrix/csr_kernels2.hip.cpp @@ -0,0 +1,139 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/csr_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/unsort_matrix.hpp" + + +namespace { + + +class Csr : public ::testing::Test { +protected: + using Arr = gko::Array; + using Mtx = gko::matrix::Csr<>; + using Vec = gko::matrix::Dense<>; + + Csr() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 185), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + hip = gko::HipExecutor::create(0, ref); + } + + void TearDown() + { + if (hip != nullptr) { + ASSERT_NO_THROW(hip->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row, int max_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, max_nnz_row), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); + } + + void set_up_apply_data(int num_vectors = 1) + { + mtx = Mtx::create(ref); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + dmtx = Mtx::create(hip); + dmtx->copy_from(mtx.get()); + } + + std::shared_ptr ref; + std::shared_ptr hip; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + std::unique_ptr mtx; + std::unique_ptr dmtx; +}; + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{36, 98}; + gko::span cspan{26, 104}; + auto smat1 = this->mtx->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); +} + + +} // namespace diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 72882f18469..0b8a023b8ea 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -989,27 +989,21 @@ class Csr : public EnableLinOp>, * Creates a CSR matrix from already allocated (and initialized) row * pointer array. * - * @tparam RowPtrsArray type of `row_ptrs` array * * @param exec Executor associated to the matrix * @param size size of the matrix * @param row_ptrs array of row pointers - * - * @note If `row_ptrs`, is not an rvalue, not - * an array of IndexType, IndexType and ValueType, respectively, or - * is on the wrong executor, an internal copy of that array will be - * created, and the original array data will not be used in the - * matrix. */ explicit Csr( std::shared_ptr exec, const dim<2>& size, Array&& row_ptrs, std::shared_ptr strategy = std::make_shared()) : EnableLinOp(exec, size), - row_ptrs_{exec, std::forward>(row_ptrs)}, + row_ptrs_{exec, std::move(row_ptrs)}, srow_(exec), strategy_(strategy->copy()) { + GKO_ASSERT(row_ptrs_.get_num_elems() == size[0] + 1); auto num_nnz = exec->copy_val_to_host(row_ptrs_.get_data() + size[0]); values_ = Array(exec, num_nnz); col_idxs_ = Array(exec, num_nnz); diff --git a/reference/test/matrix/csr_kernels2.cpp b/reference/test/matrix/csr_kernels2.cpp index 84d1a5ca307..1682287d857 100644 --- a/reference/test/matrix/csr_kernels2.cpp +++ b/reference/test/matrix/csr_kernels2.cpp @@ -177,7 +177,7 @@ TYPED_TEST(Csr, CanGetSubmatrix2) 4.5, 7.5 4.5, 0.0 2.5, 0.0 - 1.0, 3.5 + -1.0, 3.5 0.0, 0.0 */ auto ref4 = gko::initialize( From 1d8f3a69bffb6cef7a800f9b0417aa860a192e9e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 2 Nov 2021 13:41:02 +0100 Subject: [PATCH 08/14] Review updates. Co-authored-by: Aditya Kashi --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 13 +---- cuda/test/matrix/csr_kernels2.cpp | 66 ++++++++++++++++++++-- include/ginkgo/core/matrix/csr.hpp | 1 - reference/matrix/csr_kernels.cpp | 4 +- 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index d01c9cc4f09..41257c40de9 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1051,18 +1051,6 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel( namespace kernel { -template -__global__ __launch_bounds__(default_block_size) void get_row_nnz_data( - const size_type num_rows, const IndexType* __restrict__ nnz_per_row, - IndexType* __restrict__ row_ptrs) -{ - const auto tidx = thread::get_thread_id_flat(); - if (tidx < num_rows) { - row_ptrs[tidx] = nnz_per_row[tidx]; - } -} - - template __global__ __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals( @@ -1077,6 +1065,7 @@ __global__ ValueType* __restrict__ result_values) { const auto tidx = thread::get_thread_id_flat(); + // for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) { if (tidx < num_rows) { size_type res_nnz = result_row_ptrs[tidx]; for (size_type nnz = source_row_ptrs[row_offset + tidx]; diff --git a/cuda/test/matrix/csr_kernels2.cpp b/cuda/test/matrix/csr_kernels2.cpp index 6e9673ea79c..0497ca8892a 100644 --- a/cuda/test/matrix/csr_kernels2.cpp +++ b/cuda/test/matrix/csr_kernels2.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/unsort_matrix.hpp" @@ -65,7 +66,7 @@ class Csr : public ::testing::Test { Csr() #ifdef GINKGO_FAST_TESTS - : mtx_size(152, 185), + : mtx_size(131, 155), #else : mtx_size(532, 231), #endif @@ -102,10 +103,10 @@ class Csr : public ::testing::Test { return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); } - void set_up_apply_data(int num_vectors = 1) + void set_up_apply_data() { mtx = Mtx::create(ref); - mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); dmtx = Mtx::create(cuda); dmtx->copy_from(mtx.get()); } @@ -120,13 +121,68 @@ class Csr : public ::testing::Test { }; +TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + auto drow_nnz = gko::Array(this->cuda, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx.get(), rspan, cspan, &row_nnz); + gko::kernels::cuda::csr::calculate_nonzeros_per_row_in_span( + this->cuda, this->dmtx.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto drow_nnz = gko::Array(this->cuda, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->cuda, gko::dim<2>(rspan.length(), cspan.length()), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx.get(), + rspan, cspan, smat1.get()); + gko::kernels::cuda::csr::compute_submatrix(this->cuda, this->dmtx.get(), + rspan, cspan, sdmat1.get()); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); +} + + TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; set_up_apply_data(); - gko::span rspan{36, 98}; - gko::span cspan{26, 104}; + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; auto smat1 = this->mtx->create_submatrix(rspan, cspan); auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); auto hmat = Mtx::create(this->ref); diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 0b8a023b8ea..cfd8ab8dbe2 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -989,7 +989,6 @@ class Csr : public EnableLinOp>, * Creates a CSR matrix from already allocated (and initialized) row * pointer array. * - * * @param exec Executor associated to the matrix * @param size size of the matrix * @param row_ptrs array of row pointers diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 72420a5fa18..dd5a269bc3a 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -643,8 +643,8 @@ void calculate_nonzeros_per_row_in_span( for (size_type row = row_span.begin; row < row_span.end; ++row) { for (size_type col = source->get_const_row_ptrs()[row]; col < source->get_const_row_ptrs()[row + 1]; ++col) { - if (source->get_const_col_idxs()[col] >= col_span.begin && - source->get_const_col_idxs()[col] < col_span.end) { + if (source->get_const_col_idxs()[col] < col_span.end && + source->get_const_col_idxs()[col] >= col_span.begin) { row_nnz->get_data()[res_row]++; } } From 476f74fd8756a58dd39e70014eedd3e95b7deb2c Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 2 Nov 2021 14:22:19 +0100 Subject: [PATCH 09/14] Move tests back to csr_kernels --- cuda/test/matrix/CMakeLists.txt | 1 - cuda/test/matrix/csr_kernels.cpp | 83 +++++++++++ cuda/test/matrix/csr_kernels2.cpp | 195 ------------------------- hip/test/matrix/CMakeLists.txt | 1 - hip/test/matrix/csr_kernels.hip.cpp | 82 +++++++++++ hip/test/matrix/csr_kernels2.hip.cpp | 139 ------------------ omp/test/matrix/CMakeLists.txt | 1 - omp/test/matrix/csr_kernels.cpp | 79 ++++++++++ omp/test/matrix/csr_kernels2.cpp | 138 ----------------- reference/test/matrix/CMakeLists.txt | 1 - reference/test/matrix/csr_kernels.cpp | 77 ++++++++++ reference/test/matrix/csr_kernels2.cpp | 193 ------------------------ 12 files changed, 321 insertions(+), 669 deletions(-) delete mode 100644 cuda/test/matrix/csr_kernels2.cpp delete mode 100644 hip/test/matrix/csr_kernels2.hip.cpp delete mode 100644 omp/test/matrix/csr_kernels2.cpp delete mode 100644 reference/test/matrix/csr_kernels2.cpp diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index fa0e20c1b74..be2ebd7ef03 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -1,6 +1,5 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) -ginkgo_create_test(csr_kernels2) ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index 0847ac2235b..a6f6843b71f 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils/unsort_matrix.hpp" #include "cuda/test/utils.hpp" @@ -100,6 +101,14 @@ class Csr : public ::testing::Test { std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } + void set_up_apply_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(cuda); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(std::shared_ptr strategy, int num_vectors = 1) { @@ -147,6 +156,7 @@ class Csr : public ::testing::Test { dmtx->copy_from(mtx.get()); } + std::shared_ptr ref; std::shared_ptr cuda; @@ -154,6 +164,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -162,6 +173,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -926,4 +938,75 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + auto drow_nnz = gko::Array(this->cuda, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::cuda::csr::calculate_nonzeros_per_row_in_span( + this->cuda, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto drow_nnz = gko::Array(this->cuda, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->cuda, gko::dim<2>(rspan.length(), cspan.length()), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::cuda::csr::compute_submatrix(this->cuda, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); +} + + } // namespace diff --git a/cuda/test/matrix/csr_kernels2.cpp b/cuda/test/matrix/csr_kernels2.cpp deleted file mode 100644 index 0497ca8892a..00000000000 --- a/cuda/test/matrix/csr_kernels2.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include -#include -#include -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "core/components/prefix_sum.hpp" -#include "core/matrix/csr_kernels.hpp" -#include "core/test/utils.hpp" -#include "core/test/utils/unsort_matrix.hpp" - - -namespace { - - -class Csr : public ::testing::Test { -protected: - using Arr = gko::Array; - using Mtx = gko::matrix::Csr<>; - using Vec = gko::matrix::Dense<>; - - Csr() -#ifdef GINKGO_FAST_TESTS - : mtx_size(131, 155), -#else - : mtx_size(532, 231), -#endif - rand_engine(42) - {} - - void SetUp() - { - ref = gko::ReferenceExecutor::create(); - cuda = gko::CudaExecutor::create(0, ref); - } - - void TearDown() - { - if (cuda != nullptr) { - ASSERT_NO_THROW(cuda->synchronize()); - } - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row, int max_nnz_row) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(min_nnz_row, max_nnz_row), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row) - { - return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); - } - - void set_up_apply_data() - { - mtx = Mtx::create(ref); - mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); - dmtx = Mtx::create(cuda); - dmtx->copy_from(mtx.get()); - } - - std::shared_ptr ref; - std::shared_ptr cuda; - - const gko::dim<2> mtx_size; - std::ranlux48 rand_engine; - std::unique_ptr mtx; - std::unique_ptr dmtx; -}; - - -TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) -{ - using Mtx = gko::matrix::Csr<>; - set_up_apply_data(); - - gko::span rspan{7, 51}; - gko::span cspan{22, 88}; - auto size = this->mtx->get_size(); - auto row_nnz = gko::Array(this->ref, rspan.length() + 1); - row_nnz.fill(gko::zero()); - auto drow_nnz = gko::Array(this->cuda, row_nnz); - - gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( - this->ref, this->mtx.get(), rspan, cspan, &row_nnz); - gko::kernels::cuda::csr::calculate_nonzeros_per_row_in_span( - this->cuda, this->dmtx.get(), rspan, cspan, &drow_nnz); - - GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); -} - - -TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) -{ - using Mtx = gko::matrix::Csr<>; - set_up_apply_data(); - - gko::span rspan{7, 51}; - gko::span cspan{22, 88}; - auto size = this->mtx->get_size(); - auto row_nnz = gko::Array(this->ref, rspan.length() + 1); - row_nnz.fill(gko::zero()); - gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( - this->ref, this->mtx.get(), rspan, cspan, &row_nnz); - gko::kernels::reference::components::prefix_sum( - this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); - auto drow_nnz = gko::Array(this->cuda, row_nnz); - auto smat1 = - Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), - std::move(row_nnz)); - auto sdmat1 = - Mtx::create(this->cuda, gko::dim<2>(rspan.length(), cspan.length()), - std::move(drow_nnz)); - - - gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx.get(), - rspan, cspan, smat1.get()); - gko::kernels::cuda::csr::compute_submatrix(this->cuda, this->dmtx.get(), - rspan, cspan, sdmat1.get()); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); -} - - -TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) -{ - using Mtx = gko::matrix::Csr<>; - set_up_apply_data(); - - gko::span rspan{47, 81}; - gko::span cspan{2, 31}; - auto smat1 = this->mtx->create_submatrix(rspan, cspan); - auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); -} - - -} // namespace diff --git a/hip/test/matrix/CMakeLists.txt b/hip/test/matrix/CMakeLists.txt index ecc6df25b17..1759cd1ed28 100644 --- a/hip/test/matrix/CMakeLists.txt +++ b/hip/test/matrix/CMakeLists.txt @@ -1,6 +1,5 @@ ginkgo_create_hip_test(coo_kernels) ginkgo_create_hip_test(csr_kernels) -ginkgo_create_hip_test(csr_kernels2) ginkgo_create_hip_test(dense_kernels) ginkgo_create_hip_test(diagonal_kernels) ginkgo_create_hip_test(ell_kernels) diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 6948d2b7320..332f87e3837 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils/unsort_matrix.hpp" #include "hip/test/utils.hip.hpp" @@ -100,6 +101,14 @@ class Csr : public ::testing::Test { std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } + void set_up_apply_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(hip); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(std::shared_ptr strategy, int num_vectors = 1) { @@ -154,6 +163,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -162,6 +172,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -937,4 +948,75 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + auto drow_nnz = gko::Array(this->hip, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::hip::csr::calculate_nonzeros_per_row_in_span( + this->hip, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto drow_nnz = gko::Array(this->hip, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->hip, gko::dim<2>(rspan.length(), cspan.length()), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::hip::csr::compute_submatrix(this->hip, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_apply_data(); + + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); +} + + } // namespace diff --git a/hip/test/matrix/csr_kernels2.hip.cpp b/hip/test/matrix/csr_kernels2.hip.cpp deleted file mode 100644 index d12759936a4..00000000000 --- a/hip/test/matrix/csr_kernels2.hip.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include -#include -#include -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "core/matrix/csr_kernels.hpp" -#include "core/test/utils.hpp" -#include "core/test/utils/unsort_matrix.hpp" - - -namespace { - - -class Csr : public ::testing::Test { -protected: - using Arr = gko::Array; - using Mtx = gko::matrix::Csr<>; - using Vec = gko::matrix::Dense<>; - - Csr() -#ifdef GINKGO_FAST_TESTS - : mtx_size(152, 185), -#else - : mtx_size(532, 231), -#endif - rand_engine(42) - {} - - void SetUp() - { - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - } - - void TearDown() - { - if (hip != nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row, int max_nnz_row) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(min_nnz_row, max_nnz_row), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row) - { - return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); - } - - void set_up_apply_data(int num_vectors = 1) - { - mtx = Mtx::create(ref); - mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); - dmtx = Mtx::create(hip); - dmtx->copy_from(mtx.get()); - } - - std::shared_ptr ref; - std::shared_ptr hip; - - const gko::dim<2> mtx_size; - std::ranlux48 rand_engine; - std::unique_ptr mtx; - std::unique_ptr dmtx; -}; - - -TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) -{ - using Mtx = gko::matrix::Csr<>; - set_up_apply_data(); - - gko::span rspan{36, 98}; - gko::span cspan{26, 104}; - auto smat1 = this->mtx->create_submatrix(rspan, cspan); - auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); -} - - -} // namespace diff --git a/omp/test/matrix/CMakeLists.txt b/omp/test/matrix/CMakeLists.txt index 7198ddf87ea..d2843b7e459 100644 --- a/omp/test/matrix/CMakeLists.txt +++ b/omp/test/matrix/CMakeLists.txt @@ -1,6 +1,5 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) -ginkgo_create_test(csr_kernels2) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index cea56e5e91f..11bfdf1dced 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/unsort_matrix.hpp" @@ -106,6 +107,14 @@ class Csr : public ::testing::Test { return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); } + void set_up_mat_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(omp); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(int num_vectors = 1) { mtx = gen_mtx(mtx_size[0], mtx_size[1], 1); @@ -158,6 +167,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -166,6 +176,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -700,4 +711,72 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_mat_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + auto drow_nnz = gko::Array(this->omp, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::omp::csr::calculate_nonzeros_per_row_in_span( + this->omp, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr<>; + set_up_mat_data(); + + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto drow_nnz = gko::Array(this->omp, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->omp, gko::dim<2>(rspan.length(), cspan.length()), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::omp::csr::compute_submatrix(this->omp, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + auto hmat = Mtx::create(this->ref); + hmat->copy_from(sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + set_up_mat_data(); + + gko::span rspan{36, 98}; + gko::span cspan{26, 104}; + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace diff --git a/omp/test/matrix/csr_kernels2.cpp b/omp/test/matrix/csr_kernels2.cpp deleted file mode 100644 index c3dd39f2fb9..00000000000 --- a/omp/test/matrix/csr_kernels2.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include -#include -#include -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "core/matrix/csr_kernels.hpp" -#include "core/test/utils.hpp" -#include "core/test/utils/unsort_matrix.hpp" - - -namespace { - - -class Csr : public ::testing::Test { -protected: - using Arr = gko::Array; - using Mtx = gko::matrix::Csr<>; - using Vec = gko::matrix::Dense<>; - using ComplexVec = gko::matrix::Dense>; - using ComplexMtx = gko::matrix::Csr>; - - Csr() -#ifdef GINKGO_FAST_TESTS - : mtx_size(152, 185), -#else - : mtx_size(532, 231), -#endif - rand_engine(42) - {} - - void SetUp() - { - ref = gko::ReferenceExecutor::create(); - omp = gko::OmpExecutor::create(); - } - - void TearDown() - { - if (omp != nullptr) { - ASSERT_NO_THROW(omp->synchronize()); - } - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row, int max_nnz_row) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(min_nnz_row, max_nnz_row), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row) - { - return gen_mtx(num_rows, num_cols, min_nnz_row, num_cols); - } - - void set_up_apply_data(int num_vectors = 1) - { - mtx = Mtx::create(ref); - mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); - dmtx = Mtx::create(omp); - dmtx->copy_from(mtx.get()); - } - - std::shared_ptr ref; - std::shared_ptr omp; - - const gko::dim<2> mtx_size; - std::ranlux48 rand_engine; - std::unique_ptr mtx; - std::unique_ptr dmtx; -}; - - -TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) -{ - set_up_apply_data(); - - gko::span rspan{36, 98}; - gko::span cspan{26, 104}; - auto smat1 = this->mtx->create_submatrix(rspan, cspan); - auto sdmat1 = this->dmtx->create_submatrix(rspan, cspan); - - GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); -} - - -} // namespace diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 977146062a7..9670a5df80c 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,6 +1,5 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) -ginkgo_create_test(csr_kernels2) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 1d4a3957084..cce523eb10f 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1624,4 +1624,81 @@ TYPED_TEST(CsrComplex, OutplaceAbsolute) } +TYPED_TEST(Csr, CanGetSubmatrix) +{ + using Vec = typename TestFixture::Vec; + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + /* this->mtx + * 1 3 2 + * 0 5 0 + */ + auto sub_mat = + this->mtx->create_submatrix(gko::span(0, 2), gko::span(0, 2)); + auto ref = + gko::initialize({I{1.0, 3.0}, I{0.0, 5.0}}, this->exec); + + GKO_ASSERT_MTX_NEAR(sub_mat.get(), ref.get(), 0.0); +} + + +TYPED_TEST(Csr, CanGetSubmatrix2) +{ + using Vec = typename TestFixture::Vec; + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto mat = gko::initialize( + { + // clang-format off + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0,-1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0,-1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + // clang-format on + }, + this->exec); + ASSERT_EQ(mat->get_num_stored_elements(), 23); + { + auto sub_mat1 = mat->create_submatrix(gko::span(0, 2), gko::span(0, 2)); + auto ref1 = + gko::initialize({I{1.0, 3.0}, I{1.0, 0.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); + } + { + auto sub_mat2 = mat->create_submatrix(gko::span(2, 4), gko::span(0, 2)); + auto ref2 = + gko::initialize({I{0.0, 3.0}, I{0.0, -1.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat2.get(), ref2.get(), 0.0); + } + { + auto sub_mat3 = mat->create_submatrix(gko::span(0, 2), gko::span(3, 5)); + auto ref3 = + gko::initialize({I{0.0, 2.0}, I{7.5, 3.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat3.get(), ref3.get(), 0.0); + } + { + auto sub_mat4 = mat->create_submatrix(gko::span(1, 6), gko::span(2, 4)); + /* + 4.5, 7.5 + 4.5, 0.0 + 2.5, 0.0 + -1.0, 3.5 + 0.0, 0.0 + */ + auto ref4 = gko::initialize( + {I{4.5, 7.5}, I{4.5, 0.0}, I{2.5, 0.0}, I{-1.0, 3.5}, + I{0.0, 0.0}}, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat4.get(), ref4.get(), 0.0); + } +} + + } // namespace diff --git a/reference/test/matrix/csr_kernels2.cpp b/reference/test/matrix/csr_kernels2.cpp deleted file mode 100644 index 1682287d857..00000000000 --- a/reference/test/matrix/csr_kernels2.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "core/matrix/csr_kernels.hpp" -#include "core/test/utils.hpp" - - -namespace { - - -template -class Csr : public ::testing::Test { -protected: - using value_type = - typename std::tuple_element<0, decltype(ValueIndexType())>::type; - using index_type = - typename std::tuple_element<1, decltype(ValueIndexType())>::type; - using Coo = gko::matrix::Coo; - using Mtx = gko::matrix::Csr; - using Sellp = gko::matrix::Sellp; - using SparsityCsr = gko::matrix::SparsityCsr; - using Ell = gko::matrix::Ell; - using Hybrid = gko::matrix::Hybrid; - using Vec = gko::matrix::Dense; - using MixedVec = gko::matrix::Dense>; - - Csr() - : exec(gko::ReferenceExecutor::create()), - mtx(Mtx::create(exec, gko::dim<2>{2, 3}, 4, - std::make_shared(2))) - { - this->create_mtx(mtx.get()); - } - - void create_mtx(Mtx* m) - { - value_type* v = m->get_values(); - index_type* c = m->get_col_idxs(); - index_type* r = m->get_row_ptrs(); - auto* s = m->get_srow(); - /* - * 1 3 2 - * 0 5 0 - */ - r[0] = 0; - r[1] = 3; - r[2] = 4; - c[0] = 0; - c[1] = 1; - c[2] = 2; - c[3] = 1; - v[0] = 1.0; - v[1] = 3.0; - v[2] = 2.0; - v[3] = 5.0; - s[0] = 0; - } - - std::shared_ptr exec; - std::unique_ptr mtx; -}; - -TYPED_TEST_SUITE(Csr, gko::test::ValueIndexTypes); - - -TYPED_TEST(Csr, CanGetSubmatrix) -{ - using Vec = typename TestFixture::Vec; - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - /* this->mtx - * 1 3 2 - * 0 5 0 - */ - auto sub_mat = - this->mtx->create_submatrix(gko::span(0, 2), gko::span(0, 2)); - auto ref = - gko::initialize({I{1.0, 3.0}, I{0.0, 5.0}}, this->exec); - - GKO_ASSERT_MTX_NEAR(sub_mat.get(), ref.get(), 0.0); -} - - -TYPED_TEST(Csr, CanGetSubmatrix2) -{ - using Vec = typename TestFixture::Vec; - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto mat = gko::initialize( - { - // clang-format off - I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 - I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 - I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 - I{0.0,-1.0, 2.5, 0.0, 2.0}, // 3 - I{1.0, 0.0,-1.0, 3.5, 1.0}, // 4 - I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 - I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 - // clang-format on - }, - this->exec); - ASSERT_EQ(mat->get_num_stored_elements(), 23); - { - auto sub_mat1 = mat->create_submatrix(gko::span(0, 2), gko::span(0, 2)); - auto ref1 = - gko::initialize({I{1.0, 3.0}, I{1.0, 0.0}}, this->exec); - - GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0); - } - { - auto sub_mat2 = mat->create_submatrix(gko::span(2, 4), gko::span(0, 2)); - auto ref2 = - gko::initialize({I{0.0, 3.0}, I{0.0, -1.0}}, this->exec); - - GKO_EXPECT_MTX_NEAR(sub_mat2.get(), ref2.get(), 0.0); - } - { - auto sub_mat3 = mat->create_submatrix(gko::span(0, 2), gko::span(3, 5)); - auto ref3 = - gko::initialize({I{0.0, 2.0}, I{7.5, 3.0}}, this->exec); - - GKO_EXPECT_MTX_NEAR(sub_mat3.get(), ref3.get(), 0.0); - } - { - auto sub_mat4 = mat->create_submatrix(gko::span(1, 6), gko::span(2, 4)); - /* - 4.5, 7.5 - 4.5, 0.0 - 2.5, 0.0 - -1.0, 3.5 - 0.0, 0.0 - */ - auto ref4 = gko::initialize( - {I{4.5, 7.5}, I{4.5, 0.0}, I{2.5, 0.0}, I{-1.0, 3.5}, - I{0.0, 0.0}}, - this->exec); - - GKO_EXPECT_MTX_NEAR(sub_mat4.get(), ref4.get(), 0.0); - } -} - - -} // namespace From fbfcee51847f91e672e86ab3cb09209a45f4d6b4 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 3 Nov 2021 11:19:35 +0100 Subject: [PATCH 10/14] Review update. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Fritz Göbel --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 1 - omp/matrix/csr_kernels.cpp | 6 +++--- reference/matrix/csr_kernels.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 41257c40de9..6007820e840 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1065,7 +1065,6 @@ __global__ ValueType* __restrict__ result_values) { const auto tidx = thread::get_thread_id_flat(); - // for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) { if (tidx < num_rows) { size_type res_nnz = result_row_ptrs[tidx]; for (size_type nnz = source_row_ptrs[row_offset + tidx]; diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 64f10a63f04..332a37e3bd7 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -723,9 +723,9 @@ void calculate_nonzeros_per_row_in_span( const auto col_idxs = source->get_const_col_idxs(); #pragma omp parallel for for (size_type row = row_span.begin; row < row_span.end; ++row) { - for (size_type col = row_ptrs[row]; col < row_ptrs[row + 1]; ++col) { - if (col_idxs[col] >= col_span.begin && - col_idxs[col] < col_span.end) { + for (size_type nnz = row_ptrs[row]; nnz < row_ptrs[row + 1]; ++nnz) { + if (col_idxs[nnz] >= col_span.begin && + col_idxs[nnz] < col_span.end) { row_nnz->get_data()[row - row_span.begin]++; } } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index dd5a269bc3a..f65242126c1 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -641,10 +641,10 @@ void calculate_nonzeros_per_row_in_span( { size_type res_row = 0; for (size_type row = row_span.begin; row < row_span.end; ++row) { - for (size_type col = source->get_const_row_ptrs()[row]; - col < source->get_const_row_ptrs()[row + 1]; ++col) { - if (source->get_const_col_idxs()[col] < col_span.end && - source->get_const_col_idxs()[col] >= col_span.begin) { + for (size_type nnz = source->get_const_row_ptrs()[row]; + nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { + if (source->get_const_col_idxs()[nnz] < col_span.end && + source->get_const_col_idxs()[nnz] >= col_span.begin) { row_nnz->get_data()[res_row]++; } } From 16a516592b0551ea44ef69a3d9971e79877c1e5c Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 8 Nov 2021 12:11:58 +0100 Subject: [PATCH 11/14] Review update. Co-authored-by: Tobias Ribizel --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 1 + core/matrix/csr.cpp | 12 ++++----- cuda/test/matrix/csr_kernels.cpp | 23 ++++++++--------- hip/test/matrix/csr_kernels.hip.cpp | 23 ++++++++--------- include/ginkgo/core/base/lin_op.hpp | 14 ----------- include/ginkgo/core/matrix/csr.hpp | 29 ++-------------------- omp/matrix/csr_kernels.cpp | 1 + omp/test/matrix/csr_kernels.cpp | 15 ++++++----- reference/matrix/csr_kernels.cpp | 1 + 9 files changed, 42 insertions(+), 77 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 6007820e840..fb63bf59e03 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1090,6 +1090,7 @@ __global__ { const auto tidx = thread::get_thread_id_flat() + row_span.begin; if (tidx < row_span.end) { + nnz_per_row[tidx - row_span.begin] = zero(); for (size_type col = row_ptrs[tidx]; col < row_ptrs[tidx + 1]; ++col) { if (col_idxs[col] >= col_span.begin && col_idxs[col] < col_span.end) { diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 0e015baabcb..ae683e11813 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -50,7 +50,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/components/prefix_sum.hpp" -#include "core/components/reduce_array.hpp" #include "core/matrix/csr_kernels.hpp" @@ -90,7 +89,6 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index, csr::is_sorted_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); -GKO_REGISTER_OPERATION(reduce_add_array, components::reduce_add_array); GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum); GKO_REGISTER_OPERATION(inplace_absolute_array, components::inplace_absolute_array); @@ -552,13 +550,15 @@ Csr::create_submatrix(const gko::span& row_span, auto exec = this->get_executor(); auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length()); Array row_ptrs(exec, row_span.length() + 1); - exec->run(csr::make_fill_array( - row_ptrs.get_data(), row_ptrs.get_num_elems(), zero())); exec->run(csr::make_calculate_nonzeros_per_row_in_span( this, row_span, column_span, &row_ptrs)); exec->run(csr::make_prefix_sum(row_ptrs.get_data(), row_span.length() + 1)); - auto sub_mat = Mat::create(exec, sub_mat_size, std::move(row_ptrs), - this->get_strategy()); + auto num_nnz = + exec->copy_val_to_host(row_ptrs.get_data() + sub_mat_size[0]); + auto sub_mat = Mat::create(exec, sub_mat_size, + std::move(Array(exec, num_nnz)), + std::move(Array(exec, num_nnz)), + std::move(row_ptrs), this->get_strategy()); exec->run(csr::make_compute_submatrix(this, row_span, column_span, sub_mat.get())); sub_mat->make_srow(); diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index a6f6843b71f..26ee15a0ae0 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -938,16 +938,14 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } -TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; set_up_apply_data(); - gko::span rspan{7, 51}; gko::span cspan{22, 88}; auto size = this->mtx2->get_size(); auto row_nnz = gko::Array(this->ref, rspan.length() + 1); - row_nnz.fill(gko::zero()); auto drow_nnz = gko::Array(this->cuda, row_nnz); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( @@ -962,23 +960,28 @@ TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; set_up_apply_data(); - gko::span rspan{7, 51}; gko::span cspan{22, 88}; auto size = this->mtx2->get_size(); auto row_nnz = gko::Array(this->ref, rspan.length() + 1); - row_nnz.fill(gko::zero()); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); gko::kernels::reference::components::prefix_sum( this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; auto drow_nnz = gko::Array(this->cuda, row_nnz); auto smat1 = Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), std::move(row_nnz)); auto sdmat1 = Mtx::create(this->cuda, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->cuda, num_nnz)), + std::move(gko::Array(this->cuda, num_nnz)), std::move(drow_nnz)); @@ -986,10 +989,8 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) rspan, cspan, smat1.get()); gko::kernels::cuda::csr::compute_submatrix(this->cuda, this->dmtx2.get(), rspan, cspan, sdmat1.get()); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); } @@ -997,15 +998,13 @@ TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; set_up_apply_data(); - gko::span rspan{47, 81}; gko::span cspan{2, 31}; + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); } diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 332f87e3837..311aa01cafd 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -948,16 +948,14 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } -TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; set_up_apply_data(); - gko::span rspan{7, 51}; gko::span cspan{22, 88}; auto size = this->mtx2->get_size(); auto row_nnz = gko::Array(this->ref, rspan.length() + 1); - row_nnz.fill(gko::zero()); auto drow_nnz = gko::Array(this->hip, row_nnz); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( @@ -972,23 +970,28 @@ TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; set_up_apply_data(); - gko::span rspan{7, 51}; gko::span cspan{22, 88}; auto size = this->mtx2->get_size(); auto row_nnz = gko::Array(this->ref, rspan.length() + 1); - row_nnz.fill(gko::zero()); gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); gko::kernels::reference::components::prefix_sum( this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; auto drow_nnz = gko::Array(this->hip, row_nnz); auto smat1 = Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), std::move(row_nnz)); auto sdmat1 = Mtx::create(this->hip, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->hip, num_nnz)), + std::move(gko::Array(this->hip, num_nnz)), std::move(drow_nnz)); @@ -996,10 +999,8 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) rspan, cspan, smat1.get()); gko::kernels::hip::csr::compute_submatrix(this->hip, this->dmtx2.get(), rspan, cspan, sdmat1.get()); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); } @@ -1007,15 +1008,13 @@ TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; set_up_apply_data(); - gko::span rspan{47, 81}; gko::span cspan{2, 31}; + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); } diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index a43580c8d81..540fd6ebad3 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -654,20 +654,6 @@ class Preconditionable { }; -/** - * A submatrix of the LinOp implementing this interface can be extracted. - * The row and column spans must be in range of the size of the matrix. - * - * @ingroup LinOp - */ -template -class SubMatrixExtractable { -public: - virtual std::unique_ptr create_submatrix( - const gko::span& row_span, const gko::span& column_span) const = 0; -}; - - /** * The diagonal of a LinOp can be extracted. It will be implemented by * DiagonalExtractable, so the class does not need to implement it. diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index cfd8ab8dbe2..94f7384438a 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -127,7 +127,6 @@ class Csr : public EnableLinOp>, public ConvertibleTo>, public ConvertibleTo>, public DiagonalExtractable, - public SubMatrixExtractable>, public ReadableFromMatrixData, public WritableToMatrixData, public Transposable, @@ -765,9 +764,9 @@ class Csr : public EnableLinOp>, std::unique_ptr> extract_diagonal() const override; std::unique_ptr> create_submatrix( - const gko::span& row_span, const gko::span& column_span) const override; + const gko::span& row_span, const gko::span& column_span) const; - std::unique_ptr compute_absolute() const override; + std::unique_ptr compute_absolute() const; void compute_absolute_inplace() override; @@ -985,30 +984,6 @@ class Csr : public EnableLinOp>, strategy_(strategy->copy()) {} - /** - * Creates a CSR matrix from already allocated (and initialized) row - * pointer array. - * - * @param exec Executor associated to the matrix - * @param size size of the matrix - * @param row_ptrs array of row pointers - */ - explicit Csr( - std::shared_ptr exec, const dim<2>& size, - Array&& row_ptrs, - std::shared_ptr strategy = std::make_shared()) - : EnableLinOp(exec, size), - row_ptrs_{exec, std::move(row_ptrs)}, - srow_(exec), - strategy_(strategy->copy()) - { - GKO_ASSERT(row_ptrs_.get_num_elems() == size[0] + 1); - auto num_nnz = exec->copy_val_to_host(row_ptrs_.get_data() + size[0]); - values_ = Array(exec, num_nnz); - col_idxs_ = Array(exec, num_nnz); - this->make_srow(); - } - /** * Creates a CSR matrix from already allocated (and initialized) row * pointer, column index and value arrays. diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 332a37e3bd7..19f16c8b543 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -723,6 +723,7 @@ void calculate_nonzeros_per_row_in_span( const auto col_idxs = source->get_const_col_idxs(); #pragma omp parallel for for (size_type row = row_span.begin; row < row_span.end; ++row) { + row_nnz->get_data()[row - row_span.begin] = zero(); for (size_type nnz = row_ptrs[row]; nnz < row_ptrs[row + 1]; ++nnz) { if (col_idxs[nnz] >= col_span.begin && col_idxs[nnz] < col_span.end) { diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 11bfdf1dced..d6712cfa7d6 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -711,11 +711,10 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } -TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; set_up_mat_data(); - gko::span rspan{7, 51}; gko::span cspan{22, 88}; auto size = this->mtx2->get_size(); @@ -735,8 +734,9 @@ TEST_F(Csr, CalculateNnzPerRowIsEquivalentToRef) TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) { using Mtx = gko::matrix::Csr<>; + using IndexType = int; + using ValueType = double; set_up_mat_data(); - gko::span rspan{7, 51}; gko::span cspan{22, 88}; auto size = this->mtx2->get_size(); @@ -746,12 +746,17 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); gko::kernels::reference::components::prefix_sum( this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; auto drow_nnz = gko::Array(this->omp, row_nnz); auto smat1 = Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), std::move(row_nnz)); auto sdmat1 = Mtx::create(this->omp, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->omp, num_nnz)), + std::move(gko::Array(this->omp, num_nnz)), std::move(drow_nnz)); @@ -759,10 +764,8 @@ TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) rspan, cspan, smat1.get()); gko::kernels::omp::csr::compute_submatrix(this->omp, this->dmtx2.get(), rspan, cspan, sdmat1.get()); - auto hmat = Mtx::create(this->ref); - hmat->copy_from(sdmat1.get()); - GKO_ASSERT_MTX_NEAR(hmat, smat1, 0.0); + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index f65242126c1..a8dbefa3c11 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -641,6 +641,7 @@ void calculate_nonzeros_per_row_in_span( { size_type res_row = 0; for (size_type row = row_span.begin; row < row_span.end; ++row) { + row_nnz->get_data()[res_row] = zero(); for (size_type nnz = source->get_const_row_ptrs()[row]; nnz < source->get_const_row_ptrs()[row + 1]; ++nnz) { if (source->get_const_col_idxs()[nnz] < col_span.end && From 77087ad2a43a7f1e1a36ac741a198be76cca66b8 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 9 Nov 2021 18:43:58 +0100 Subject: [PATCH 12/14] Add dpcpp kernels + tests. --- dpcpp/matrix/csr_kernels.dp.cpp | 88 ++++++++++++++++++++++++++++++- dpcpp/test/matrix/csr_kernels.cpp | 82 ++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 2 deletions(-) diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 237545e2110..15fa13c488c 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1625,11 +1625,79 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +namespace kernel { + + +template +void calc_nnz_in_span(const span row_span, const span col_span, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + IndexType* __restrict__ nnz_per_row, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1) + row_span.begin; + if (tidx < row_span.end) { + nnz_per_row[tidx - row_span.begin] = zero(); + for (size_type col = row_ptrs[tidx]; col < row_ptrs[tidx + 1]; ++col) { + if (col_idxs[col] >= col_span.begin && + col_idxs[col] < col_span.end) { + nnz_per_row[tidx - row_span.begin]++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(calc_nnz_in_span, calc_nnz_in_span); + + +template +void compute_submat(size_type num_rows, size_type num_cols, size_type num_nnz, + size_type row_offset, size_type col_offset, + const IndexType* __restrict__ src_row_ptrs, + const IndexType* __restrict__ src_col_idxs, + const ValueType* __restrict__ src_values, + const IndexType* __restrict__ res_row_ptrs, + IndexType* __restrict__ res_col_idxs, + ValueType* __restrict__ res_values, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + size_type res_nnz = res_row_ptrs[tidx]; + for (size_type nnz = src_row_ptrs[row_offset + tidx]; + nnz < src_row_ptrs[row_offset + tidx + 1]; ++nnz) { + if ((src_col_idxs[nnz] < (col_offset + num_cols) && + src_col_idxs[nnz] >= col_offset)) { + res_col_idxs[res_nnz] = src_col_idxs[nnz] - col_offset; + res_values[res_nnz] = src_values[nnz]; + res_nnz++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(compute_submat, compute_submat); + + +} // namespace kernel + + template void calculate_nonzeros_per_row_in_span( std::shared_ptr exec, const matrix::Csr* source, const span& row_span, - const span& col_span, Array* row_nnz) GKO_NOT_IMPLEMENTED; + const span& col_span, Array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(row_span.length(), default_block_size); + auto block_dim = default_block_size; + + kernel::calc_nnz_in_span(grid_dim, block_dim, 0, exec->get_queue(), + row_span, col_span, row_ptrs, col_idxs, + row_nnz->get_data()); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); @@ -1640,7 +1708,23 @@ void compute_submatrix(std::shared_ptr exec, const matrix::Csr* source, gko::span row_span, gko::span col_span, matrix::Csr* result) - GKO_NOT_IMPLEMENTED; +{ + const auto row_offset = row_span.begin; + const auto col_offset = col_span.begin; + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + const auto row_ptrs = source->get_const_row_ptrs(); + + const auto num_nnz = source->get_num_stored_elements(); + auto grid_dim = ceildiv(num_rows, default_block_size); + auto block_dim = default_block_size; + kernel::compute_submat( + grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, num_nnz, + row_offset, col_offset, source->get_const_row_ptrs(), + source->get_const_col_idxs(), source->get_const_values(), + result->get_const_row_ptrs(), result->get_col_idxs(), + result->get_values()); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); diff --git a/dpcpp/test/matrix/csr_kernels.cpp b/dpcpp/test/matrix/csr_kernels.cpp index b746d752db6..0ddcf4d84ca 100644 --- a/dpcpp/test/matrix/csr_kernels.cpp +++ b/dpcpp/test/matrix/csr_kernels.cpp @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/prefix_sum.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/unsort_matrix.hpp" @@ -102,6 +103,14 @@ class Csr : public ::testing::Test { std::normal_distribution(-1.0, 1.0), rand_engine, ref); } + void set_up_apply_data() + { + mtx2 = Mtx::create(ref); + mtx2->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 5)); + dmtx2 = Mtx::create(dpcpp); + dmtx2->copy_from(mtx2.get()); + } + void set_up_apply_data(std::shared_ptr strategy, int num_vectors = 1) { @@ -156,6 +165,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr mtx2; std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; @@ -164,6 +174,7 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr dmtx2; std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; @@ -930,4 +941,75 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) } +TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + auto drow_nnz = gko::Array(this->dpcpp, row_nnz); + + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::dpcpp::csr::calculate_nonzeros_per_row_in_span( + this->dpcpp, this->dmtx2.get(), rspan, cspan, &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + using IndexType = int; + using ValueType = vtype; + set_up_apply_data(); + gko::span rspan{7, 51}; + gko::span cspan{22, 88}; + auto size = this->mtx2->get_size(); + auto row_nnz = gko::Array(this->ref, rspan.length() + 1); + row_nnz.fill(gko::zero()); + gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span( + this->ref, this->mtx2.get(), rspan, cspan, &row_nnz); + gko::kernels::reference::components::prefix_sum( + this->ref, row_nnz.get_data(), row_nnz.get_num_elems()); + auto num_nnz = row_nnz.get_data()[rspan.length()]; + auto drow_nnz = gko::Array(this->dpcpp, row_nnz); + auto smat1 = + Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->ref, num_nnz)), + std::move(gko::Array(this->ref, num_nnz)), + std::move(row_nnz)); + auto sdmat1 = + Mtx::create(this->dpcpp, gko::dim<2>(rspan.length(), cspan.length()), + std::move(gko::Array(this->dpcpp, num_nnz)), + std::move(gko::Array(this->dpcpp, num_nnz)), + std::move(drow_nnz)); + + + gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(), + rspan, cspan, smat1.get()); + gko::kernels::dpcpp::csr::compute_submatrix(this->dpcpp, this->dmtx2.get(), + rspan, cspan, sdmat1.get()); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + +TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) +{ + using Mtx = gko::matrix::Csr; + set_up_apply_data(); + gko::span rspan{47, 81}; + gko::span cspan{2, 31}; + + auto smat1 = this->mtx2->create_submatrix(rspan, cspan); + auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan); + + GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0); +} + + } // namespace From 57cc8d86a7c0e1ac73d8b243554dd1e94edbba3e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 9 Nov 2021 22:45:05 +0100 Subject: [PATCH 13/14] Some more edge case tests --- dpcpp/matrix/csr_kernels.dp.cpp | 24 +++++++++-------- reference/test/matrix/csr_kernels.cpp | 38 ++++++++++++++++++++------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 15fa13c488c..e8df9d6c675 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1651,15 +1651,16 @@ GKO_ENABLE_DEFAULT_HOST(calc_nnz_in_span, calc_nnz_in_span); template -void compute_submat(size_type num_rows, size_type num_cols, size_type num_nnz, - size_type row_offset, size_type col_offset, - const IndexType* __restrict__ src_row_ptrs, - const IndexType* __restrict__ src_col_idxs, - const ValueType* __restrict__ src_values, - const IndexType* __restrict__ res_row_ptrs, - IndexType* __restrict__ res_col_idxs, - ValueType* __restrict__ res_values, - sycl::nd_item<3> item_ct1) +void compute_submatrix_idxs_and_vals(size_type num_rows, size_type num_cols, + size_type num_nnz, size_type row_offset, + size_type col_offset, + const IndexType* __restrict__ src_row_ptrs, + const IndexType* __restrict__ src_col_idxs, + const ValueType* __restrict__ src_values, + const IndexType* __restrict__ res_row_ptrs, + IndexType* __restrict__ res_col_idxs, + ValueType* __restrict__ res_values, + sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < num_rows) { @@ -1676,7 +1677,8 @@ void compute_submat(size_type num_rows, size_type num_cols, size_type num_nnz, } } -GKO_ENABLE_DEFAULT_HOST(compute_submat, compute_submat); +GKO_ENABLE_DEFAULT_HOST(compute_submatrix_idxs_and_vals, + compute_submatrix_idxs_and_vals); } // namespace kernel @@ -1718,7 +1720,7 @@ void compute_submatrix(std::shared_ptr exec, const auto num_nnz = source->get_num_stored_elements(); auto grid_dim = ceildiv(num_rows, default_block_size); auto block_dim = default_block_size; - kernel::compute_submat( + kernel::compute_submatrix_idxs_and_vals( grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, num_nnz, row_offset, col_offset, source->get_const_row_ptrs(), source->get_const_col_idxs(), source->get_const_values(), diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index cce523eb10f..216a4048e64 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1649,15 +1649,13 @@ TYPED_TEST(Csr, CanGetSubmatrix2) using T = typename TestFixture::value_type; auto mat = gko::initialize( { - // clang-format off - I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 - I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 - I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 - I{0.0,-1.0, 2.5, 0.0, 2.0}, // 3 - I{1.0, 0.0,-1.0, 3.5, 1.0}, // 4 - I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 - I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 - // clang-format on + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0, -1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0, -1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 }, this->exec); ASSERT_EQ(mat->get_num_stored_elements(), 23); @@ -1698,6 +1696,28 @@ TYPED_TEST(Csr, CanGetSubmatrix2) GKO_EXPECT_MTX_NEAR(sub_mat4.get(), ref4.get(), 0.0); } + { + auto sub_mat5 = mat->create_submatrix(gko::span(0, 7), gko::span(0, 5)); + auto ref5 = gko::initialize( + { + I{1.0, 3.0, 4.5, 0.0, 2.0}, // 0 + I{1.0, 0.0, 4.5, 7.5, 3.0}, // 1 + I{0.0, 3.0, 4.5, 0.0, 2.0}, // 2 + I{0.0, -1.0, 2.5, 0.0, 2.0}, // 3 + I{1.0, 0.0, -1.0, 3.5, 1.0}, // 4 + I{0.0, 1.0, 0.0, 0.0, 2.0}, // 5 + I{0.0, 3.0, 0.0, 7.5, 1.0} // 6 + }, + this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat5.get(), ref5.get(), 0.0); + } + { + auto sub_mat7 = mat->create_submatrix(gko::span(0, 1), gko::span(0, 1)); + auto ref7 = gko::initialize({I{1.0}}, this->exec); + + GKO_EXPECT_MTX_NEAR(sub_mat7.get(), ref7.get(), 0.0); + } } From 7494456e80e2ea00bfd80ef4305e1914552e6485 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 11 Nov 2021 17:52:06 +0100 Subject: [PATCH 14/14] Review update. Co-authored-by: Tobias Ribizel --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 31 +++++++++++----------- include/ginkgo/core/matrix/csr.hpp | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index fb63bf59e03..853a694ab9c 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1064,14 +1064,15 @@ __global__ IndexType* __restrict__ result_col_idxs, ValueType* __restrict__ result_values) { - const auto tidx = thread::get_thread_id_flat(); - if (tidx < num_rows) { - size_type res_nnz = result_row_ptrs[tidx]; - for (size_type nnz = source_row_ptrs[row_offset + tidx]; - nnz < source_row_ptrs[row_offset + tidx + 1]; ++nnz) { - if ((source_col_idxs[nnz] < (col_offset + num_cols) && - source_col_idxs[nnz] >= col_offset)) { - result_col_idxs[res_nnz] = source_col_idxs[nnz] - col_offset; + const auto res_row = thread::get_thread_id_flat(); + if (res_row < num_rows) { + const auto src_row = res_row + row_offset; + auto res_nnz = result_row_ptrs[res_row]; + for (auto nnz = source_row_ptrs[src_row]; + nnz < source_row_ptrs[src_row + 1]; ++nnz) { + const auto res_col = source_col_idxs[nnz] - col_offset; + if (res_col < num_cols && res_col >= 0) { + result_col_idxs[res_nnz] = res_col; result_values[res_nnz] = source_values[nnz]; res_nnz++; } @@ -1088,15 +1089,15 @@ __global__ const IndexType* __restrict__ col_idxs, IndexType* __restrict__ nnz_per_row) { - const auto tidx = thread::get_thread_id_flat() + row_span.begin; - if (tidx < row_span.end) { - nnz_per_row[tidx - row_span.begin] = zero(); - for (size_type col = row_ptrs[tidx]; col < row_ptrs[tidx + 1]; ++col) { - if (col_idxs[col] >= col_span.begin && - col_idxs[col] < col_span.end) { - nnz_per_row[tidx - row_span.begin]++; + const auto src_row = thread::get_thread_id_flat() + row_span.begin; + if (src_row < row_span.end) { + IndexType nnz{}; + for (auto i = row_ptrs[src_row]; i < row_ptrs[src_row + 1]; ++i) { + if (col_idxs[i] >= col_span.begin && col_idxs[i] < col_span.end) { + nnz++; } } + nnz_per_row[src_row - row_span.begin] = nnz; } } diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 94f7384438a..02969dcc94d 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -766,7 +766,7 @@ class Csr : public EnableLinOp>, std::unique_ptr> create_submatrix( const gko::span& row_span, const gko::span& column_span) const; - std::unique_ptr compute_absolute() const; + std::unique_ptr compute_absolute() const override; void compute_absolute_inplace() override;