diff --git a/contributors.txt b/contributors.txt index fde5b54d2c3..a7a558499e2 100644 --- a/contributors.txt +++ b/contributors.txt @@ -14,6 +14,7 @@ Grützmacher Thomas Karlsruhe Institute of Technology Heroux Mike Sandia National Laboratories Hoemmen Mark Sandia National Laboratories Holeksa Claudius Karlsruhe Institute of Technology +Kashi Aditya Karlsruhe Institute of Technology Maier Matthias Texas A&M University Nayak Pratik Karlsruhe Institute of Technology Olenik Gregor HPSim diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 754727381da..7c67744d69a 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -25,6 +25,7 @@ target_sources(ginkgo matrix/dense.cpp matrix/diagonal.cpp matrix/ell.cpp + matrix/fbcsr.cpp matrix/hybrid.cpp matrix/identity.cpp matrix/permutation.cpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index eceae46dd8b..81d74a33f88 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/dense_kernels.hpp" #include "core/matrix/diagonal_kernels.hpp" #include "core/matrix/ell_kernels.hpp" +#include "core/matrix/fbcsr_kernels.hpp" #include "core/matrix/hybrid_kernels.hpp" #include "core/matrix/sellp_kernels.hpp" #include "core/matrix/sparsity_csr_kernels.hpp" @@ -803,6 +804,78 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); } // namespace csr +namespace fbcsr { + + +template +GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + +template +GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + +template +GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + +template +GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + +template +GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + +template +GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + +template +GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr + + namespace coo { diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp new file mode 100644 index 00000000000..5019a047923 --- /dev/null +++ b/core/matrix/fbcsr.cpp @@ -0,0 +1,510 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" +#include "core/components/absolute_array.hpp" +#include "core/components/fill_array.hpp" +#include "core/matrix/fbcsr_kernels.hpp" + + +namespace gko { +namespace matrix { +namespace fbcsr { + + +GKO_REGISTER_OPERATION(spmv, fbcsr::spmv); +GKO_REGISTER_OPERATION(advanced_spmv, fbcsr::advanced_spmv); +GKO_REGISTER_OPERATION(convert_to_csr, fbcsr::convert_to_csr); +GKO_REGISTER_OPERATION(convert_to_dense, fbcsr::convert_to_dense); +GKO_REGISTER_OPERATION(transpose, fbcsr::transpose); +GKO_REGISTER_OPERATION(conj_transpose, fbcsr::conj_transpose); +GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, + fbcsr::calculate_max_nnz_per_row); +GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, + fbcsr::calculate_nonzeros_per_row); +GKO_REGISTER_OPERATION(is_sorted_by_column_index, + fbcsr::is_sorted_by_column_index); +GKO_REGISTER_OPERATION(sort_by_column_index, fbcsr::sort_by_column_index); +GKO_REGISTER_OPERATION(extract_diagonal, fbcsr::extract_diagonal); +GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); + + +} // namespace fbcsr + + +namespace detail { + + +/** + * @internal + * A lightweight dynamic block type on the host + * + * Currently used only while reading a FBCSR matrix from matrix_data. + * + * @tparam ValueType The numeric type of entries of the block + */ +template +class DenseBlock final { +public: + using value_type = ValueType; + + DenseBlock() = default; + + DenseBlock(const int num_rows, const int num_cols) + : nrows_{num_rows}, ncols_{num_cols}, vals_(num_rows * num_cols) + {} + + value_type &at(const int row, const int col) + { + return vals_[row + col * nrows_]; + } + + const value_type &at(const int row, const int col) const + { + return vals_[row + col * nrows_]; + } + + value_type &operator()(const int row, const int col) + { + return at(row, col); + } + + const value_type &operator()(const int row, const int col) const + { + return at(row, col); + } + + int size() const { return nrows_ * ncols_; } + + void resize(const int nrows, const int ncols) + { + vals_.resize(nrows * ncols); + nrows_ = nrows; + ncols_ = ncols; + } + + void zero() + { + std::fill(vals_.begin(), vals_.end(), static_cast(0)); + } + +private: + int nrows_ = 0; + int ncols_ = 0; + std::vector vals_; +}; + + +} // namespace detail + + +template +void Fbcsr::apply_impl(const LinOp *const b, + LinOp *const x) const +{ + using Dense = Dense; + if (auto b_fbcsr = dynamic_cast *>(b)) { + // if b is a FBCSR matrix, we need an SpGeMM + GKO_NOT_SUPPORTED(b_fbcsr); + } else { + // otherwise we assume that b is dense and compute a SpMV/SpMM + this->get_executor()->run( + fbcsr::make_spmv(this, as(b), as(x))); + } +} + + +template +void Fbcsr::apply_impl(const LinOp *const alpha, + const LinOp *const b, + const LinOp *const beta, + LinOp *const x) const +{ + using Dense = Dense; + if (auto b_fbcsr = dynamic_cast *>(b)) { + // if b is a FBCSR matrix, we need an SpGeMM + GKO_NOT_SUPPORTED(b_fbcsr); + } else if (auto b_ident = dynamic_cast *>(b)) { + // if b is an identity matrix, we need an SpGEAM + GKO_NOT_SUPPORTED(b_ident); + } else { + // otherwise we assume that b is dense and compute a SpMV/SpMM + this->get_executor()->run( + fbcsr::make_advanced_spmv(as(alpha), this, as(b), + as(beta), as(x))); + } +} + + +template +void Fbcsr::convert_to( + Fbcsr, IndexType> *const result) const +{ + result->values_ = this->values_; + result->col_idxs_ = this->col_idxs_; + result->row_ptrs_ = this->row_ptrs_; + result->set_size(this->get_size()); + result->bs_ = this->bs_; + result->nbcols_ = this->nbcols_; +} + + +template +void Fbcsr::move_to( + Fbcsr, IndexType> *const result) +{ + this->convert_to(result); +} + + +template +void Fbcsr::convert_to( + Dense *const result) const +{ + auto exec = this->get_executor(); + auto tmp = Dense::create(exec, this->get_size()); + exec->run(fbcsr::make_convert_to_dense(this, tmp.get())); + tmp->move_to(result); +} + + +template +void Fbcsr::move_to(Dense *const result) +{ + this->convert_to(result); +} + + +template +void Fbcsr::convert_to( + Csr *const result) const +{ + auto exec = this->get_executor(); + auto tmp = Csr::create( + exec, this->get_size(), this->get_num_stored_elements(), + result->get_strategy()); + exec->run(fbcsr::make_convert_to_csr(this, tmp.get())); + tmp->move_to(result); +} + + +template +void Fbcsr::move_to( + Csr *const result) +{ + this->convert_to(result); +} + + +template +void Fbcsr::convert_to( + SparsityCsr *const result) const +{ + auto exec = this->get_executor(); + auto tmp = SparsityCsr::create( + exec, + gko::dim<2>{static_cast(this->get_num_block_rows()), + static_cast(this->get_num_block_cols())}, + this->get_num_stored_blocks()); + + tmp->col_idxs_ = this->col_idxs_; + tmp->row_ptrs_ = this->row_ptrs_; + tmp->value_ = Array(exec, {one()}); + tmp->move_to(result); +} + + +template +void Fbcsr::move_to( + SparsityCsr *const result) +{ + this->convert_to(result); +} + + +/* + * Currently, this implementation is sequential and has complexity + * O(nnz log(nnz)). + * @note Can this be changed to a parallel O(nnz) implementation? + */ +template +void Fbcsr::read(const mat_data &data) +{ + GKO_ENSURE_IN_BOUNDS(data.nonzeros.size(), + std::numeric_limits::max()); + + const auto nnz = static_cast(data.nonzeros.size()); + const int bs = this->bs_; + + using Block_t = detail::DenseBlock; + + struct FbEntry { + index_type block_row; + index_type block_column; + }; + + struct FbLess { + bool operator()(const FbEntry &a, const FbEntry &b) const + { + if (a.block_row != b.block_row) + return a.block_row < b.block_row; + else + return a.block_column < b.block_column; + } + }; + + auto create_block_map = [nnz, bs](const mat_data &mdata) { + std::map blocks; + for (index_type inz = 0; inz < nnz; inz++) { + const index_type row = mdata.nonzeros[inz].row; + const index_type col = mdata.nonzeros[inz].column; + const value_type val = mdata.nonzeros[inz].value; + + const auto localrow = static_cast(row % bs); + const auto localcol = static_cast(col % bs); + const index_type blockrow = row / bs; + const index_type blockcol = col / bs; + + Block_t &nnzblk = blocks[{blockrow, blockcol}]; + if (nnzblk.size() == 0) { + nnzblk.resize(bs, bs); + nnzblk.zero(); + nnzblk(localrow, localcol) = val; + } else { + // If this does not happen, we re-visited a nonzero + assert(nnzblk(localrow, localcol) == gko::zero()); + nnzblk(localrow, localcol) = val; + } + } + return blocks; + }; + + const std::map blocks = create_block_map(data); + + auto tmp = Fbcsr::create(this->get_executor()->get_master(), data.size, + blocks.size() * bs * bs, bs); + + tmp->row_ptrs_.get_data()[0] = 0; + index_type cur_brow = 0; + index_type cur_bnz = 0; + index_type cur_bcol = blocks.begin()->first.block_column; + const index_type num_brows = detail::get_num_blocks(bs, data.size[0]); + + acc::range> values( + std::array{blocks.size(), static_cast(bs), + static_cast(bs)}, + tmp->values_.get_data()); + + for (auto it = blocks.begin(); it != blocks.end(); it++) { + GKO_ENSURE_IN_BOUNDS(cur_brow, num_brows); + + tmp->col_idxs_.get_data()[cur_bnz] = it->first.block_column; + for (int ibr = 0; ibr < bs; ibr++) { + for (int jbr = 0; jbr < bs; jbr++) { + values(cur_bnz, ibr, jbr) = it->second(ibr, jbr); + } + } + if (it->first.block_row > cur_brow) { + tmp->row_ptrs_.get_data()[++cur_brow] = cur_bnz; + } else { + assert(cur_brow == it->first.block_row); + assert(cur_bcol <= it->first.block_column); + } + + cur_bcol = it->first.block_column; + cur_bnz++; + } + + tmp->row_ptrs_.get_data()[++cur_brow] = + static_cast(blocks.size()); + + assert(cur_brow == tmp->get_size()[0] / bs); + + tmp->move_to(this); +} + + +template +void Fbcsr::write(mat_data &data) const +{ + std::unique_ptr op{}; + const Fbcsr *tmp{}; + if (this->get_executor()->get_master() != this->get_executor()) { + op = this->clone(this->get_executor()->get_master()); + tmp = static_cast(op.get()); + } else { + tmp = this; + } + + data = {tmp->get_size(), {}}; + + const size_type nbnz = tmp->get_num_stored_blocks(); + const acc::range> vblocks( + std::array{nbnz, static_cast(bs_), + static_cast(bs_)}, + tmp->values_.get_const_data()); + + for (size_type brow = 0; brow < tmp->get_num_block_rows(); ++brow) { + const auto start = tmp->row_ptrs_.get_const_data()[brow]; + const auto end = tmp->row_ptrs_.get_const_data()[brow + 1]; + + for (int ib = 0; ib < bs_; ib++) { + const auto row = brow * bs_ + ib; + for (auto inz = start; inz < end; ++inz) { + for (int jb = 0; jb < bs_; jb++) { + const auto col = + tmp->col_idxs_.get_const_data()[inz] * bs_ + jb; + const auto val = vblocks(inz, ib, jb); + data.nonzeros.emplace_back(row, col, val); + } + } + } + } +} + + +template +std::unique_ptr Fbcsr::transpose() const +{ + auto exec = this->get_executor(); + auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), + this->get_num_stored_elements(), bs_); + + exec->run(fbcsr::make_transpose(this, trans_cpy.get())); + return std::move(trans_cpy); +} + + +template +std::unique_ptr Fbcsr::conj_transpose() const +{ + auto exec = this->get_executor(); + auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), + this->get_num_stored_elements(), bs_); + + exec->run(fbcsr::make_conj_transpose(this, trans_cpy.get())); + return std::move(trans_cpy); +} + + +template +void Fbcsr::sort_by_column_index() +{ + auto exec = this->get_executor(); + exec->run(fbcsr::make_sort_by_column_index(this)); +} + + +template +bool Fbcsr::is_sorted_by_column_index() const +{ + auto exec = this->get_executor(); + bool is_sorted; + exec->run(fbcsr::make_is_sorted_by_column_index(this, &is_sorted)); + return is_sorted; +} + + +template +std::unique_ptr> +Fbcsr::extract_diagonal() const +{ + auto exec = this->get_executor(); + + const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); + auto diag = Diagonal::create(exec, diag_size); + exec->run(fbcsr::make_fill_array(diag->get_values(), diag->get_size()[0], + zero())); + exec->run(fbcsr::make_extract_diagonal(this, lend(diag))); + return diag; +} + + +template +void Fbcsr::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(fbcsr::make_inplace_absolute_array( + this->get_values(), this->get_num_stored_elements())); +} + + +template +std::unique_ptr::absolute_type> +Fbcsr::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_fbcsr = absolute_type::create(exec, this->get_size(), + this->get_num_stored_elements(), + this->get_block_size()); + + abs_fbcsr->col_idxs_ = col_idxs_; + abs_fbcsr->row_ptrs_ = row_ptrs_; + exec->run(fbcsr::make_outplace_absolute_array( + this->get_const_values(), this->get_num_stored_elements(), + abs_fbcsr->get_values())); + + return abs_fbcsr; +} + + +#define GKO_DECLARE_FBCSR_MATRIX(ValueType, IndexType) \ + class Fbcsr +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_MATRIX); + + +} // namespace matrix +} // namespace gko diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp new file mode 100644 index 00000000000..df10c2a3a57 --- /dev/null +++ b/core/matrix/fbcsr_builder.hpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ +#define GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ + + +#include + + +namespace gko { +namespace matrix { + + +/** + * @internal + * + * Allows intrusive access to the arrays stored within a @ref Fbcsr matrix. + * + * @tparam ValueType the value type of the matrix + * @tparam IndexType the index type of the matrix + */ +template +class FbcsrBuilder { +public: + /** + * @return The column index array of the matrix. + */ + Array &get_col_idx_array() { return matrix_->col_idxs_; } + + /** + * @return The value array of the matrix. + */ + Array &get_value_array() { return matrix_->values_; } + + /** + * @return The (uniform) block size + */ + int get_block_size() const { return matrix_->bs_; } + + /** + * @param matrix An existing FBCSR matrix + * for which intrusive access is needed + */ + explicit FbcsrBuilder(Fbcsr *const matrix) + : matrix_{matrix} + {} + + ~FbcsrBuilder() = default; + + // make this type non-movable + FbcsrBuilder(const FbcsrBuilder &) = delete; + FbcsrBuilder(FbcsrBuilder &&) = delete; + FbcsrBuilder &operator=(const FbcsrBuilder &) = delete; + FbcsrBuilder &operator=(FbcsrBuilder &&) = delete; + +private: + Fbcsr *matrix_; +}; + + +} // namespace matrix +} // namespace gko + +#endif // GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp new file mode 100644 index 00000000000..5d2492a3b37 --- /dev/null +++ b/core/matrix/fbcsr_kernels.hpp @@ -0,0 +1,189 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ +#define GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ + + +#include + + +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) \ + void spmv(std::shared_ptr exec, \ + const matrix::Fbcsr *a, \ + const matrix::Dense *b, matrix::Dense *c) + +#define GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ + void advanced_spmv(std::shared_ptr exec, \ + const matrix::Dense *alpha, \ + const matrix::Fbcsr *a, \ + const matrix::Dense *b, \ + const matrix::Dense *beta, \ + matrix::Dense *c) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ + void convert_to_dense(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Dense *result) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ + void convert_to_csr(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Csr *result) + +#define GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) \ + void transpose(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *trans) + +#define GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) \ + void conj_transpose(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *trans) + +#define GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, \ + IndexType) \ + void calculate_max_nnz_per_row( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *source, size_type *result) + +#define GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, \ + IndexType) \ + void calculate_nonzeros_per_row( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + Array *result) + +#define GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ + void sort_by_column_index(std::shared_ptr exec, \ + matrix::Fbcsr *to_sort) + +#define GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ + void is_sorted_by_column_index( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *to_check, bool *is_sorted) + +#define GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) \ + void extract_diagonal(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Diagonal *diag) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) + + +namespace omp { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace omp + + +namespace cuda { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace cuda + + +namespace reference { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace reference + + +namespace hip { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace hip + + +namespace dpcpp { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ diff --git a/core/test/base/exception.cpp b/core/test/base/exception.cpp index 2c1b672f1db..bbbe623dc30 100644 --- a/core/test/base/exception.cpp +++ b/core/test/base/exception.cpp @@ -148,6 +148,14 @@ TEST(ExceptionClasses, DimensionMismatchReturnsCorrectWhatMessage) } +TEST(ExceptionClasses, BlockSizeErrorCorrectWhatMessage) +{ + gko::BlockSizeError error("test_file.cpp", 243, 3, 20); + ASSERT_EQ(std::string("test_file.cpp:243: block size = 3, size = 20"), + error.what()); +} + + TEST(ExceptionClasses, AllocationErrorReturnsCorrectWhatMessage) { gko::AllocationError error("test_file.cpp", 42, "OMP", 135); diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index ac36ed4f7ff..64b3b3ed593 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -5,6 +5,8 @@ ginkgo_create_test(csr_builder) ginkgo_create_test(dense) ginkgo_create_test(diagonal) ginkgo_create_test(ell) +ginkgo_create_test(fbcsr) +ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) ginkgo_create_test(identity) ginkgo_create_test(permutation) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp new file mode 100644 index 00000000000..6b59ce1d30d --- /dev/null +++ b/core/test/matrix/fbcsr.cpp @@ -0,0 +1,483 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" +#include "core/test/matrix/fbcsr_sample.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +template +void assert_matrices_are_same( + const gko::matrix::Fbcsr *const bm, + const gko::matrix::Csr *const cm, + const gko::matrix::Diagonal *const diam = nullptr, + const gko::matrix_data *const md = nullptr) +{ + if (cm) { + ASSERT_EQ(bm->get_size(), cm->get_size()); + ASSERT_EQ(bm->get_num_stored_elements(), cm->get_num_stored_elements()); + } + if (md) { + ASSERT_EQ(bm->get_size(), md->size); + ASSERT_EQ(bm->get_num_stored_elements(), md->nonzeros.size()); + } + if (diam) { + const gko::size_type minsize = + std::min(bm->get_size()[0], bm->get_size()[1]); + ASSERT_EQ(minsize, diam->get_size()[0]); + ASSERT_EQ(minsize, diam->get_size()[1]); + } + + const IndexType nbrows = bm->get_num_block_rows(); + const int bs = bm->get_block_size(); + const auto nbnz = bm->get_num_stored_blocks(); + gko::acc::range> fbvals( + std::array{nbnz, static_cast(bs), + static_cast(bs)}, + bm->get_const_values()); + + for (IndexType ibrow = 0; ibrow < nbrows; ibrow++) { + const IndexType *const browptr = bm->get_const_row_ptrs(); + const IndexType numblocksbrow = browptr[ibrow + 1] - browptr[ibrow]; + for (IndexType irow = ibrow * bs; irow < ibrow * bs + bs; irow++) { + const IndexType rowstart = browptr[ibrow] * bs * bs + + (irow - ibrow * bs) * numblocksbrow * bs; + if (cm) { + ASSERT_EQ(cm->get_const_row_ptrs()[irow], rowstart); + } + } + + const IndexType iz_browstart = browptr[ibrow] * bs * bs; + const IndexType *const bcolinds = bm->get_const_col_idxs(); + + for (IndexType ibnz = browptr[ibrow]; ibnz < browptr[ibrow + 1]; + ibnz++) { + const IndexType bcol = bcolinds[ibnz]; + const IndexType blkoffset_frombrowstart = ibnz - browptr[ibrow]; + + for (int ib = 0; ib < bs; ib++) { + const IndexType row = ibrow * bs + ib; + const IndexType inz_rowstart = + iz_browstart + ib * numblocksbrow * bs; + const IndexType inz_blockstart_row = + inz_rowstart + blkoffset_frombrowstart * bs; + + for (int jb = 0; jb < bs; jb++) { + const IndexType col = bcol * bs + jb; + const IndexType inz = inz_blockstart_row + jb; + if (cm) { + ASSERT_EQ(col, cm->get_const_col_idxs()[inz]); + ASSERT_EQ(fbvals(ibnz, ib, jb), + cm->get_const_values()[inz]); + } + if (md) { + ASSERT_EQ(row, md->nonzeros[inz].row); + ASSERT_EQ(col, md->nonzeros[inz].column); + ASSERT_EQ(fbvals(ibnz, ib, jb), + md->nonzeros[inz].value); + } + if (row == col && diam) { + ASSERT_EQ(fbvals(ibnz, ib, jb), + diam->get_const_values()[row]); + } + } + } + } + } +} + + +template +void check_sample_generator_common(const SampleGenerator sg) +{ + auto fbmtx = sg.generate_fbcsr(); + ASSERT_EQ(fbmtx->get_num_block_rows(), sg.nbrows); + ASSERT_EQ(fbmtx->get_num_block_cols(), sg.nbcols); + ASSERT_EQ(fbmtx->get_size()[0], sg.nrows); + ASSERT_EQ(fbmtx->get_size()[1], sg.ncols); + ASSERT_EQ(fbmtx->get_num_stored_blocks(), sg.nbnz); + ASSERT_EQ(fbmtx->get_num_stored_elements(), sg.nnz); +} + + +template +class FbcsrSample : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + + FbcsrSample() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + + +TYPED_TEST_SUITE(FbcsrSample, gko::test::ValueIndexTypes); + + +TYPED_TEST(FbcsrSample, SampleGeneratorsAreCorrect) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MtxData = gko::matrix_data; + using Diag = gko::matrix::Diagonal; + auto ref = this->ref; + gko::testing::FbcsrSample fbsample(ref); + gko::testing::FbcsrSample2 fbsample2(ref); + + std::unique_ptr fbmtx = fbsample.generate_fbcsr(); + std::unique_ptr csmtx = fbsample.generate_csr(); + const MtxData mdata = fbsample.generate_matrix_data_with_explicit_zeros(); + std::unique_ptr fbmtx2 = fbsample2.generate_fbcsr(); + std::unique_ptr csmtx2 = fbsample2.generate_csr(); + std::unique_ptr diag2 = fbsample2.extract_diagonal(); + const gko::Array nnzperrow = fbsample2.getNonzerosPerRow(); + + check_sample_generator_common(fbsample); + assert_matrices_are_same(fbmtx.get(), csmtx.get(), + static_cast(nullptr), &mdata); + check_sample_generator_common(fbsample2); + assert_matrices_are_same(fbmtx2.get(), csmtx2.get(), diag2.get()); + for (index_type irow = 0; irow < fbsample2.nrows; irow++) { + const index_type *const row_ptrs = csmtx2->get_const_row_ptrs(); + const index_type num_nnz_row = row_ptrs[irow + 1] - row_ptrs[irow]; + ASSERT_EQ(nnzperrow.get_const_data()[irow], num_nnz_row); + for (index_type iz = row_ptrs[irow]; iz < row_ptrs[irow + 1]; iz++) { + const index_type col = csmtx2->get_const_col_idxs()[iz]; + if (irow == col) { + ASSERT_EQ(csmtx2->get_const_values()[iz], + diag2->get_const_values()[irow]); + } + } + } + check_sample_generator_common( + gko::testing::FbcsrSampleUnsorted(ref)); + check_sample_generator_common( + gko::testing::FbcsrSampleSquare(ref)); +} + + +template +class FbcsrSampleComplex : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + + FbcsrSampleComplex() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + + +TYPED_TEST_SUITE(FbcsrSampleComplex, gko::test::ComplexValueIndexTypes); + + +TYPED_TEST(FbcsrSampleComplex, ComplexSampleGeneratorIsCorrect) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + auto ref = this->ref; + gko::testing::FbcsrSampleComplex fbsample3(ref); + + std::unique_ptr fbmtx3 = fbsample3.generate_fbcsr(); + std::unique_ptr csmtx3 = fbsample3.generate_csr(); + + check_sample_generator_common(fbsample3); + assert_matrices_are_same(fbmtx3.get(), csmtx3.get()); +} + + +template +class Fbcsr : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MtxData = gko::matrix_data; + + Fbcsr() + : exec(gko::ReferenceExecutor::create()), + fbsample(exec), + mtx(fbsample.generate_fbcsr()) + { + // backup for move tests + const value_type *const v = mtx->get_values(); + const index_type *const c = mtx->get_col_idxs(); + const index_type *const r = mtx->get_row_ptrs(); + orig_size = mtx->get_size(); + orig_rowptrs.resize(fbsample.nbrows + 1); + orig_colinds.resize(fbsample.nbnz); + orig_vals.resize(fbsample.nnz); + std::copy(r, r + fbsample.nbrows + 1, orig_rowptrs.data()); + std::copy(c, c + fbsample.nbnz, orig_colinds.data()); + std::copy(v, v + fbsample.nnz, orig_vals.data()); + } + + std::shared_ptr exec; + const gko::testing::FbcsrSample fbsample; + std::unique_ptr mtx; + + gko::dim<2> orig_size; + std::vector orig_vals; + std::vector orig_rowptrs; + std::vector orig_colinds; + + void assert_equal_to_original_mtx(const Mtx *m) + { + auto v = m->get_const_values(); + auto c = m->get_const_col_idxs(); + auto r = m->get_const_row_ptrs(); + + const int bs = fbsample.bs; + + ASSERT_EQ(m->get_size(), orig_size); + ASSERT_EQ(m->get_num_stored_elements(), orig_vals.size()); + ASSERT_EQ(m->get_block_size(), bs); + ASSERT_EQ(m->get_num_block_rows(), m->get_size()[0] / bs); + ASSERT_EQ(m->get_num_block_cols(), m->get_size()[1] / bs); + + for (index_type irow = 0; irow < orig_size[0] / bs; irow++) { + const index_type *const rowptr = &orig_rowptrs[0]; + ASSERT_EQ(r[irow], rowptr[irow]); + + for (index_type inz = rowptr[irow]; inz < rowptr[irow + 1]; inz++) { + ASSERT_EQ(c[inz], orig_colinds[inz]); + + for (int i = 0; i < bs * bs; i++) { + ASSERT_EQ(v[inz * bs * bs + i], + orig_vals[inz * bs * bs + i]); + } + } + } + } + + void assert_empty(const Mtx *m) + { + ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); + ASSERT_EQ(m->get_num_stored_elements(), 0); + ASSERT_EQ(m->get_block_size(), 1); + ASSERT_EQ(m->get_const_values(), nullptr); + ASSERT_EQ(m->get_const_col_idxs(), nullptr); + ASSERT_NE(m->get_const_row_ptrs(), nullptr); + } +}; + +TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); + + +TYPED_TEST(Fbcsr, GetNumBlocksCorrectlyThrows) +{ + using index_type = typename TestFixture::index_type; + const index_type vec_sz = 47; + const int blk_sz = 9; + + ASSERT_THROW(gko::matrix::detail::get_num_blocks(blk_sz, vec_sz), + gko::BlockSizeError); +} + + +TYPED_TEST(Fbcsr, GetNumBlocksWorks) +{ + using index_type = typename TestFixture::index_type; + const index_type vec_sz = 45; + const int blk_sz = 9; + + ASSERT_EQ(gko::matrix::detail::get_num_blocks(blk_sz, vec_sz), 5); +} + + +TYPED_TEST(Fbcsr, KnowsItsSize) +{ + ASSERT_EQ(this->mtx->get_size(), gko::dim<2>(6, 12)); + ASSERT_EQ(this->mtx->get_block_size(), 3); + ASSERT_EQ(this->mtx->get_num_stored_elements(), 36); + ASSERT_EQ(this->mtx->get_num_block_rows(), 2); + ASSERT_EQ(this->mtx->get_num_block_cols(), 4); +} + + +TYPED_TEST(Fbcsr, ContainsCorrectData) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(Fbcsr, BlockSizeIsSetCorrectly) +{ + using Mtx = typename TestFixture::Mtx; + auto m = Mtx::create(this->exec); + m->set_block_size(6); + ASSERT_EQ(m->get_block_size(), 6); +} + + +TYPED_TEST(Fbcsr, CanBeEmpty) +{ + using Mtx = typename TestFixture::Mtx; + auto mtx = Mtx::create(this->exec); + + this->assert_empty(mtx.get()); +} + + +TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using size_type = gko::size_type; + const int bs = this->fbsample.bs; + const size_type nbrows = this->fbsample.nbrows; + const size_type nbcols = this->fbsample.nbcols; + const size_type bnnz = this->fbsample.nbnz; + std::unique_ptr refmat = this->fbsample.generate_fbcsr(); + value_type *const values = refmat->get_values(); + index_type *const col_idxs = refmat->get_col_idxs(); + index_type *const row_ptrs = refmat->get_row_ptrs(); + + auto mtx = gko::matrix::Fbcsr::create( + this->exec, gko::dim<2>{nbrows * bs, nbcols * bs}, bs, + gko::Array::view(this->exec, bnnz * bs * bs, values), + gko::Array::view(this->exec, bnnz, col_idxs), + gko::Array::view(this->exec, nbrows + 1, row_ptrs)); + + ASSERT_EQ(mtx->get_const_values(), values); + ASSERT_EQ(mtx->get_const_col_idxs(), col_idxs); + ASSERT_EQ(mtx->get_const_row_ptrs(), row_ptrs); +} + + +TYPED_TEST(Fbcsr, CanBeCopied) +{ + using Mtx = typename TestFixture::Mtx; + auto copy = Mtx::create(this->exec); + + copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->get_values()[1] = 3.0; + this->assert_equal_to_original_mtx(copy.get()); +} + + +TYPED_TEST(Fbcsr, CanBeMoved) +{ + using Mtx = typename TestFixture::Mtx; + auto copy = Mtx::create(this->exec); + + copy->copy_from(std::move(this->mtx)); + + this->assert_equal_to_original_mtx(copy.get()); +} + + +TYPED_TEST(Fbcsr, CanBeCloned) +{ + using Mtx = typename TestFixture::Mtx; + + auto clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->get_values()[1] = 5.0; + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); +} + + +TYPED_TEST(Fbcsr, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(Fbcsr, CanBeReadFromMatrixData) +{ + using Mtx = typename TestFixture::Mtx; + auto m = Mtx::create(this->exec); + m->set_block_size(this->fbsample.bs); + + m->read(this->fbsample.generate_matrix_data()); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using MtxData = typename TestFixture::MtxData; + MtxData refdata = this->fbsample.generate_matrix_data_with_explicit_zeros(); + refdata.ensure_row_major_order(); + + MtxData data; + this->mtx->write(data); + data.ensure_row_major_order(); + + ASSERT_EQ(data.size, refdata.size); + ASSERT_EQ(data.nonzeros.size(), refdata.nonzeros.size()); + for (size_t i = 0; i < data.nonzeros.size(); i++) { + ASSERT_EQ(data.nonzeros[i], refdata.nonzeros[i]); + } +} + + +} // namespace diff --git a/core/test/matrix/fbcsr_builder.cpp b/core/test/matrix/fbcsr_builder.cpp new file mode 100644 index 00000000000..3a4bf358d51 --- /dev/null +++ b/core/test/matrix/fbcsr_builder.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_builder.hpp" + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class FbcsrBuilder : public ::testing::Test { +public: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + +protected: + FbcsrBuilder() + : exec(gko::ReferenceExecutor::create()), + mtx(Mtx::create(exec, gko::dim<2>{4, 6}, 8, 2)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx; +}; + +TYPED_TEST_SUITE(FbcsrBuilder, gko::test::ValueIndexTypes); + + +TYPED_TEST(FbcsrBuilder, ReturnsCorrectArrays) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + gko::matrix::FbcsrBuilder builder{this->mtx.get()}; + + auto builder_col_idxs = builder.get_col_idx_array().get_data(); + auto builder_values = builder.get_value_array().get_data(); + auto ref_col_idxs = this->mtx->get_col_idxs(); + auto ref_values = this->mtx->get_values(); + + ASSERT_EQ(builder_col_idxs, ref_col_idxs); + ASSERT_EQ(builder_values, ref_values); +} + + +} // namespace diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp new file mode 100644 index 00000000000..ac6007e789c --- /dev/null +++ b/core/test/matrix/fbcsr_sample.hpp @@ -0,0 +1,546 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ +#define GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ + + +#include +#include +#include +#include +#include +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" +#include "core/test/utils.hpp" + + +namespace gko { +namespace testing { + + +constexpr auto fbcsr_test_offset = 0.000011118888; + + +/** Generates the same sample block CSR matrix in different formats + * + * This currently a 6 x 12 matrix with 3x3 blocks. + * Assumes that the layout within each block is row-major. + * Generates complex data when instantiated with a complex value type. + */ +template +class FbcsrSample { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MatData = gko::matrix_data; + using SparCsr = gko::matrix::SparsityCsr; + + + const size_type nrows = 6; + const size_type ncols = 12; + const size_type nnz = 36; + const size_type nbrows = 2; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 3; + const std::shared_ptr exec; + + + FbcsrSample(std::shared_ptr rexec) + : exec(rexec) + {} + + /** + * @return The sample matrix in FBCSR format + */ + std::unique_ptr generate_fbcsr() const + { + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 2; + r[2] = 4; + c[0] = 1; + c[1] = 3; + c[2] = 0; + c[3] = 2; + + gko::acc::range> vals( + std::array{nbnz, static_cast(bs), + static_cast(bs)}, + v); + + if (mtx->get_size()[0] % bs != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", + mtx->get_size()[0], mtx->get_size()[1], + "block size does not divide the size!"); + + for (index_type ibrow = 0; ibrow < mtx->get_num_block_rows(); ibrow++) { + const index_type *const browptr = mtx->get_row_ptrs(); + for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; + inz++) { + const index_type bcolind = mtx->get_col_idxs()[inz]; + const value_type base = (ibrow + 1) * (bcolind + 1); + for (int ival = 0; ival < bs; ival++) + for (int jval = 0; jval < bs; jval++) + vals(inz, ival, jval) = + base + static_cast>( + ival * bs + jval); + } + } + + // Some of the entries are set to zero + vals(0, 2, 0) = gko::zero(); + vals(0, 2, 2) = gko::zero(); + vals(3, 0, 0) = gko::zero(); + + vals(3, 2, 1) += fbcsr_test_imaginary; + vals(3, 2, 2) += fbcsr_test_imaginary; + + return mtx; + } + + /** + * @return Sample matrix in CSR format + * + * Keeps explicit zeros. + */ + std::unique_ptr generate_csr() const + { + gko::Array csrrow(exec, {0, 6, 12, 18, 24, 30, 36}); + gko::Array csrcols( + exec, {3, 4, 5, 9, 10, 11, 3, 4, 5, 9, 10, 11, 3, 4, 5, 9, 10, 11, + 0, 1, 2, 6, 7, 8, 0, 1, 2, 6, 7, 8, 0, 1, 2, 6, 7, 8}); + // clang-format off + gko::Array csrvals(exec, I + {2, 3, 4, 4, 5, 6, 5, 6, 7, 7, 8, 9, 0, 9, 0, + 10, 11, 12, 2, 3, 4, 0, 7, 8, 5, 6, 7, + 9, 10, 11, 8, 9, 10, 12, + sct(13.0) + fbcsr_test_imaginary, + sct(14.0) + fbcsr_test_imaginary}); + // clang-format on + return Csr::create(exec, gko::dim<2>{nrows, ncols}, csrvals, csrcols, + csrrow); + } + + /** + * @return Sparsity structure of the matrix + */ + std::unique_ptr generate_sparsity_csr() const + { + gko::Array colids(exec, nbnz); + gko::Array rowptrs(exec, nbrows + 1); + const std::unique_ptr fbmat = generate_fbcsr(); + for (index_type i = 0; i < nbrows + 1; i++) + rowptrs.get_data()[i] = fbmat->get_const_row_ptrs()[i]; + for (index_type i = 0; i < nbnz; i++) + colids.get_data()[i] = fbmat->get_const_col_idxs()[i]; + return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, + rowptrs); + } + + /** + * @return Array of COO triplets that represent the matrix + * + * @note The order of the triplets assumes the blocks are stored row-major + */ + MatData generate_matrix_data() const + { + return MatData({{6, 12}, + {{0, 3, 2.0}, + {0, 4, 3.0}, + {0, 5, 4.0}, + {1, 3, 5.0}, + {1, 4, 6.0}, + {1, 5, 7.0}, + {2, 4, 9.0}, + + {0, 9, 4.0}, + {0, 10, 5.0}, + {0, 11, 6.0}, + {1, 9, 7.0}, + {1, 10, 8.0}, + {1, 11, 9.0}, + {2, 9, 10.0}, + {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, + {3, 1, 3.0}, + {3, 2, 4.0}, + {4, 0, 5.0}, + {4, 1, 6.0}, + {4, 2, 7.0}, + {5, 0, 8.0}, + {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 7, 7.0}, + {3, 8, 8.0}, + {4, 6, 9.0}, + {4, 7, 10.0}, + {4, 8, 11.0}, + {5, 6, 12.0}, + {5, 7, sct(13.0) + fbcsr_test_imaginary}, + {5, 8, sct(14.0) + fbcsr_test_imaginary}}}); + } + + /** + * @return Array of COO triplets that represent the matrix; includes + * explicit zeros + * + * @note The order of the triplets assumes the blocks are stored row-major + */ + MatData generate_matrix_data_with_explicit_zeros() const + { + auto mdata = generate_matrix_data(); + mdata.nonzeros.push_back({2, 3, 0.0}); + mdata.nonzeros.push_back({2, 5, 0.0}); + mdata.nonzeros.push_back({3, 6, 0.0}); + mdata.ensure_row_major_order(); + return mdata; + } + +private: + /// Enables complex data to be used for complex instantiations... + template + constexpr std::enable_if_t() || is_complex(), + ValueType> + sct(U u) const + { + return static_cast(u); + } + + /// ... while ignoring imaginary parts for real instantiations + template + constexpr std::enable_if_t() && !is_complex(), + ValueType> + sct(std::complex cu) const + { + return static_cast(cu.real()); + } + + const ValueType fbcsr_test_imaginary = sct( + std::complex>(0, 0.1 + fbcsr_test_offset)); +}; + +/** + * Generates a sample block CSR matrix in different formats. + * 6 x 8 matrix with 2x2 blocks. + */ +template +class FbcsrSample2 { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using Diagonal = gko::matrix::Diagonal; + + + const size_type nrows = 6; + const size_type ncols = 8; + const size_type nnz = 16; + const size_type nbrows = 3; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 2; + const std::shared_ptr exec; + + + FbcsrSample2(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array r(exec, {0, 1, 3, 4}); + gko::Array c(exec, {0, 0, 3, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + + v[0] = 1; + v[1] = 3; + v[2] = 2; + v[3] = 0; + v[9] = 0; + v[11] = 0; + v[12] = -12; + v[13] = -2; + v[14] = -1; + v[15] = -11; + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } + + std::unique_ptr generate_csr() const + { + gko::Array r(exec, {0, 2, 4, 8, 12, 14, 16}); + gko::Array c( + exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + v[0] = 1; + v[1] = 2; + v[2] = 3; + v[3] = 0; + v[10] = 0; + v[11] = 0; + v[12] = -12; + v[13] = -1; + v[14] = -2; + v[15] = -11; + + return Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + vals, c, r, + std::make_shared()); + } + + std::unique_ptr extract_diagonal() const + { + gko::Array dvals(exec, {1, 0, 0, 0, -12, -11}); + return Diagonal::create(exec, nrows, dvals); + } + + gko::Array getNonzerosPerRow() const + { + return gko::Array(exec, {2, 2, 4, 4, 2, 2}); + } + + +private: + /// Enables use of literals to instantiate value data + template + constexpr ValueType sct(U u) const + { + return static_cast(u); + } +}; + +/** + * @brief Generates the a sample block CSR square matrix + * + * This currently a 4 x 4 matrix with 2x2 blocks. + */ +template +class FbcsrSampleSquare { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + + + const size_type nrows = 4; + const size_type ncols = 4; + const size_type nnz = 8; + const size_type nbrows = 2; + const size_type nbcols = 2; + const size_type nbnz = 2; + const int bs = 2; + const std::shared_ptr exec; + + + FbcsrSampleSquare(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array c(exec, {1, 1}); + gko::Array r(exec, {0, 1, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = i; + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } +}; + +/** + * @brief Generates a sample block CSR matrix with complex values + * + * This is a 6 x 8 matrix with 2x2 blocks. + */ +template +class FbcsrSampleComplex { +public: + using value_type = ValueType; + using index_type = IndexType; + using Csr = gko::matrix::Csr; + using Fbcsr = gko::matrix::Fbcsr; + + + static_assert(is_complex(), "Only for complex types!"); + + + const size_type nrows = 6; + const size_type ncols = 8; + const size_type nnz = 16; + const size_type nbrows = 3; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 2; + const std::shared_ptr exec; + + + FbcsrSampleComplex(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array r(exec, {0, 1, 3, 4}); + gko::Array c(exec, {0, 0, 3, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + + using namespace std::complex_literals; + v[0] = 1.0 + 1.15i; + v[2] = 2.0 + 2.15i; + v[1] = 3.0 - 3.15i; + v[3] = 0.0 - 0.15i; + v[9] = 0.0; + v[11] = 0.0; + v[12] = -12.0 + 12.15i; + v[14] = -1.0 + 1.15i; + v[13] = -2.0 - 2.15i; + v[15] = -11.0 - 11.15i; + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } + + std::unique_ptr generate_csr() const + { + gko::Array r(exec, {0, 2, 4, 8, 12, 14, 16}); + gko::Array c( + exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + + using namespace std::complex_literals; + v[0] = 1.0 + 1.15i; + v[1] = 2.0 + 2.15i; + v[2] = 3.0 - 3.15i; + v[3] = 0.0 - 0.15i; + v[10] = 0.0; + v[11] = 0.0; + v[12] = -12.0 + 12.15i; + v[13] = -1.0 + 1.15i; + v[14] = -2.0 - 2.15i; + v[15] = -11.0 - 11.15i; + + return Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + vals, c, r, + std::make_shared()); + } +}; + + +/** + * Generates a fixed-block CSR matrix with longer and unsorted columns + */ +template +class FbcsrSampleUnsorted { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + + + const size_type nbrows = 3; + const size_type nbcols = 20; + const size_type nbnz = 30; + const int bs = 3; + const size_type nrows = nbrows * bs; + const size_type ncols = nbcols * bs; + const size_type nnz = nbnz * bs * bs; + const std::shared_ptr exec; + + + FbcsrSampleUnsorted(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array r(exec, {0, 8, 19, 30}); + gko::Array c( + exec, {0, 1, 20, 15, 12, 18, 5, 28, 3, 10, 29, 5, 9, 2, 16, + 12, 21, 2, 0, 1, 5, 9, 12, 15, 17, 20, 22, 24, 27, 28}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) { + v[i] = static_cast(i + 0.15 + fbcsr_test_offset); + } + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } +}; + + +} // namespace testing +} // namespace gko + +#endif // GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index a4ded75abed..842f4b75259 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -98,6 +98,7 @@ target_sources(ginkgo_cuda matrix/dense_kernels.cu matrix/diagonal_kernels.cu matrix/ell_kernels.cu + matrix/fbcsr_kernels.cu matrix/hybrid_kernels.cu matrix/sellp_kernels.cu matrix/sparsity_csr_kernels.cu diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu new file mode 100644 index 00000000000..6f7bc48cf92 --- /dev/null +++ b/cuda/matrix/fbcsr_kernels.cu @@ -0,0 +1,176 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +#include "cuda/base/config.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. + * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index 65ce218ac71..a1d7ca4a7a0 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) diff --git a/cuda/test/matrix/fbcsr_kernels.cpp b/cuda/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..02bfa3adfe1 --- /dev/null +++ b/cuda/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,93 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include "core/test/matrix/fbcsr_sample.hpp" +#include "cuda/test/utils.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Fbcsr<>; + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + std::unique_ptr mtx; +}; + + +TEST_F(Fbcsr, CanWriteFromMatrixOnDevice) +{ + using value_type = Mtx::value_type; + using index_type = Mtx::index_type; + using MatData = gko::matrix_data; + gko::testing::FbcsrSample sample(ref); + auto refmat = sample.generate_fbcsr(); + auto cudamat = Mtx::create(cuda); + cudamat->copy_from(gko::lend(refmat)); + + MatData refdata; + MatData cudadata; + refmat->write(refdata); + cudamat->write(cudadata); + + ASSERT_TRUE(refdata.nonzeros == cudadata.nonzeros); +} + + +} // namespace diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 186da7f8b41..c52c77e109b 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -24,6 +24,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp + matrix/fbcsr_kernels.dp.cpp matrix/dense_kernels.dp.cpp matrix/diagonal_kernels.dp.cpp matrix/ell_kernels.dp.cpp diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp new file mode 100644 index 00000000000..32244099d6b --- /dev/null +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -0,0 +1,189 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. + * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, + const IndexType *col_idxs, + const ValueType *fbcsr_vals, + IndexType *row_idxs, IndexType *col_ptrs, + ValueType *csc_vals, + UnaryOperator op) GKO_NOT_IMPLEMENTED; + + +template +void transpose_and_transform(std::shared_ptr exec, + matrix::Fbcsr *trans, + const matrix::Fbcsr *orig, + UnaryOperator op) GKO_NOT_IMPLEMENTED; + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 14aefba1a0d..2b9ed806938 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -166,6 +166,7 @@ set(GINKGO_HIP_SOURCES matrix/dense_kernels.hip.cpp matrix/diagonal_kernels.hip.cpp matrix/ell_kernels.hip.cpp + matrix/fbcsr_kernels.hip.cpp matrix/hybrid_kernels.hip.cpp matrix/sellp_kernels.hip.cpp matrix/sparsity_csr_kernels.hip.cpp diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp new file mode 100644 index 00000000000..13ca94ad5f7 --- /dev/null +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -0,0 +1,173 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "hip/base/config.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. + * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 46fc9db93af..7556602f13b 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -421,6 +421,30 @@ class BadDimension : public Error { }; +/** + * Error that denotes issues between block sizes and matrix dimensions + * + * \tparam IndexType Type of index used by the linear algebra object that is + * incompatible with the requried block size. + */ +template +class BlockSizeError : public Error { +public: + /** + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param block_size Size of small dense blocks in a matrix + * @param size The size that is not exactly divided by the block size + */ + BlockSizeError(const std::string &file, const int line, + const int block_size, const IndexType size) + : Error(file, line, + "block size = " + std::to_string(block_size) + + ", size = " + std::to_string(size)) + {} +}; + + /** * ValueMismatch is thrown if two values are not equal. */ diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index a10f05896fe..7a34a0835a0 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -567,6 +567,26 @@ inline T ensure_allocated_impl(T ptr, const std::string &file, int line, "semi-colon warnings") +/** + * Ensures that a given size, typically of a linear algebraic object, + * is divisible by a given block size. + * + * @param _size A size of a vector or matrix + * @param _block_size Size of small dense blocks that make up + * the vector or matrix + * + * @throw BlockSizeError if _block_size does not divide _size + */ +#define GKO_ASSERT_BLOCK_SIZE_CONFORMANT(_size, _block_size) \ + if (_size % _block_size != 0) { \ + throw BlockSizeError(__FILE__, __LINE__, _block_size, \ + _size); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + } // namespace gko diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp new file mode 100644 index 00000000000..38cac854bc5 --- /dev/null +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -0,0 +1,412 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_FBCSR_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_FBCSR_HPP_ + + +#include +#include +#include + + +namespace gko { +namespace matrix { + + +template +class Dense; + +template +class Csr; + +template +class SparsityCsr; + +template +class Fbcsr; + +template +class FbcsrBuilder; + + +namespace detail { + + +/** + * Computes the number of blocks in some array of given size + * + * @param block_size The size of each block + * @param size The total size of some array/vector + * @return The number of blocks, ie., + * quotient of the size divided by the block size. + * + * @throw BlockSizeError when block_size does not divide the total size. + */ +template +inline IndexType get_num_blocks(const int block_size, const IndexType size) +{ + GKO_ASSERT_BLOCK_SIZE_CONFORMANT(size, block_size); + return size / block_size; +} + + +} // namespace detail + + +/** + * @brief Fixed-block compressed sparse row storage matrix format + * + * FBCSR is a matrix format meant for matrices having a natural block structure + * made up of small, dense, disjoint blocks. It is similar to CSR \sa Csr. + * However, unlike Csr, each non-zero location stores a small dense block of + * entries having a constant size. This reduces the number of integers that need + * to be stored in order to refer to a given non-zero entry, and enables + * efficient implementation of certain block methods. + * + * The block size is expected to be known in advance and passed to the + * constructor. + * + * @note The total number of rows and the number of columns are expected to be + * divisible by the block size. + * + * The nonzero elements are stored in a 1D array row-wise, and accompanied + * with a row pointer array which stores the starting index of each block-row. + * An additional block-column index array is used to identify the block-column + * of each nonzero block. + * + * The Fbcsr LinOp supports different operations: + * + * ```cpp + * matrix::Fbcsr *A, *B, *C; // matrices + * matrix::Dense *b, *x; // vectors tall-and-skinny matrices + * matrix::Dense *alpha, *beta; // scalars of dimension 1x1 + * + * // Applying to Dense matrices computes an SpMV/SpMM product + * A->apply(b, x) // x = A*b + * A->apply(alpha, b, beta, x) // x = alpha*A*b + beta*x + * ``` + * + * @tparam ValueType precision of matrix elements + * @tparam IndexType precision of matrix indexes + * + * @ingroup fbcsr + * @ingroup mat_formats + * @ingroup LinOp + */ +template +class Fbcsr : public EnableLinOp>, + public EnableCreateMethod>, + public ConvertibleTo, IndexType>>, + public ConvertibleTo>, + public ConvertibleTo>, + public ConvertibleTo>, + public DiagonalExtractable, + public ReadableFromMatrixData, + public WritableToMatrixData, + public Transposable, + public EnableAbsoluteComputation< + remove_complex>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Dense; + friend class SparsityCsr; + friend class FbcsrBuilder; + friend class Fbcsr, IndexType>; + +public: + using value_type = ValueType; + using index_type = IndexType; + using transposed_type = Fbcsr; + using mat_data = matrix_data; + using absolute_type = remove_complex; + + /** + * For moving to another Fbcsr of the same type, use the default + * implementation provided by EnableLinOp via the + * EnablePolymorphicAssignment mixin. + */ + using EnableLinOp>::move_to; + + /** + * For converting (copying) to another Fbcsr of the same type, + * use the default implementation provided by EnableLinOp. + */ + using EnableLinOp>::convert_to; + + friend class Fbcsr, IndexType>; + + void convert_to( + Fbcsr, IndexType> *result) const override; + + void move_to(Fbcsr, IndexType> *result) override; + + void convert_to(Dense *other) const override; + + void move_to(Dense *other) override; + + /** + * Converts the matrix to CSR format + * + * @note Any explicit zeros in the original matrix are retained + * in the converted result. + */ + void convert_to(Csr *result) const override; + + void move_to(Csr *result) override; + + /** + * Get the block sparsity pattern in CSR-like format + * + * @note The actual non-zero values are never copied; + * the result always has a value array of size 1 with the value 1. + */ + void convert_to(SparsityCsr *result) const override; + + void move_to(SparsityCsr *result) override; + + /** + * Reads a @ref matrix_data into Fbcsr format. + * Requires the block size to be set beforehand @sa set_block_size. + * + * @warning Unlike Csr::read, here explicit non-zeros are NOT dropped. + */ + void read(const mat_data &data) override; + + void write(mat_data &data) const override; + + std::unique_ptr transpose() const override; + + std::unique_ptr conj_transpose() const override; + + std::unique_ptr> extract_diagonal() const override; + + std::unique_ptr compute_absolute() const override; + + void compute_absolute_inplace() override; + + /** + * Sorts the values blocks and block-column indices in each row + * by column index + */ + void sort_by_column_index(); + + /** + * Tests if all row entry pairs (value, col_idx) are sorted by column index + * + * @returns True if all row entry pairs (value, col_idx) are sorted by + * column index + */ + bool is_sorted_by_column_index() const; + + /** + * @return The values of the matrix. + */ + value_type *get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc Fbcsr::get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type *get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * @return The column indexes of the matrix. + */ + index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + + /** + * @copydoc Fbcsr::get_col_idxs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type *get_const_col_idxs() const noexcept + { + return col_idxs_.get_const_data(); + } + + /** + * @return The row pointers of the matrix. + */ + index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); } + + /** + * @copydoc Fbcsr::get_row_ptrs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type *get_const_row_ptrs() const noexcept + { + return row_ptrs_.get_const_data(); + } + + /** + * @return The number of elements explicitly stored in the matrix + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + /** + * @return The number of non-zero blocks explicitly stored in the matrix + */ + size_type get_num_stored_blocks() const noexcept + { + return col_idxs_.get_num_elems(); + } + + /** + * @return The fixed block size for this matrix + */ + int get_block_size() const noexcept { return bs_; } + + /** + * Set the fixed block size for this matrix + * + * @param block_size The block size + */ + void set_block_size(const int block_size) noexcept { bs_ = block_size; } + + /** + * @return The number of block-rows in the matrix + */ + index_type get_num_block_rows() const noexcept + { + return row_ptrs_.get_num_elems() - 1; + } + + /** + * @return The number of block-columns in the matrix + */ + index_type get_num_block_cols() const noexcept { return nbcols_; } + +protected: + /** + * Creates an uninitialized FBCSR matrix with the given block size. + * + * @param exec Executor associated to the matrix + * @param block_size The desired size of the dense square nonzero blocks; + * defaults to 1. + */ + Fbcsr(std::shared_ptr exec, int block_size = 1) + : Fbcsr(std::move(exec), dim<2>{}, {}, block_size) + {} + + /** + * Creates an uninitialized FBCSR matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_nonzeros number of nonzeros + * @param block_size Size of the small dense square blocks + */ + Fbcsr(std::shared_ptr exec, const dim<2> &size, + size_type num_nonzeros, int block_size) + : EnableLinOp(exec, size), + bs_{block_size}, + nbcols_{static_cast( + detail::get_num_blocks(block_size, size[1]))}, + values_(exec, num_nonzeros), + col_idxs_(exec, detail::get_num_blocks(block_size * block_size, + num_nonzeros)), + row_ptrs_(exec, detail::get_num_blocks(block_size, size[0]) + 1) + {} + + /** + * Creates a FBCSR matrix from already allocated (and initialized) row + * pointer, column index and value arrays. + * + * @tparam ValuesArray type of `values` array + * @tparam ColIdxsArray type of `col_idxs` array + * @tparam RowPtrsArray type of `row_ptrs` array + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param block_size Size of the small square dense nonzero blocks + * @param values array of matrix values + * @param col_idxs array of column indexes + * @param row_ptrs array of row pointers + * + * @note If one of `row_ptrs`, `col_idxs` or `values` is not an rvalue, not + * an array of IndexType, IndexType and ValueType, respectively, or + * is on the wrong executor, an internal copy of that array will be + * created, and the original array data will not be used in the + * matrix. + */ + template + Fbcsr(std::shared_ptr exec, const dim<2> &size, + int block_size, ValuesArray &&values, ColIdxsArray &&col_idxs, + RowPtrsArray &&row_ptrs) + : EnableLinOp(exec, size), + bs_{block_size}, + nbcols_{static_cast( + detail::get_num_blocks(block_size, size[1]))}, + values_{exec, std::forward(values)}, + col_idxs_{exec, std::forward(col_idxs)}, + row_ptrs_{exec, std::forward(row_ptrs)} + { + GKO_ASSERT_EQ(values_.get_num_elems(), + col_idxs_.get_num_elems() * bs_ * bs_); + GKO_ASSERT_EQ(this->get_size()[0] / bs_ + 1, row_ptrs_.get_num_elems()); + } + + void apply_impl(const LinOp *b, LinOp *x) const override; + + void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, + LinOp *x) const override; + +private: + int bs_; ///< Block size + index_type nbcols_; ///< Number of block-columns + Array values_; ///< Non-zero values of all blocks + Array col_idxs_; ///< Block-column indices of all blocks + Array row_ptrs_; ///< Block-row pointers into @ref col_idxs_ +}; + + +} // namespace matrix +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_FBCSR_HPP_ diff --git a/include/ginkgo/core/matrix/sparsity_csr.hpp b/include/ginkgo/core/matrix/sparsity_csr.hpp index 087c0538b7d..bce6621dd7d 100644 --- a/include/ginkgo/core/matrix/sparsity_csr.hpp +++ b/include/ginkgo/core/matrix/sparsity_csr.hpp @@ -49,6 +49,10 @@ template class Csr; +template +class Fbcsr; + + /** * SparsityCsr is a matrix format which stores only the sparsity pattern of a * sparse matrix by compressing each row of the matrix (compressed sparse row @@ -77,6 +81,7 @@ class SparsityCsr friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class Csr; + friend class Fbcsr; public: using EnableLinOp::convert_to; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 3ae5bcfdd96..a2b62ec8671 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -81,6 +81,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 80c6262c30c..acfe33d3ef6 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -20,6 +20,7 @@ target_sources(ginkgo_omp matrix/dense_kernels.cpp matrix/diagonal_kernels.cpp matrix/ell_kernels.cpp + matrix/fbcsr_kernels.cpp matrix/hybrid_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..f2747e12e18 --- /dev/null +++ b/omp/matrix/fbcsr_kernels.cpp @@ -0,0 +1,190 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The fixed-block compressed sparse row matrix format namespace. + * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + matrix::Dense *const c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *const alpha, + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + const matrix::Dense *const beta, + matrix::Dense *const c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Dense *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +inline void convert_fbcsr_to_csc( + size_type num_rows, const IndexType *const row_ptrs, + const IndexType *const col_idxs, const ValueType *const fbcsr_vals, + IndexType *const row_idxs, IndexType *const col_ptrs, + ValueType *const csc_vals, UnaryOperator op) GKO_NOT_IMPLEMENTED; + + +template +void transpose_and_transform( + std::shared_ptr exec, + matrix::Fbcsr *const trans, + const matrix::Fbcsr *const orig, + UnaryOperator op) GKO_NOT_IMPLEMENTED; + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *const source, + size_type *const result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *const source, + Array *const result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *const to_check, + bool *const is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *const orig, + matrix::Diagonal *const diag) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 573b555506a..e84453b8664 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -18,6 +18,7 @@ target_sources(ginkgo_reference matrix/dense_kernels.cpp matrix/diagonal_kernels.cpp matrix/ell_kernels.cpp + matrix/fbcsr_kernels.cpp matrix/hybrid_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..35c0bcbbf8b --- /dev/null +++ b/reference/matrix/fbcsr_kernels.cpp @@ -0,0 +1,510 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include +#include +#include +#include + + +#include +#include +#include +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" +#include "core/base/allocator.hpp" +#include "core/base/iterator_factory.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/fbcsr_builder.hpp" +#include "reference/components/format_conversion.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The fixed-block compressed sparse row matrix format namespace. + * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(const std::shared_ptr, + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + matrix::Dense *const c) +{ + const int bs = a->get_block_size(); + const auto nvecs = static_cast(b->get_size()[1]); + const IndexType nbrows = a->get_num_block_rows(); + const size_type nbnz = a->get_num_stored_blocks(); + auto row_ptrs = a->get_const_row_ptrs(); + auto col_idxs = a->get_const_col_idxs(); + auto vals = a->get_const_values(); + const acc::range> avalues{ + std::array{nbnz, static_cast(bs), + static_cast(bs)}, + vals}; + + for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { + for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; + ++i) { + c->get_values()[i] = zero(); + } + for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; + ++inz) { + for (int ib = 0; ib < bs; ib++) { + const IndexType row = ibrow * bs + ib; + for (int jb = 0; jb < bs; jb++) { + const auto val = avalues(inz, ib, jb); + const auto col = col_idxs[inz] * bs + jb; + for (size_type j = 0; j < nvecs; ++j) { + c->at(row, j) += val * b->at(col, j); + } + } + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(const std::shared_ptr, + const matrix::Dense *const alpha, + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + const matrix::Dense *const beta, + matrix::Dense *const c) +{ + const int bs = a->get_block_size(); + const auto nvecs = static_cast(b->get_size()[1]); + const IndexType nbrows = a->get_num_block_rows(); + auto row_ptrs = a->get_const_row_ptrs(); + auto col_idxs = a->get_const_col_idxs(); + auto vals = a->get_const_values(); + auto valpha = alpha->at(0, 0); + auto vbeta = beta->at(0, 0); + const acc::range> avalues{ + std::array{a->get_num_stored_blocks(), + static_cast(bs), + static_cast(bs)}, + vals}; + + for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { + for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; + ++i) + c->get_values()[i] *= vbeta; + + for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; + ++inz) { + for (int ib = 0; ib < bs; ib++) { + const IndexType row = ibrow * bs + ib; + for (int jb = 0; jb < bs; jb++) { + const auto val = avalues(inz, ib, jb); + const auto col = col_idxs[inz] * bs + jb; + for (size_type j = 0; j < nvecs; ++j) + c->at(row, j) += valpha * val * b->at(col, j); + } + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_to_dense(const std::shared_ptr, + const matrix::Fbcsr *const source, + matrix::Dense *const result) +{ + const int bs = source->get_block_size(); + const IndexType nbrows = source->get_num_block_rows(); + const IndexType nbcols = source->get_num_block_cols(); + const IndexType *const row_ptrs = source->get_const_row_ptrs(); + const IndexType *const col_idxs = source->get_const_col_idxs(); + const ValueType *const vals = source->get_const_values(); + + const acc::range> values{ + std::array{source->get_num_stored_blocks(), + static_cast(bs), + static_cast(bs)}, + vals}; + + for (IndexType brow = 0; brow < nbrows; ++brow) { + for (size_type bcol = 0; bcol < nbcols; ++bcol) { + for (int ib = 0; ib < bs; ib++) { + for (int jb = 0; jb < bs; jb++) { + result->at(brow * bs + ib, bcol * bs + jb) = + zero(); + } + } + } + for (IndexType ibnz = row_ptrs[brow]; ibnz < row_ptrs[brow + 1]; + ++ibnz) { + for (int ib = 0; ib < bs; ib++) { + const IndexType row = brow * bs + ib; + for (int jb = 0; jb < bs; jb++) { + result->at(row, col_idxs[ibnz] * bs + jb) = + values(ibnz, ib, jb); + } + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr, + const matrix::Fbcsr *const source, + matrix::Csr *const result) +{ + const int bs = source->get_block_size(); + const IndexType nbrows = source->get_num_block_rows(); + const IndexType nbcols = source->get_num_block_cols(); + const IndexType *const browptrs = source->get_const_row_ptrs(); + const IndexType *const bcolinds = source->get_const_col_idxs(); + const ValueType *const bvals = source->get_const_values(); + + assert(nbrows * bs == result->get_size()[0]); + assert(nbcols * bs == result->get_size()[1]); + assert(source->get_num_stored_elements() == + result->get_num_stored_elements()); + + IndexType *const row_ptrs = result->get_row_ptrs(); + IndexType *const col_idxs = result->get_col_idxs(); + ValueType *const vals = result->get_values(); + + const acc::range> bvalues{ + std::array{source->get_num_stored_blocks(), + static_cast(bs), + static_cast(bs)}, + bvals}; + + for (IndexType brow = 0; brow < nbrows; ++brow) { + const IndexType nz_browstart = browptrs[brow] * bs * bs; + const IndexType numblocks_brow = browptrs[brow + 1] - browptrs[brow]; + for (int ib = 0; ib < bs; ib++) + row_ptrs[brow * bs + ib] = nz_browstart + numblocks_brow * bs * ib; + + for (IndexType ibnz = browptrs[brow]; ibnz < browptrs[brow + 1]; + ++ibnz) { + const IndexType bcol = bcolinds[ibnz]; + + for (int ib = 0; ib < bs; ib++) { + const IndexType row = brow * bs + ib; + const IndexType inz_blockstart = + row_ptrs[row] + (ibnz - browptrs[brow]) * bs; + + for (int jb = 0; jb < bs; jb++) { + const IndexType inz = inz_blockstart + jb; + vals[inz] = bvalues(ibnz, ib, jb); + col_idxs[inz] = bcol * bs + jb; + } + } + } + } + + row_ptrs[source->get_size()[0]] = + static_cast(source->get_num_stored_elements()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz, + const IndexType *const row_ptrs, + const IndexType *const col_idxs, + const ValueType *const fbcsr_vals, + IndexType *const row_idxs, + IndexType *const col_ptrs, + ValueType *const csc_vals, UnaryOperator op) +{ + const acc::range> rvalues{ + std::array{static_cast(row_ptrs[num_blk_rows]), + static_cast(blksz), + static_cast(blksz)}, + fbcsr_vals}; + const acc::range> cvalues{ + std::array{static_cast(row_ptrs[num_blk_rows]), + static_cast(blksz), + static_cast(blksz)}, + csc_vals}; + for (IndexType brow = 0; brow < num_blk_rows; ++brow) { + for (auto i = row_ptrs[brow]; i < row_ptrs[brow + 1]; ++i) { + const auto dest_idx = col_ptrs[col_idxs[i]]; + col_ptrs[col_idxs[i]]++; + row_idxs[dest_idx] = brow; + for (int ib = 0; ib < blksz; ib++) { + for (int jb = 0; jb < blksz; jb++) { + cvalues(dest_idx, ib, jb) = + op(transpose_blocks ? rvalues(i, jb, ib) + : rvalues(i, ib, jb)); + } + } + } + } +} + + +template +void transpose_and_transform( + matrix::Fbcsr *const trans, + const matrix::Fbcsr *const orig, UnaryOperator op) +{ + const int bs = orig->get_block_size(); + auto trans_row_ptrs = trans->get_row_ptrs(); + auto orig_row_ptrs = orig->get_const_row_ptrs(); + auto trans_col_idxs = trans->get_col_idxs(); + auto orig_col_idxs = orig->get_const_col_idxs(); + auto trans_vals = trans->get_values(); + auto orig_vals = orig->get_const_values(); + + const IndexType nbcols = orig->get_num_block_cols(); + const IndexType nbrows = orig->get_num_block_rows(); + auto orig_nbnz = orig_row_ptrs[nbrows]; + + trans_row_ptrs[0] = 0; + convert_idxs_to_ptrs(orig_col_idxs, orig_nbnz, trans_row_ptrs + 1, nbcols); + + convert_fbcsr_to_fbcsc( + nbrows, bs, orig_row_ptrs, orig_col_idxs, orig_vals, trans_col_idxs, + trans_row_ptrs + 1, trans_vals, op); +} + + +template +void transpose(std::shared_ptr, + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) +{ + transpose_and_transform(trans, orig, [](const ValueType x) { return x; }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr, + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) +{ + transpose_and_transform(trans, orig, + [](const ValueType x) { return conj(x); }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr, + const matrix::Fbcsr *const source, + size_type *const result) +{ + const auto num_rows = source->get_size()[0]; + const auto row_ptrs = source->get_const_row_ptrs(); + const int bs = source->get_block_size(); + IndexType max_nnz = 0; + + for (size_type i = 0; i < num_rows; i++) { + const size_type ibrow = i / bs; + max_nnz = + std::max((row_ptrs[ibrow + 1] - row_ptrs[ibrow]) * bs, max_nnz); + } + + *result = max_nnz; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr, + const matrix::Fbcsr *source, Array *result) +{ + const auto row_ptrs = source->get_const_row_ptrs(); + auto row_nnz_val = result->get_data(); + const int bs = source->get_block_size(); + assert(result->get_num_elems() == source->get_size()[0]); + + for (size_type i = 0; i < result->get_num_elems(); i++) { + const size_type ibrow = i / bs; + row_nnz_val[i] = (row_ptrs[ibrow + 1] - row_ptrs[ibrow]) * bs; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void is_sorted_by_column_index( + std::shared_ptr, + const matrix::Fbcsr *const to_check, + bool *const is_sorted) +{ + const auto row_ptrs = to_check->get_const_row_ptrs(); + const auto col_idxs = to_check->get_const_col_idxs(); + const size_type nbrows = to_check->get_num_block_rows(); + + for (size_type i = 0; i < nbrows; ++i) { + for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { + if (col_idxs[idx - 1] > col_idxs[idx]) { + *is_sorted = false; + return; + } + } + } + *is_sorted = true; + return; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +static void sort_by_column_index_impl( + matrix::Fbcsr *const to_sort) +{ + auto row_ptrs = to_sort->get_const_row_ptrs(); + auto col_idxs = to_sort->get_col_idxs(); + auto values = to_sort->get_values(); + const auto nbrows = to_sort->get_num_block_rows(); + constexpr int bs2 = mat_blk_sz * mat_blk_sz; + for (IndexType i = 0; i < nbrows; ++i) { + IndexType *const brow_col_idxs = col_idxs + row_ptrs[i]; + ValueType *const brow_vals = values + row_ptrs[i] * bs2; + const IndexType nbnz_brow = row_ptrs[i + 1] - row_ptrs[i]; + + std::vector col_permute(nbnz_brow); + std::iota(col_permute.begin(), col_permute.end(), 0); + auto helper = detail::IteratorFactory( + brow_col_idxs, col_permute.data(), nbnz_brow); + std::sort(helper.begin(), helper.end()); + + std::vector oldvalues(nbnz_brow * bs2); + std::copy(brow_vals, brow_vals + nbnz_brow * bs2, oldvalues.begin()); + for (IndexType ibz = 0; ibz < nbnz_brow; ibz++) { + for (int i = 0; i < bs2; i++) { + brow_vals[ibz * bs2 + i] = + oldvalues[col_permute[ibz] * bs2 + i]; + } + } + } +} + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) +{ + const int bs = to_sort->get_block_size(); + if (bs == 2) { + sort_by_column_index_impl<2>(to_sort); + } else if (bs == 3) { + sort_by_column_index_impl<3>(to_sort); + } else if (bs == 4) { + sort_by_column_index_impl<4>(to_sort); + } else { + GKO_NOT_IMPLEMENTED; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr, + const matrix::Fbcsr *const orig, + matrix::Diagonal *const diag) +{ + const auto row_ptrs = orig->get_const_row_ptrs(); + const auto col_idxs = orig->get_const_col_idxs(); + const auto values = orig->get_const_values(); + const int bs = orig->get_block_size(); + const IndexType nbrows = orig->get_num_block_rows(); + const IndexType nbdim_min = + std::min(orig->get_num_block_rows(), orig->get_num_block_cols()); + auto diag_values = diag->get_values(); + + assert(diag->get_size()[0] == nbdim_min * bs); + + const acc::range> vblocks{ + std::array{orig->get_num_stored_blocks(), + static_cast(bs), + static_cast(bs)}, + values}; + + for (IndexType ibrow = 0; ibrow < nbdim_min; ++ibrow) { + for (IndexType idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 1]; + ++idx) { + if (col_idxs[idx] == ibrow) { + for (int ib = 0; ib < bs; ib++) { + diag_values[ibrow * bs + ib] = vblocks(idx, ib, ib); + } + break; + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 81298ba86cd..c826a9eaca7 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -3,6 +3,7 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(identity) ginkgo_create_test(permutation) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..96f376fa68c --- /dev/null +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,774 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/csr_kernels.hpp" +#include "core/matrix/fbcsr_kernels.hpp" +#include "core/test/matrix/fbcsr_sample.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +template +class Fbcsr : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using Dense = gko::matrix::Dense; + using SparCsr = gko::matrix::SparsityCsr; + using Diag = gko::matrix::Diagonal; + using Vec = gko::matrix::Dense; + + Fbcsr() + : exec(gko::ReferenceExecutor::create()), + fbsample(exec), + fbsample2(exec), + fbsamplesquare(exec), + mtx(fbsample.generate_fbcsr()), + refmtx(fbsample.generate_fbcsr()), + ref2mtx(fbsample2.generate_fbcsr()), + refcsrmtx(fbsample.generate_csr()), + ref2csrmtx(fbsample2.generate_csr()), + refspcmtx(fbsample.generate_sparsity_csr()), + mtx2(fbsample2.generate_fbcsr()), + m2diag(fbsample2.extract_diagonal()), + mtxsq(fbsamplesquare.generate_fbcsr()) + {} + + void assert_equal_to_mtx(const Csr *const m) + { + ASSERT_EQ(m->get_size(), refcsrmtx->get_size()); + ASSERT_EQ(m->get_num_stored_elements(), + refcsrmtx->get_num_stored_elements()); + for (index_type i = 0; i < m->get_size()[0] + 1; i++) + ASSERT_EQ(m->get_const_row_ptrs()[i], + refcsrmtx->get_const_row_ptrs()[i]); + for (index_type i = 0; i < m->get_num_stored_elements(); i++) { + ASSERT_EQ(m->get_const_col_idxs()[i], + refcsrmtx->get_const_col_idxs()[i]); + ASSERT_EQ(m->get_const_values()[i], + refcsrmtx->get_const_values()[i]); + } + } + + void assert_equal_to_mtx(const SparCsr *m) + { + ASSERT_EQ(m->get_size(), refspcmtx->get_size()); + ASSERT_EQ(m->get_num_nonzeros(), refspcmtx->get_num_nonzeros()); + for (index_type i = 0; i < m->get_size()[0] + 1; i++) + ASSERT_EQ(m->get_const_row_ptrs()[i], + refspcmtx->get_const_row_ptrs()[i]); + for (index_type i = 0; i < m->get_num_nonzeros(); i++) { + ASSERT_EQ(m->get_const_col_idxs()[i], + refspcmtx->get_const_col_idxs()[i]); + } + } + + std::shared_ptr exec; + const gko::testing::FbcsrSample fbsample; + const gko::testing::FbcsrSample2 fbsample2; + const gko::testing::FbcsrSampleSquare + fbsamplesquare; + std::unique_ptr mtx; + const std::unique_ptr refmtx; + const std::unique_ptr ref2mtx; + const std::unique_ptr refcsrmtx; + const std::unique_ptr ref2csrmtx; + const std::unique_ptr refdenmtx; + const std::unique_ptr refspcmtx; + const std::unique_ptr mtx2; + const std::unique_ptr m2diag; + const std::unique_ptr mtxsq; +}; + +TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); + + +template +constexpr typename std::enable_if_t::value, T> +get_some_number() +{ + return static_cast(1.2); +} + +template +constexpr typename std::enable_if_t::value, T> +get_some_number() +{ + using RT = gko::remove_complex; + return {static_cast(1.2), static_cast(3.4)}; +} + + +TYPED_TEST(Fbcsr, AppliesToDenseVector) +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + const index_type nrows = this->mtx2->get_size()[0]; + const index_type ncols = this->mtx2->get_size()[1]; + auto x = Vec::create(this->exec, gko::dim<2>{(gko::size_type)ncols, 1}); + T *const xvals = x->get_values(); + for (index_type i = 0; i < ncols; i++) { + xvals[i] = std::sin(static_cast(static_cast((i + 1) ^ 2))); + } + auto y = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); + auto yref = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); + using Csr = typename TestFixture::Csr; + auto csr_mtx = Csr::create(this->mtx->get_executor(), + std::make_shared()); + this->mtx2->convert_to(csr_mtx.get()); + + this->mtx2->apply(x.get(), y.get()); + csr_mtx->apply(x.get(), yref.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} + + +TYPED_TEST(Fbcsr, AppliesToDenseMatrix) +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + const gko::size_type nrows = this->mtx2->get_size()[0]; + const gko::size_type ncols = this->mtx2->get_size()[1]; + const gko::size_type nvecs = 3; + auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); + for (index_type i = 0; i < ncols; i++) { + for (index_type j = 0; j < nvecs; j++) { + x->at(i, j) = (static_cast(3.0 * i) + get_some_number()) / + static_cast(j + 1.0); + } + } + auto y = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + auto yref = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + + this->mtx2->apply(x.get(), y.get()); + this->ref2csrmtx->apply(x.get(), yref.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} + + +TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + const T alphav = -1.0; + const T betav = 2.0; + auto alpha = gko::initialize({alphav}, this->exec); + auto beta = gko::initialize({betav}, this->exec); + const gko::size_type nrows = this->mtx2->get_size()[0]; + const gko::size_type ncols = this->mtx2->get_size()[1]; + auto x = Vec::create(this->exec, gko::dim<2>{ncols, 1}); + auto y = Vec::create(this->exec, gko::dim<2>{nrows, 1}); + for (index_type i = 0; i < ncols; i++) { + x->at(i, 0) = (i + 1.0) * (i + 1.0); + } + for (index_type i = 0; i < nrows; i++) { + y->at(i, 0) = static_cast(std::sin(2 * 3.14 * (i + 0.1) / nrows)) + + get_some_number(); + } + auto yref = y->clone(); + + this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); + this->ref2csrmtx->apply(alpha.get(), x.get(), beta.get(), yref.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} + + +TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + const T alphav = -1.0; + const T betav = 2.0; + auto alpha = gko::initialize({alphav}, this->exec); + auto beta = gko::initialize({betav}, this->exec); + const gko::size_type nrows = this->mtx2->get_size()[0]; + const gko::size_type ncols = this->mtx2->get_size()[1]; + const gko::size_type nvecs = 3; + auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); + auto y = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + for (index_type i = 0; i < ncols; i++) { + for (index_type j = 0; j < nvecs; j++) { + x->at(i, j) = + std::log(static_cast(0.1 + static_cast((i + 1) ^ 2))); + } + } + for (index_type i = 0; i < nrows; i++) { + for (index_type j = 0; j < nvecs; j++) { + y->at(i, j) = + static_cast(std::sin(2 * 3.14 * (i + j + 0.1) / nrows)) + + get_some_number(); + } + } + auto yref = y->clone(); + + this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); + this->ref2csrmtx->apply(alpha.get(), x.get(), beta.get(), yref.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} + + +TYPED_TEST(Fbcsr, ApplyFailsOnWrongInnerDimension) +{ + using Vec = typename TestFixture::Vec; + auto x = Vec::create(this->exec, gko::dim<2>{2}); + auto y = Vec::create(this->exec, gko::dim<2>{this->fbsample.nrows}); + + ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +} + + +TYPED_TEST(Fbcsr, ApplyFailsOnWrongNumberOfRows) +{ + using Vec = typename TestFixture::Vec; + auto x = Vec::create(this->exec, gko::dim<2>{this->fbsample.ncols, 2}); + auto y = Vec::create(this->exec, gko::dim<2>{3, 2}); + + ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +} + + +TYPED_TEST(Fbcsr, ApplyFailsOnWrongNumberOfCols) +{ + using Vec = typename TestFixture::Vec; + auto x = Vec::create(this->exec, gko::dim<2>{this->fbsample.ncols, 3}); + auto y = Vec::create(this->exec, gko::dim<2>{this->fbsample.nrows, 2}); + + ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +} + + +TYPED_TEST(Fbcsr, ConvertsToPrecision) +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto tmp = OtherFbcsr::create(this->exec); + auto res = Fbcsr::create(this->exec); + // If OtherType is more precise: 0, otherwise r + auto residual = r::value < r::value + ? gko::remove_complex{0} + : gko::remove_complex{r::value}; + + this->mtx->convert_to(tmp.get()); + tmp->convert_to(res.get()); + + GKO_ASSERT_MTX_NEAR(this->mtx, res, residual); +} + + +TYPED_TEST(Fbcsr, MovesToPrecision) +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto tmp = OtherFbcsr::create(this->exec); + auto res = Fbcsr::create(this->exec); + // If OtherType is more precise: 0, otherwise r + auto residual = r::value < r::value + ? gko::remove_complex{0} + : gko::remove_complex{r::value}; + + this->mtx->move_to(tmp.get()); + tmp->move_to(res.get()); + + GKO_ASSERT_MTX_NEAR(this->mtx, res, residual); +} + + +TYPED_TEST(Fbcsr, ConvertsToDense) +{ + using Dense = typename TestFixture::Dense; + auto dense_mtx = Dense::create(this->mtx->get_executor()); + + this->mtx->convert_to(dense_mtx.get()); + + auto refdenmtx = Dense::create(this->mtx->get_executor()); + this->refcsrmtx->convert_to(refdenmtx.get()); + GKO_ASSERT_MTX_NEAR(dense_mtx, refdenmtx, 0.0); +} + + +TYPED_TEST(Fbcsr, MovesToDense) +{ + using Dense = typename TestFixture::Dense; + auto dense_mtx = Dense::create(this->mtx->get_executor()); + + this->mtx->move_to(dense_mtx.get()); + + auto refdenmtx = Dense::create(this->mtx->get_executor()); + this->refcsrmtx->convert_to(refdenmtx.get()); + GKO_ASSERT_MTX_NEAR(dense_mtx, refdenmtx, 0.0); +} + + +TYPED_TEST(Fbcsr, ConvertsToCsr) +{ + using Csr = typename TestFixture::Csr; + auto csr_mtx = Csr::create(this->mtx->get_executor(), + std::make_shared()); + this->mtx->convert_to(csr_mtx.get()); + this->assert_equal_to_mtx(csr_mtx.get()); + + auto csr_mtx_2 = Csr::create(this->mtx->get_executor(), + std::make_shared()); + this->ref2mtx->convert_to(csr_mtx_2.get()); + GKO_ASSERT_MTX_NEAR(csr_mtx_2, this->ref2csrmtx, 0.0); +} + + +TYPED_TEST(Fbcsr, MovesToCsr) +{ + using Csr = typename TestFixture::Csr; + auto csr_mtx = Csr::create(this->mtx->get_executor(), + std::make_shared()); + + this->mtx->move_to(csr_mtx.get()); + + this->assert_equal_to_mtx(csr_mtx.get()); +} + + +TYPED_TEST(Fbcsr, ConvertsToSparsityCsr) +{ + using SparsityCsr = typename TestFixture::SparCsr; + auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); + + this->mtx->convert_to(sparsity_mtx.get()); + + this->assert_equal_to_mtx(sparsity_mtx.get()); +} + + +TYPED_TEST(Fbcsr, MovesToSparsityCsr) +{ + using SparsityCsr = typename TestFixture::SparCsr; + using Fbcsr = typename TestFixture::Mtx; + auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); + + this->mtx->move_to(sparsity_mtx.get()); + + this->assert_equal_to_mtx(sparsity_mtx.get()); +} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToPrecision) +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto empty = OtherFbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = Fbcsr::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); +} + + +TYPED_TEST(Fbcsr, MovesEmptyToPrecision) +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto empty = OtherFbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = Fbcsr::create(this->exec); + + empty->move_to(res.get()); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); +} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToDense) +{ + using ValueType = typename TestFixture::value_type; + using Fbcsr = typename TestFixture::Mtx; + using Dense = typename TestFixture::Dense; + auto empty = Fbcsr::create(this->exec); + auto res = Dense::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_FALSE(res->get_size()); +} + + +TYPED_TEST(Fbcsr, MovesEmptyToDense) +{ + using ValueType = typename TestFixture::value_type; + using Fbcsr = typename TestFixture::Mtx; + using Dense = typename TestFixture::Dense; + auto empty = Fbcsr::create(this->exec); + auto res = Dense::create(this->exec); + + empty->move_to(res.get()); + + ASSERT_FALSE(res->get_size()); +} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToSparsityCsr) +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using Fbcsr = typename TestFixture::Mtx; + using SparCsr = typename TestFixture::SparCsr; + auto empty = Fbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = SparCsr::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_EQ(res->get_num_nonzeros(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); +} + + +TYPED_TEST(Fbcsr, MovesEmptyToSparsityCsr) +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using Fbcsr = typename TestFixture::Mtx; + using SparCsr = typename TestFixture::SparCsr; + auto empty = Fbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = SparCsr::create(this->exec); + + empty->move_to(res.get()); + + ASSERT_EQ(res->get_num_nonzeros(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); +} + + +TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) +{ + using IndexType = typename TestFixture::index_type; + gko::Array row_nnz(this->exec, this->mtx2->get_size()[0]); + + gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( + this->exec, this->mtx2.get(), &row_nnz); + gko::Array refrnnz(this->exec, this->mtx2->get_size()[0]); + gko::kernels::reference::csr ::calculate_nonzeros_per_row( + this->exec, this->ref2csrmtx.get(), &refrnnz); + + ASSERT_EQ(row_nnz.get_num_elems(), refrnnz.get_num_elems()); + auto row_nnz_val = row_nnz.get_data(); + for (gko::size_type i = 0; i < this->mtx2->get_size()[0]; i++) + ASSERT_EQ(row_nnz_val[i], refrnnz.get_const_data()[i]); +} + + +TYPED_TEST(Fbcsr, CalculatesMaxNnzPerRow) +{ + using IndexType = typename TestFixture::index_type; + gko::size_type max_row_nnz{}; + + gko::kernels::reference::fbcsr::calculate_max_nnz_per_row( + this->exec, this->mtx2.get(), &max_row_nnz); + gko::size_type ref_max_row_nnz{}; + gko::kernels::reference::csr::calculate_max_nnz_per_row( + this->exec, this->ref2csrmtx.get(), &ref_max_row_nnz); + + ASSERT_EQ(max_row_nnz, ref_max_row_nnz); +} + + +TYPED_TEST(Fbcsr, SquareMtxIsTransposable) +{ + using Fbcsr = typename TestFixture::Mtx; + using Csr = typename TestFixture::Csr; + auto csrmtxsq = + Csr::create(this->exec, std::make_shared()); + this->mtxsq->convert_to(csrmtxsq.get()); + + std::unique_ptr reftmtx = csrmtxsq->transpose(); + auto reftmtx_as_csr = static_cast(reftmtx.get()); + auto trans = this->mtxsq->transpose(); + auto trans_as_fbcsr = static_cast(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx_as_csr, 0.0); +} + + +TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) +{ + using Fbcsr = typename TestFixture::Mtx; + using Csr = typename TestFixture::Csr; + auto csrmtx = + Csr::create(this->exec, std::make_shared()); + this->mtx2->convert_to(csrmtx.get()); + + std::unique_ptr reftmtx = csrmtx->transpose(); + auto reftmtx_as_csr = static_cast(reftmtx.get()); + auto trans = this->mtx2->transpose(); + auto trans_as_fbcsr = static_cast(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx_as_csr, 0.0); +} + + +TYPED_TEST(Fbcsr, RecognizeSortedMatrix) +{ + ASSERT_TRUE(this->mtx->is_sorted_by_column_index()); +} + + +TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) +{ + using Fbcsr = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto cpmat = this->mtx->clone(); + index_type *const colinds = cpmat->get_col_idxs(); + std::swap(colinds[0], colinds[1]); + + ASSERT_FALSE(cpmat->is_sorted_by_column_index()); +} + + +TYPED_TEST(Fbcsr, SortUnsortedMatrix) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = typename TestFixture::Mtx; + const gko::testing::FbcsrSampleUnsorted fbsample( + this->exec); + auto unsrt_mtx = fbsample.generate_fbcsr(); + auto srt_mtx = unsrt_mtx->clone(); + + srt_mtx->sort_by_column_index(); + + GKO_ASSERT_MTX_NEAR(unsrt_mtx, srt_mtx, 0.0); + ASSERT_TRUE(srt_mtx->is_sorted_by_column_index()); +} + + +TYPED_TEST(Fbcsr, ExtractsDiagonal) +{ + using T = typename TestFixture::value_type; + auto matrix = this->mtx2->clone(); + + auto diag = matrix->extract_diagonal(); + + ASSERT_EQ(this->m2diag->get_size(), diag->get_size()); + for (gko::size_type i = 0; i < this->m2diag->get_size()[0]; i++) { + ASSERT_EQ(this->m2diag->get_const_values()[i], + diag->get_const_values()[i]); + } +} + + +TYPED_TEST(Fbcsr, InplaceAbsolute) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = typename TestFixture::Mtx; + using Csr = typename TestFixture::Csr; + auto mtx = this->fbsample2.generate_fbcsr(); + const std::unique_ptr refabs = this->ref2csrmtx->clone(); + refabs->compute_absolute_inplace(); + + mtx->compute_absolute_inplace(); + + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(mtx, refabs, tolerance); +} + + +TYPED_TEST(Fbcsr, OutplaceAbsolute) +{ + using value_type = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using AbsCsr = typename gko::remove_complex; + using AbsMtx = typename gko::remove_complex; + auto mtx = this->fbsample2.generate_fbcsr(); + + const std::unique_ptr refabs = + this->ref2csrmtx->compute_absolute(); + auto abs_mtx = mtx->compute_absolute(); + + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, tolerance); +} + + +template +class FbcsrComplex : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; +}; + +TYPED_TEST_SUITE(FbcsrComplex, gko::test::ComplexValueIndexTypes); + + +TYPED_TEST(FbcsrComplex, ConvertsComplexToCsr) +{ + using Csr = typename TestFixture::Csr; + using Fbcsr = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = gko::ReferenceExecutor::create(); + gko::testing::FbcsrSampleComplex csample(exec); + std::unique_ptr mtx = csample.generate_fbcsr(); + auto csr_mtx = + Csr::create(exec, std::make_shared()); + + mtx->convert_to(csr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mtx, mtx, 0.0); +} + + +TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) +{ + using Csr = typename TestFixture::Csr; + using Fbcsr = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = gko::ReferenceExecutor::create(); + gko::testing::FbcsrSampleComplex csample(exec); + auto csrmtx = csample.generate_csr(); + auto mtx = csample.generate_fbcsr(); + + auto reftranslinop = csrmtx->conj_transpose(); + auto reftrans = static_cast(reftranslinop.get()); + auto trans = mtx->conj_transpose(); + auto trans_as_fbcsr = static_cast(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftrans, 0.0); +} + + +TYPED_TEST(FbcsrComplex, InplaceAbsolute) +{ + using Csr = typename TestFixture::Csr; + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + gko::testing::FbcsrSample fbsample( + gko::ReferenceExecutor::create()); + auto mtx = fbsample.generate_fbcsr(); + auto csrmtx = fbsample.generate_csr(); + + mtx->compute_absolute_inplace(); + csrmtx->compute_absolute_inplace(); + + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(mtx, csrmtx, tolerance); +} + + +TYPED_TEST(FbcsrComplex, OutplaceAbsolute) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using AbsMtx = typename gko::remove_complex; + gko::testing::FbcsrSample fbsample( + gko::ReferenceExecutor::create()); + auto mtx = fbsample.generate_fbcsr(); + auto csrmtx = fbsample.generate_csr(); + + auto abs_mtx = mtx->compute_absolute(); + auto refabs = mtx->compute_absolute(); + + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, tolerance); +} + + +} // namespace