From 4d2179395d0b7a1d1335717e5b5145a6871a140a Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Fri, 6 Nov 2020 13:04:22 +0100 Subject: [PATCH 01/58] [tests fail] copied files for fixed block size BCSR format, core tests pass --- core/CMakeLists.txt | 2 + core/components/fixed_block.hpp | 233 +++ core/device_hooks/common_kernels.inc.cpp | 138 ++ core/matrix/fbcsr.cpp | 698 ++++++++ core/matrix/fbcsr_builder.hpp | 94 + core/matrix/fbcsr_kernels.hpp | 275 +++ core/test/matrix/CMakeLists.txt | 2 + core/test/matrix/fbcsr.cpp | 378 ++++ core/test/matrix/fbcsr_builder.cpp | 120 ++ cuda/CMakeLists.txt | 1 + cuda/matrix/fbcsr_kernels.cu | 1446 +++++++++++++++ cuda/test/matrix/CMakeLists.txt | 1 + cuda/test/matrix/fbcsr_kernels.cpp | 883 ++++++++++ hip/CMakeLists.txt | 1 + hip/matrix/fbcsr_kernels.hip.cpp | 1263 ++++++++++++++ hip/test/matrix/CMakeLists.txt | 1 + hip/test/matrix/fbcsr_kernels.hip.cpp | 866 +++++++++ include/ginkgo/core/matrix/fbcsr.hpp | 569 ++++++ .../ginkgo/core/matrix/matrix_strategies.hpp | 503 ++++++ include/ginkgo/core/matrix/sparsity_csr.hpp | 4 + include/ginkgo/ginkgo.hpp | 2 + omp/CMakeLists.txt | 1 + omp/components/fbcsr_spgeam.hpp | 31 + omp/matrix/fbcsr_kernels.cpp | 884 ++++++++++ omp/test/matrix/CMakeLists.txt | 1 + omp/test/matrix/fbcsr_kernels.cpp | 662 +++++++ reference/CMakeLists.txt | 1 + reference/components/fbcsr_spgeam.hpp | 31 + reference/matrix/fbcsr_kernels.cpp | 968 ++++++++++ reference/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/fbcsr_kernels.cpp | 1550 +++++++++++++++++ 31 files changed, 11610 insertions(+) create mode 100644 core/components/fixed_block.hpp create mode 100644 core/matrix/fbcsr.cpp create mode 100644 core/matrix/fbcsr_builder.hpp create mode 100644 core/matrix/fbcsr_kernels.hpp create mode 100644 core/test/matrix/fbcsr.cpp create mode 100644 core/test/matrix/fbcsr_builder.cpp create mode 100644 cuda/matrix/fbcsr_kernels.cu create mode 100644 cuda/test/matrix/fbcsr_kernels.cpp create mode 100644 hip/matrix/fbcsr_kernels.hip.cpp create mode 100644 hip/test/matrix/fbcsr_kernels.hip.cpp create mode 100644 include/ginkgo/core/matrix/fbcsr.hpp create mode 100644 include/ginkgo/core/matrix/matrix_strategies.hpp create mode 100644 omp/components/fbcsr_spgeam.hpp create mode 100644 omp/matrix/fbcsr_kernels.cpp create mode 100644 omp/test/matrix/fbcsr_kernels.cpp create mode 100644 reference/components/fbcsr_spgeam.hpp create mode 100644 reference/matrix/fbcsr_kernels.cpp create mode 100644 reference/test/matrix/fbcsr_kernels.cpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 754727381da..4e1a7086148 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -22,9 +22,11 @@ target_sources(ginkgo log/stream.cpp matrix/coo.cpp matrix/csr.cpp + matrix/fbcsr.cpp matrix/dense.cpp matrix/diagonal.cpp matrix/ell.cpp + matrix/fbcsr.cpp matrix/hybrid.cpp matrix/identity.cpp matrix/permutation.cpp diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp new file mode 100644 index 00000000000..526cdb25909 --- /dev/null +++ b/core/components/fixed_block.hpp @@ -0,0 +1,233 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ +#define GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ + + +#include +#include + +#include + +namespace gko { +namespace blockutils { + + +/// Error that denotes issues between block sizes and matrix dimensions +template +class BlockSizeError : public Error { +public: + BlockSizeError(const std::string &file, const int line, + const int block_size, const IndexType size) + : Error(file, line, + " block size = " + std::to_string(block_size) + + ", size = " + std::to_string(size)) + {} +}; + +/// Error that denotes issues between block sizes and matrix dimensions +template +class BlockReadError : public Error { +public: + BlockReadError(const std::string &file, const int line, + const std::string &msg) + : Error(file, line, msg) + {} +}; + +template +int getNumFixedBlocks(const int block_size, const IndexType size) +{ + if (size % block_size != 0) + throw BlockSizeError(__FILE__, __LINE__, block_size, size); + return size / block_size; +} + +/// A dense block of values with compile-time constant dimensions +/** The blocks are stored row-major. However, in future, + * a layout template parameter can be added if needed. + * + * The primary use is to reinterpret subsets of entries in a big array as + * small dense blocks. 
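+ *
+ * A minimal usage sketch (illustrative only, not part of this patch): assume
+ * `buffer` points to at least 2 * 3 * 3 suitably aligned entries of type
+ * `double`, holding two row-major 3x3 blocks back to back. Then
+ *
+ *     auto blocks = reinterpret_cast<FixedBlock<double, 3, 3> *>(buffer);
+ *     blocks[1](2, 0) = 4.5;  // entry (2,0) of the second block
+ *
+ * addresses buffer[9 + 2*3 + 0], i.e. buffer[15].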
+ */ +template +class FixedBlock final { + static_assert(nrows > 0, "Requires positive number of rows!"); + static_assert(ncols > 0, "Requires positive number of columns!"); + +public: + using value_type = ValueType; + + value_type &at(const int row, const int col) + { + return vals[row * ncols + col]; + } + + const value_type &at(const int row, const int col) const + { + return vals[row * ncols + col]; + } + + value_type &operator()(const int row, const int col) + { + return at(row, col); + } + + const value_type &operator()(const int row, const int col) const + { + return at(row, col); + } + +private: + ValueType vals[nrows * ncols]; +}; + + +/// Two-dimensional square block +// template +// using FixedBlock = FixedRectangularBlock; + +/// Fixed-size column vector +// template +// using FixedSegment = FixedRectangularBlock; + +/// A lightweight dynamic block type for the host space +template +class DenseBlock final { +public: + using value_type = ValueType; + + DenseBlock() : nrows_{0}, ncols_{0}, vals_{nullptr} {} + + DenseBlock(const int num_rows, const int num_cols) + : nrows_{num_rows}, + ncols_{num_cols}, + vals_{new value_type[num_rows * num_cols]} + {} + + ~DenseBlock() { delete[] vals_; } + + value_type &at(const int row, const int col) + { + return vals_[row * ncols_ + col]; + } + + const value_type &at(const int row, const int col) const + { + return vals_[row * ncols_ + col]; + } + + value_type &operator()(const int row, const int col) + { + return at(row, col); + } + + const value_type &operator()(const int row, const int col) const + { + return at(row, col); + } + + int size() const { return nrows_ * ncols_; } + + void resize(const int nrows, const int ncols) + { + nrows_ = nrows; + ncols_ = ncols; + delete[] vals_; + vals_ = new value_type[nrows_ * ncols_]; + } + + void zero() + { + for (int i = 0; i < nrows_ * ncols_; i++) + // vals_[i] = gko::zero(); + vals_[i] = static_cast(0); + } + +private: + int nrows_; + int ncols_; + value_type *vals_; +}; + +/// A view into a an array of dense of dense blocks of some runtime-defined size +template +class DenseBlocksView final { +public: + using value_type = ValueType; + using index_type = IndexType; + + /** + * @param buffer Segment of memory to be interpreted as an array of 2D + * blocks + * @param num_rows Number of rows in each block + * @param num_cols Number of columns in each block + */ + DenseBlocksView(ValueType *const buffer, const int num_rows, + const int num_cols) + : nrows_{num_rows}, ncols_{num_cols}, vals_{buffer} + {} + + value_type &at(const index_type block, const int row, const int col) + { + return vals_[block * nrows_ * ncols_ + row * ncols_ + col]; + } + + const typename std::remove_const::type &at( + const index_type block, const int row, const int col) const + { + return vals_[block * nrows_ * ncols_ + row * ncols_ + col]; + } + + value_type &operator()(const index_type block, const int row, const int col) + { + return at(block, row, col); + } + + const typename std::remove_const::type &operator()( + const index_type block, const int row, const int col) const + { + return at(block, row, col); + } + +private: + int nrows_; ///< Number of rows in each block + int ncols_; ///< Number of columns in each block + value_type *vals_; +}; + + +} // namespace blockutils +} // namespace gko + +#endif diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index eceae46dd8b..43f4d90d8c0 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ 
b/core/device_hooks/common_kernels.inc.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/dense_kernels.hpp" #include "core/matrix/diagonal_kernels.hpp" #include "core/matrix/ell_kernels.hpp" +#include "core/matrix/fbcsr_kernels.hpp" #include "core/matrix/hybrid_kernels.hpp" #include "core/matrix/sellp_kernels.hpp" #include "core/matrix/sparsity_csr_kernels.hpp" @@ -803,6 +804,143 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); } // namespace csr +// TODO (script:fbcsr): adapt this block as needed +namespace fbcsr { + + +template +GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + +template +GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + +template +GKO_DECLARE_FBCSR_SPGEMM_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); + +template +GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); + +template +GKO_DECLARE_FBCSR_SPGEAM_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); + +template +GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); + +template +GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + +template +GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); + +template +GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); 
+GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); + +template +GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); + +template +GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); + +template +GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + +template +GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + +template +GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + +template +GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + +template +GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr + + namespace coo { diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp new file mode 100644 index 00000000000..e362b3b466f --- /dev/null +++ b/core/matrix/fbcsr.cpp @@ -0,0 +1,698 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/components/absolute_array.hpp" +#include "core/components/fill_array.hpp" +#include "core/components/fixed_block.hpp" +#include "core/matrix/fbcsr_kernels.hpp" + + +namespace gko { +namespace matrix { +namespace fbcsr { + + +GKO_REGISTER_OPERATION(spmv, fbcsr::spmv); +// GKO_REGISTER_OPERATION(advanced_spmv, fbcsr::advanced_spmv); +GKO_REGISTER_OPERATION(spgemm, fbcsr::spgemm); +// GKO_REGISTER_OPERATION(advanced_spgemm, fbcsr::advanced_spgemm); +GKO_REGISTER_OPERATION(spgeam, fbcsr::spgeam); +GKO_REGISTER_OPERATION(convert_to_coo, fbcsr::convert_to_coo); +GKO_REGISTER_OPERATION(convert_to_dense, fbcsr::convert_to_dense); +// GKO_REGISTER_OPERATION(convert_to_sellp, fbcsr::convert_to_sellp); +GKO_REGISTER_OPERATION(calculate_total_cols, fbcsr::calculate_total_cols); +// GKO_REGISTER_OPERATION(convert_to_ell, fbcsr::convert_to_ell); +// GKO_REGISTER_OPERATION(convert_to_hybrid, fbcsr::convert_to_hybrid); +GKO_REGISTER_OPERATION(transpose, fbcsr::transpose); +GKO_REGISTER_OPERATION(conj_transpose, fbcsr::conj_transpose); +GKO_REGISTER_OPERATION(row_permute, fbcsr::row_permute); +GKO_REGISTER_OPERATION(column_permute, fbcsr::column_permute); +GKO_REGISTER_OPERATION(inverse_row_permute, fbcsr::inverse_row_permute); +GKO_REGISTER_OPERATION(inverse_column_permute, fbcsr::inverse_column_permute); +GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, + fbcsr::calculate_max_nnz_per_row); +GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, + fbcsr::calculate_nonzeros_per_row); +GKO_REGISTER_OPERATION(sort_by_column_index, fbcsr::sort_by_column_index); +GKO_REGISTER_OPERATION(is_sorted_by_column_index, + fbcsr::is_sorted_by_column_index); +GKO_REGISTER_OPERATION(extract_diagonal, fbcsr::extract_diagonal); +GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); + + +} // namespace fbcsr + + +template +Fbcsr::Fbcsr(std::shared_ptr exec, + const dim<2> &size, size_type num_nonzeros, + int block_size, + std::shared_ptr strategy) + : EnableLinOp(exec, size), + bs_{block_size}, + values_(exec, num_nonzeros), + col_idxs_(exec, gko::blockutils::getNumFixedBlocks( + block_size * block_size, num_nonzeros)), + row_ptrs_(exec, + gko::blockutils::getNumFixedBlocks(block_size, size[0]) + 1), + startrow_(exec, strategy->calc_size(num_nonzeros)), + strategy_(strategy->copy()) +{ + if (size[0] % bs_ != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "construct", + size[0], size[1], + "block size does not divide the dim 0!"); + if (size[1] % bs_ != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "construct", + size[0], size[1], + "block size does not divide the dim 1!"); + if (num_nonzeros % (bs_ * bs_) != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "construct", + size[0], size[1], + "block size^2 does not divide NNZ!"); +} + + +template +void Fbcsr::apply_impl(const LinOp *const b, + LinOp *const x) const +{ + // TODO (script:fbcsr): change the code imported from matrix/csr if needed + using Dense = Dense; + using TFbcsr = Fbcsr; + if (auto b_fbcsr = dynamic_cast(b)) { + // if b is a FBCSR matrix, we compute a SpGeMM + throw /*::gko::*/ NotImplemented(__FILE__, __LINE__, + 
"SpGeMM for Fbcsr"); + auto x_fbcsr = as(x); + this->get_executor()->run(fbcsr::make_spgemm(this, b_fbcsr, x_fbcsr)); + } else { + // otherwise we assume that b is dense and compute a SpMV/SpMM + this->get_executor()->run( + fbcsr::make_spmv(this, as(b), as(x))); + } +} + + +template +void Fbcsr::apply_impl(const LinOp *alpha, const LinOp *b, + const LinOp *beta, LinOp *x) const + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Dense = Dense; +// using TFbcsr = Fbcsr; +// if (auto b_fbcsr = dynamic_cast(b)) { +// // if b is a FBCSR matrix, we compute a SpGeMM +// auto x_fbcsr = as(x); +// auto x_copy = x_fbcsr->clone(); +// this->get_executor()->run( +// fbcsr::make_advanced_spgemm(as(alpha), this, b_fbcsr, +// as(beta), x_copy.get(), +// x_fbcsr)); +// } else if (dynamic_cast *>(b)) { +// // if b is an identity matrix, we compute an SpGEAM +// auto x_fbcsr = as(x); +// auto x_copy = x_fbcsr->clone(); +// this->get_executor()->run(fbcsr::make_spgeam( +// as(alpha), this, as(beta), lend(x_copy), x_fbcsr)); +// } else { +// // otherwise we assume that b is dense and compute a SpMV/SpMM +// this->get_executor()->run( +// fbcsr::make_advanced_spmv(as(alpha), this, as(b), +// as(beta), as(x))); +// } +//} + + +template +void Fbcsr::convert_to( + Fbcsr, IndexType> *const result) const +{ + result->values_ = this->values_; + result->col_idxs_ = this->col_idxs_; + result->row_ptrs_ = this->row_ptrs_; + result->set_size(this->get_size()); + result->bs_ = this->bs_; + convert_strategy_helper(result); +} + + +template +void Fbcsr::move_to( + Fbcsr, IndexType> *result) +{ + this->convert_to(result); +} + + +template +void Fbcsr::convert_to( + Coo *result) const GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// auto tmp = Coo::create( +// exec, this->get_size(), this->get_num_stored_elements()); +// tmp->values_ = this->values_; +// tmp->col_idxs_ = this->col_idxs_; +// exec->run(fbcsr::make_convert_to_coo(this, tmp.get())); +// tmp->move_to(result); +//} + + +template +void Fbcsr::move_to(Coo *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// this->convert_to(result); +//} + + +template +void Fbcsr::convert_to(Dense *result) const + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// auto tmp = Dense::create(exec, this->get_size()); +// exec->run(fbcsr::make_convert_to_dense(this, tmp.get())); +// tmp->move_to(result); +//} + + +template +void Fbcsr::move_to(Dense *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// this->convert_to(result); +//} + + +// template +// void Fbcsr::convert_to( +// Hybrid *result) const +// GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// Array row_nnz(exec, this->get_size()[0]); +// +// size_type ell_lim = zero(); +// size_type coo_lim = zero(); +// result->get_strategy()->compute_hybrid_config(row_nnz, &ell_lim, +// &coo_lim); const auto max_nnz_per_row = +// std::max(result->get_ell_num_stored_elements_per_row(), ell_lim); +// const auto stride = std::max(result->get_ell_stride(), +// this->get_size()[0]); const auto coo_nnz = +// std::max(result->get_coo_num_stored_elements(), coo_lim); +// auto tmp = 
Hybrid::create( +// exec, this->get_size(), max_nnz_per_row, stride, coo_nnz, +// result->get_strategy()); +// exec->run(fbcsr::make_convert_to_hybrid(this, tmp.get())); +// tmp->move_to(result); +//} + + +// template +// void Fbcsr::move_to(Hybrid +// *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// this->convert_to(result); +//} + + +// template +// void Fbcsr::convert_to( +// Sellp *result) const +// GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// const auto stride_factor = (result->get_stride_factor() == 0) +// ? default_stride_factor +// : result->get_stride_factor(); +// const auto slice_size = (result->get_slice_size() == 0) +// ? default_slice_size +// : result->get_slice_size(); +// size_type total_cols = 0; +// exec->run(fbcsr::make_calculate_total_cols(this, &total_cols, +// stride_factor, +// slice_size)); +// auto tmp = Sellp::create( +// exec, this->get_size(), slice_size, stride_factor, total_cols); +// exec->run(fbcsr::make_convert_to_sellp(this, tmp.get())); +// tmp->move_to(result); +//} + + +// template +// void Fbcsr::move_to(Sellp +// *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// this->convert_to(result); +//} + + +template +void Fbcsr::convert_to( + SparsityCsr *result) const +{ + auto exec = this->get_executor(); + auto tmp = SparsityCsr::create( + exec, this->get_size(), this->get_num_stored_elements()); + tmp->col_idxs_ = this->col_idxs_; + tmp->row_ptrs_ = this->row_ptrs_; + // if (result->value_.get_data()) { + // tmp->value_ = result->value_; + // } else { + tmp->value_ = gko::Array(exec, {one()}); + // } + tmp->move_to(result); +} + + +template +void Fbcsr::move_to( + SparsityCsr *result) +{ + this->convert_to(result); +} + + +// template +// void Fbcsr::convert_to( +// Ell *result) const +// GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// size_type max_nnz_per_row; +// exec->run(fbcsr::make_calculate_max_nnz_per_row(this, &max_nnz_per_row)); +// auto tmp = Ell::create(exec, this->get_size(), +// max_nnz_per_row); +// exec->run(fbcsr::make_convert_to_ell(this, tmp.get())); +// tmp->move_to(result); +//} + + +// template +// void Fbcsr::move_to(Ell *result) +// GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// this->convert_to(result); +//} + + +/* Within blocks, the storage order is row-major. + * Currently, this implementation is sequential and has complexity O(n log n) + * assuming nnz = O(n). + * Can this be changed to a parallel O(n) implementation? 
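+ *
+ * As a worked example of the index mapping used below (hypothetical numbers):
+ * with block size bs = 3, the global entry (row, col) = (4, 7) belongs to
+ * block (row / bs, col / bs) = (1, 2) and is stored at local position
+ * (row % bs, col % bs) = (1, 1) within that row-major block.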
+ */ +template +void Fbcsr::read(const mat_data &data) +{ + if (data.nonzeros.size() > std::numeric_limits::max()) + throw std::range_error(std::string("file: ") + __FILE__ + ":" + + std::to_string(__LINE__) + + ": List of nonzeros is too big!"); + + const index_type nnz = static_cast(data.nonzeros.size()); + + const int bs = this->bs_; + // GKO_ASSERT_EQ(nnz%(this->bs_*this->bs_), 0); + + using Blk_t = blockutils::DenseBlock; + + struct FbEntry { + index_type block_row; + index_type block_column; + }; + + struct FbLess { + bool operator()(const FbEntry &a, const FbEntry &b) const + { + if (a.block_row != b.block_row) + return a.block_row < b.block_row; + else + return a.block_column < b.block_column; + } + }; + + auto create_block_set = [nnz, bs](const mat_data &data) { + std::map blocks; + for (index_type inz = 0; inz < nnz; inz++) { + const index_type row = data.nonzeros[inz].row; + const index_type col = data.nonzeros[inz].column; + const value_type val = data.nonzeros[inz].value; + + const int localrow = static_cast(row % bs); + const int localcol = static_cast(col % bs); + const index_type blockrow = row / bs; + const index_type blockcol = col / bs; + + // const typename std::map::iterator it + // = blocks.find(FbEntry{row/bs, col/bs, + // DenseBlock()}); + Blk_t &nnzblk = blocks[{blockrow, blockcol}]; + if (nnzblk.size() == 0) { + nnzblk.resize(bs, bs); + nnzblk.zero(); + nnzblk(localrow, localcol) = val; + } else { + if (nnzblk(localrow, localcol) != gko::zero()) + throw Error(__FILE__, __LINE__, + "Error in reading fixed block CSR matrix!"); + nnzblk(localrow, localcol) = val; + } + } + return blocks; + }; + + const std::map blocks = create_block_set(data); + + auto tmp = Fbcsr::create(this->get_executor()->get_master(), data.size, + blocks.size() * bs * bs, bs, this->get_strategy()); + + tmp->row_ptrs_.get_data()[0] = 0; + index_type cur_brow = 0, cur_bnz = 0, + cur_bcol = blocks.begin()->first.block_column; + const index_type num_brows = data.size[0] / bs; + + gko::blockutils::DenseBlocksView values( + tmp->values_.get_data(), bs, bs); + + for (auto it = blocks.begin(); it != blocks.end(); it++) { + if (cur_brow >= num_brows) + throw gko::OutOfBoundsError(__FILE__, __LINE__, cur_brow, + num_brows); + + // set block-column index and block values + tmp->col_idxs_.get_data()[cur_bnz] = it->first.block_column; + // vals + for (int ibr = 0; ibr < bs; ibr++) + for (int jbr = 0; jbr < bs; jbr++) + values(cur_bnz, ibr, jbr) = it->second(ibr, jbr); + + if (it->first.block_row > cur_brow) { + tmp->row_ptrs_.get_data()[++cur_brow] = cur_bnz; + } else { + assert(cur_brow == it->first.block_row); + assert(cur_bcol <= it->first.block_column); + } + + cur_bcol = it->first.block_column; + cur_bnz++; + } + + tmp->row_ptrs_.get_data()[++cur_brow] = + static_cast(blocks.size()); + assert(cur_brow == tmp->get_size()[0] / bs); + + tmp->make_srow(); + tmp->move_to(this); +} + + +template +void Fbcsr::write(mat_data &data) const +{ + std::unique_ptr op{}; + const Fbcsr *tmp{}; + if (this->get_executor()->get_master() != this->get_executor()) { + op = this->clone(this->get_executor()->get_master()); + tmp = static_cast(op.get()); + } else { + tmp = this; + } + + data = {tmp->get_size(), {}}; + + const gko::blockutils::DenseBlocksView + vblocks(tmp->values_.get_const_data(), bs_, bs_); + + for (size_type brow = 0; brow < tmp->get_size()[0] / bs_; ++brow) { + const auto start = tmp->row_ptrs_.get_const_data()[brow]; + const auto end = tmp->row_ptrs_.get_const_data()[brow + 1]; + + for (auto inz = start; 
inz < end; ++inz) { + for (int ib = 0; ib < bs_; ib++) { + const auto row = brow * bs_ + ib; + for (int jb = 0; jb < bs_; jb++) { + const auto col = + tmp->col_idxs_.get_const_data()[inz] * bs_ + jb; + const auto val = vblocks(inz, ib, jb); + data.nonzeros.emplace_back(row, col, val); + } + } + } + } +} + + +template +std::unique_ptr Fbcsr::transpose() const + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// auto trans_cpy = +// Fbcsr::create(exec, gko::transpose(this->get_size()), +// this->get_num_stored_elements(), this->get_strategy()); +// +// exec->run(fbcsr::make_transpose(this, trans_cpy.get())); +// trans_cpy->make_srow(); +// return std::move(trans_cpy); +//} + + +template +std::unique_ptr Fbcsr::conj_transpose() const + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// auto trans_cpy = +// Fbcsr::create(exec, gko::transpose(this->get_size()), +// this->get_num_stored_elements(), this->get_strategy()); +// +// exec->run(fbcsr::make_conj_transpose(this, trans_cpy.get())); +// trans_cpy->make_srow(); +// return std::move(trans_cpy); +//} + + +template +std::unique_ptr Fbcsr::row_permute( + const Array *permutation_indices) const GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); +// auto exec = this->get_executor(); +// auto permute_cpy = +// Fbcsr::create(exec, this->get_size(), this->get_num_stored_elements(), +// this->get_strategy()); +// +// exec->run( +// fbcsr::make_row_permute(permutation_indices, this, +// permute_cpy.get())); +// permute_cpy->make_srow(); +// return std::move(permute_cpy); +//} + + +template +std::unique_ptr Fbcsr::column_permute( + const Array *permutation_indices) const GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); +// auto exec = this->get_executor(); +// auto permute_cpy = +// Fbcsr::create(exec, this->get_size(), this->get_num_stored_elements(), +// this->get_strategy()); +// +// exec->run( +// fbcsr::make_column_permute(permutation_indices, this, +// permute_cpy.get())); +// permute_cpy->make_srow(); +// return std::move(permute_cpy); +//} + + +template +std::unique_ptr Fbcsr::inverse_row_permute( + const Array *inverse_permutation_indices) const + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), +// this->get_size()[0]); +// auto exec = this->get_executor(); +// auto inverse_permute_cpy = +// Fbcsr::create(exec, this->get_size(), this->get_num_stored_elements(), +// this->get_strategy()); +// +// exec->run(fbcsr::make_inverse_row_permute(inverse_permutation_indices, +// this, +// inverse_permute_cpy.get())); +// inverse_permute_cpy->make_srow(); +// return std::move(inverse_permute_cpy); +//} + + +template +std::unique_ptr Fbcsr::inverse_column_permute( + const Array *inverse_permutation_indices) const + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), +// this->get_size()[1]); +// auto exec = this->get_executor(); +// auto inverse_permute_cpy = +// Fbcsr::create(exec, 
this->get_size(), this->get_num_stored_elements(), +// this->get_strategy()); +// +// exec->run(fbcsr::make_inverse_column_permute( +// inverse_permutation_indices, this, inverse_permute_cpy.get())); +// inverse_permute_cpy->make_srow(); +// return std::move(inverse_permute_cpy); +//} + + +template +void Fbcsr::sort_by_column_index() GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// exec->run(fbcsr::make_sort_by_column_index(this)); +//} + + +template +bool Fbcsr::is_sorted_by_column_index() const + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// bool is_sorted; +// exec->run(fbcsr::make_is_sorted_by_column_index(this, &is_sorted)); +// return is_sorted; +//} + + +template +std::unique_ptr> +Fbcsr::extract_diagonal() const GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// +// const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); +// auto diag = Diagonal::create(exec, diag_size); +// exec->run(fbcsr::make_fill_array(diag->get_values(), diag->get_size()[0], +// zero())); +// exec->run(fbcsr::make_extract_diagonal(this, lend(diag))); +// return diag; +//} + + +template +void Fbcsr::compute_absolute_inplace() + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// +// exec->run(fbcsr::make_inplace_absolute_array( +// this->get_values(), this->get_num_stored_elements())); +//} + + +template +std::unique_ptr::absolute_type> +Fbcsr::compute_absolute() const GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// +// auto abs_fbcsr = absolute_type::create(exec, this->get_size(), +// this->get_num_stored_elements()); +// +// abs_fbcsr->col_idxs_ = col_idxs_; +// abs_fbcsr->row_ptrs_ = row_ptrs_; +// exec->run(fbcsr::make_outplace_absolute_array(this->get_const_values(), +// this->get_num_stored_elements(), +// abs_fbcsr->get_values())); +// +// convert_strategy_helper(abs_fbcsr.get()); +// return abs_fbcsr; +//} + + +#define GKO_DECLARE_FBCSR_MATRIX(ValueType, IndexType) \ + class Fbcsr +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_MATRIX); + + +} // namespace matrix +} // namespace gko diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp new file mode 100644 index 00000000000..54b6ece53f9 --- /dev/null +++ b/core/matrix/fbcsr_builder.hpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ +#define GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ + + +#include + + +namespace gko { +namespace matrix { + + +/** + * @internal + * + * Allows intrusive access to the arrays stored within a @ref Fbcsr matrix. + * + * @tparam ValueType the value type of the matrix + * @tparam IndexType the index type of the matrix + */ +template +class FbcsrBuilder { +public: + /** + * Returns the column index array of the CSR matrix. + */ + Array &get_col_idx_array() { return matrix_->col_idxs_; } + + /** + * Returns the value array of the CSR matrix. + */ + Array &get_value_array() { return matrix_->values_; } + + /// Returns the (uniform) block size + int get_block_size() const { return matrix_->bs_; } + + /** + * Initializes a CsrBuilder from an existing CSR matrix. + */ + explicit FbcsrBuilder(Fbcsr *const matrix) + : matrix_{matrix} + {} + + /** + * Updates the internal matrix data structures at destruction. + */ + ~FbcsrBuilder() { matrix_->make_srow(); } + + // make this type non-movable + FbcsrBuilder(const FbcsrBuilder &) = delete; + FbcsrBuilder(FbcsrBuilder &&) = delete; + FbcsrBuilder &operator=(const FbcsrBuilder &) = delete; + FbcsrBuilder &operator=(FbcsrBuilder &&) = delete; + +private: + Fbcsr *matrix_; +}; + + +} // namespace matrix +} // namespace gko + +#endif // GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp new file mode 100644 index 00000000000..715bb1192ce --- /dev/null +++ b/core/matrix/fbcsr_kernels.hpp @@ -0,0 +1,275 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ +#define GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) \ + void spmv(std::shared_ptr exec, \ + const matrix::Fbcsr *a, \ + const matrix::Dense *b, matrix::Dense *c) + +#define GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ + void advanced_spmv(std::shared_ptr exec, \ + const matrix::Dense *alpha, \ + const matrix::Fbcsr *a, \ + const matrix::Dense *b, \ + const matrix::Dense *beta, \ + matrix::Dense *c) + +#define GKO_DECLARE_FBCSR_SPGEMM_KERNEL(ValueType, IndexType) \ + void spgemm(std::shared_ptr exec, \ + const matrix::Fbcsr *a, \ + const matrix::Fbcsr *b, \ + matrix::Fbcsr *c) + +#define GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) \ + void advanced_spgemm(std::shared_ptr exec, \ + const matrix::Dense *alpha, \ + const matrix::Fbcsr *a, \ + const matrix::Fbcsr *b, \ + const matrix::Dense *beta, \ + const matrix::Fbcsr *d, \ + matrix::Fbcsr *c) + +#define GKO_DECLARE_FBCSR_SPGEAM_KERNEL(ValueType, IndexType) \ + void spgeam(std::shared_ptr exec, \ + const matrix::Dense *alpha, \ + const matrix::Fbcsr *a, \ + const matrix::Dense *beta, \ + const matrix::Fbcsr *b, \ + matrix::Fbcsr *c) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ + void convert_to_dense(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Dense *result) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType) \ + void convert_to_coo(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Coo *result) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType) \ + void convert_to_ell(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Ell *result) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType) \ + void convert_to_hybrid(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Hybrid *result) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType) \ + void convert_to_sellp(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Sellp *result) + +#define GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL(ValueType, IndexType) \ + void calculate_total_cols( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *source, size_type *result, \ + size_type stride_factor, size_type slice_size) + +#define GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) \ + void transpose(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *trans) + +#define GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) \ + void conj_transpose(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *trans) + +#define GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ + void 
row_permute(std::shared_ptr exec, \ + const Array *permutation_indices, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *row_permuted) + +#define GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ + void column_permute(std::shared_ptr exec, \ + const Array *permutation_indices, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *column_permuted) + +#define GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ + void inverse_row_permute( \ + std::shared_ptr exec, \ + const Array *permutation_indices, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *row_permuted) + +#define GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ + void inverse_column_permute( \ + std::shared_ptr exec, \ + const Array *permutation_indices, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *column_permuted) + +#define GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, \ + IndexType) \ + void calculate_max_nnz_per_row( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *source, size_type *result) + +#define GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, \ + IndexType) \ + void calculate_nonzeros_per_row( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + Array *result) + +#define GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ + void sort_by_column_index(std::shared_ptr exec, \ + matrix::Fbcsr *to_sort) + +#define GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ + void is_sorted_by_column_index( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *to_check, bool *is_sorted) + +#define GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) \ + void extract_diagonal(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Diagonal *diag) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_SPGEMM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_SPGEAM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + 
GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) + + +namespace omp { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace omp + + +namespace cuda { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace cuda + + +namespace reference { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace reference + + +namespace hip { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace hip + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index ac36ed4f7ff..64b3b3ed593 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -5,6 +5,8 @@ ginkgo_create_test(csr_builder) ginkgo_create_test(dense) ginkgo_create_test(diagonal) ginkgo_create_test(ell) +ginkgo_create_test(fbcsr) +ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) ginkgo_create_test(identity) ginkgo_create_test(permutation) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp new file mode 100644 index 00000000000..dd456ef7316 --- /dev/null +++ b/core/test/matrix/fbcsr.cpp @@ -0,0 +1,378 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include + + +#include +#include +#include + + +#include "core/components/fixed_block.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +namespace matstr = gko::matrix::matrix_strategy; + + +template +class Fbcsr : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + + Fbcsr() + : exec(gko::ReferenceExecutor::create()), + mtx(Mtx::create(exec, gko::dim<2>{6, 12}, 36, 3, + std::make_shared>())) + { + const int bs = 3; + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 2; + r[2] = 4; + c[0] = 1; + c[1] = 3; + c[2] = 0; + c[3] = 2; + + gko::blockutils::DenseBlocksView vals(v, bs, + bs); + + if (mtx->get_size()[0] % bs != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", + mtx->get_size()[0], mtx->get_size()[1], + "block size does not divide the size!"); + + for (index_type ibrow = 0; ibrow < mtx->get_size()[0] / bs; ibrow++) { + const index_type *const browptr = mtx->get_row_ptrs(); + for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; + inz++) { + const index_type bcolind = mtx->get_col_idxs()[inz]; + const value_type base = (ibrow + 1) * (bcolind + 1); + for (int ival = 0; ival < bs; ival++) + for (int jval = 0; jval < bs; jval++) + vals(inz, ival, jval) = + base + static_cast>( + ival * bs + jval); + } + } + + // Some of the entries are set to zero + vals(0, 2, 0) = gko::zero(); + vals(0, 2, 2) = gko::zero(); + vals(3, 0, 0) = gko::zero(); + + for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) + s[is] = 0; + + // backup for move tests + orig_size = mtx->get_size(); + orig_rowptrs.resize(3); + orig_colinds.resize(4); + orig_vals.resize(36); + for (index_type i = 0; i < 3; i++) orig_rowptrs[i] = r[i]; + for (index_type i = 0; i < 4; i++) orig_colinds[i] = c[i]; + for (index_type i = 0; i < 36; i++) orig_vals[i] = v[i]; + } + + std::shared_ptr exec; + std::unique_ptr mtx; + + gko::dim<2> orig_size; + std::vector orig_vals; + std::vector orig_rowptrs; + std::vector orig_colinds; + + void assert_equal_to_original_mtx(const Mtx *m) + { + auto v = m->get_const_values(); + auto c = m->get_const_col_idxs(); + auto r = m->get_const_row_ptrs(); + auto s = m->get_const_srow(); + + const int bs = 3; + + ASSERT_EQ(m->get_size(), orig_size); + ASSERT_EQ(m->get_num_stored_elements(), orig_vals.size()); + ASSERT_EQ(m->get_block_size(), bs); + + + for (index_type irow = 0; irow < orig_size[0] / bs; irow++) { + const index_type *const rowptr = &orig_rowptrs[0]; + ASSERT_EQ(r[irow], rowptr[irow]); + + for (index_type inz = rowptr[irow]; inz < rowptr[irow + 1]; inz++) { + ASSERT_EQ(c[inz], orig_colinds[inz]); + + for (int i = 0; i < bs * bs; i++) { + // ASSERT_LT(gko::abs(v[inz*bs*bs + i] - + // mtx->get_values()[inz*bs*bs + i]), + // std::numeric_limits>::epsilon()); + ASSERT_EQ(v[inz * bs * bs + i], + orig_vals[inz * bs * bs + i]); + } + } + } + + ASSERT_EQ(m->get_num_srow_elements(), 0); + // for(index_type is = 0; is < mtx->get_num_srow_elements(); is++) + // ASSERT_EQ(s[is], 0); + } + + void assert_empty(const Mtx *m) + { + ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); + ASSERT_EQ(m->get_num_stored_elements(), 0); + 
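+        // An empty (default-constructed) Fbcsr is expected to fall back to
+        // block size 1 and to allocate no value or column-index storage;
+        // only the row-pointers array is non-null (checked below).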
ASSERT_EQ(m->get_block_size(), 1); + ASSERT_EQ(m->get_const_values(), nullptr); + ASSERT_EQ(m->get_const_col_idxs(), nullptr); + ASSERT_NE(m->get_const_row_ptrs(), nullptr); + ASSERT_EQ(m->get_const_srow(), nullptr); + } +}; + +TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); + + +TYPED_TEST(Fbcsr, KnowsItsSize) +{ + ASSERT_EQ(this->mtx->get_size(), gko::dim<2>(6, 12)); + ASSERT_EQ(this->mtx->get_block_size(), 3); + ASSERT_EQ(this->mtx->get_num_stored_elements(), 36); +} + + +TYPED_TEST(Fbcsr, ContainsCorrectData) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(Fbcsr, CanBeEmpty) +{ + using Mtx = typename TestFixture::Mtx; + auto mtx = Mtx::create(this->exec); + + this->assert_empty(mtx.get()); +} + + +TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + + constexpr int bs = 3; + constexpr index_type nbrows = 2; + constexpr index_type nbcols = 4; + constexpr index_type bnnz = 4; + value_type values[bnnz * bs * bs]; + index_type col_idxs[] = {1, 3, 0, 2}; + index_type row_ptrs[] = {0, 2, 4}; + + gko::blockutils::DenseBlocksView vals(values, bs, + bs); + + for (index_type ibrow = 0; ibrow < nbrows; ibrow++) { + for (index_type inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; + inz++) { + const index_type bcolind = col_idxs[inz]; + const value_type base = (ibrow + 1) * (bcolind + 1); + for (int ival = 0; ival < bs; ival++) + for (int jval = 0; jval < bs; jval++) + vals(inz, ival, jval) = + base + static_cast>( + ival * bs + jval); + } + } + + // Some of the entries are set to zero + vals(0, 2, 0) = gko::zero(); + vals(0, 2, 2) = gko::zero(); + vals(3, 0, 0) = gko::zero(); + + auto mtx = gko::matrix::Fbcsr::create( + this->exec, gko::dim<2>{nbrows * bs, nbcols * bs}, bs, + gko::Array::view(this->exec, bnnz * bs * bs, values), + gko::Array::view(this->exec, bnnz, col_idxs), + gko::Array::view(this->exec, nbrows + 1, row_ptrs), + std::make_shared>()); + + ASSERT_EQ(mtx->get_num_srow_elements(), 0); + ASSERT_EQ(mtx->get_const_values(), values); + ASSERT_EQ(mtx->get_const_col_idxs(), col_idxs); + ASSERT_EQ(mtx->get_const_row_ptrs(), row_ptrs); + // ASSERT_EQ(mtx->get_const_srow()[0], 0); +} + + +TYPED_TEST(Fbcsr, CanBeCopied) +{ + using Mtx = typename TestFixture::Mtx; + auto copy = Mtx::create(this->exec); + + copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->get_values()[1] = 3.0; + this->assert_equal_to_original_mtx(copy.get()); +} + + +TYPED_TEST(Fbcsr, CanBeMoved) +{ + using Mtx = typename TestFixture::Mtx; + auto copy = Mtx::create(this->exec); + + copy->copy_from(std::move(this->mtx)); + + this->assert_equal_to_original_mtx(copy.get()); +} + + +TYPED_TEST(Fbcsr, CanBeCloned) +{ + using Mtx = typename TestFixture::Mtx; + auto clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->get_values()[1] = 5.0; + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); +} + + +TYPED_TEST(Fbcsr, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(Fbcsr, CanBeReadFromMatrixData) +{ + // TODO (script:fbcsr): change the code imported from matrix/csr if needed + using Mtx = typename TestFixture::Mtx; + auto m = + Mtx::create(this->exec, std::make_shared>()); + m->set_block_size(3); + + // Assuming row-major blocks + m->read( + {{6, 12}, {{0, 3, 2.0}, {0, 4, 3.0}, {0, 5, 4.0}, 
{1, 3, 5.0}, + {1, 4, 6.0}, {1, 5, 7.0}, {2, 4, 9.0}, + + {0, 9, 4.0}, {0, 10, 5.0}, {0, 11, 6.0}, {1, 9, 7.0}, + {1, 10, 8.0}, {1, 11, 9.0}, {2, 9, 10.0}, {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, {3, 1, 3.0}, {3, 2, 4.0}, {4, 0, 5.0}, + {4, 1, 6.0}, {4, 2, 7.0}, {5, 0, 8.0}, {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 7, 7.0}, {3, 8, 8.0}, {4, 6, 9.0}, {4, 7, 10.0}, + {4, 8, 11.0}, {5, 6, 12.0}, {5, 7, 13.0}, {5, 8, 14.0}}}); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using tpl = typename gko::matrix_data::nonzero_type; + gko::matrix_data data; + + this->mtx->write(data); + data.ensure_row_major_order(); + + ASSERT_EQ(data.size, gko::dim<2>(6, 12)); + ASSERT_EQ(data.nonzeros.size(), 36); + EXPECT_EQ(data.nonzeros[0], tpl(0, 3, value_type{2.0})); + EXPECT_EQ(data.nonzeros[1], tpl(0, 4, value_type{3.0})); + EXPECT_EQ(data.nonzeros[2], tpl(0, 5, value_type{4.0})); + EXPECT_EQ(data.nonzeros[3], tpl(0, 9, value_type{4.0})); + EXPECT_EQ(data.nonzeros[4], tpl(0, 10, value_type{5.0})); + EXPECT_EQ(data.nonzeros[5], tpl(0, 11, value_type{6.0})); + + EXPECT_EQ(data.nonzeros[6], tpl(1, 3, value_type{5.0})); + EXPECT_EQ(data.nonzeros[7], tpl(1, 4, value_type{6.0})); + EXPECT_EQ(data.nonzeros[8], tpl(1, 5, value_type{7.0})); + EXPECT_EQ(data.nonzeros[9], tpl(1, 9, value_type{7.0})); + EXPECT_EQ(data.nonzeros[10], tpl(1, 10, value_type{8.0})); + EXPECT_EQ(data.nonzeros[11], tpl(1, 11, value_type{9.0})); + + EXPECT_EQ(data.nonzeros[12], tpl(2, 3, value_type{0.0})); + EXPECT_EQ(data.nonzeros[13], tpl(2, 4, value_type{9.0})); + EXPECT_EQ(data.nonzeros[14], tpl(2, 5, value_type{0.0})); + EXPECT_EQ(data.nonzeros[15], tpl(2, 9, value_type{10.0})); + EXPECT_EQ(data.nonzeros[16], tpl(2, 10, value_type{11.0})); + EXPECT_EQ(data.nonzeros[17], tpl(2, 11, value_type{12.0})); + + EXPECT_EQ(data.nonzeros[18], tpl(3, 0, value_type{2.0})); + EXPECT_EQ(data.nonzeros[19], tpl(3, 1, value_type{3.0})); + EXPECT_EQ(data.nonzeros[20], tpl(3, 2, value_type{4.0})); + EXPECT_EQ(data.nonzeros[21], tpl(3, 6, value_type{0.0})); + EXPECT_EQ(data.nonzeros[22], tpl(3, 7, value_type{7.0})); + EXPECT_EQ(data.nonzeros[23], tpl(3, 8, value_type{8.0})); + + EXPECT_EQ(data.nonzeros[24], tpl(4, 0, value_type{5.0})); + EXPECT_EQ(data.nonzeros[25], tpl(4, 1, value_type{6.0})); + EXPECT_EQ(data.nonzeros[26], tpl(4, 2, value_type{7.0})); + EXPECT_EQ(data.nonzeros[27], tpl(4, 6, value_type{9.0})); + EXPECT_EQ(data.nonzeros[28], tpl(4, 7, value_type{10.0})); + EXPECT_EQ(data.nonzeros[29], tpl(4, 8, value_type{11.0})); + + EXPECT_EQ(data.nonzeros[30], tpl(5, 0, value_type{8.0})); + EXPECT_EQ(data.nonzeros[31], tpl(5, 1, value_type{9.0})); + EXPECT_EQ(data.nonzeros[32], tpl(5, 2, value_type{10.0})); + EXPECT_EQ(data.nonzeros[33], tpl(5, 6, value_type{12.0})); + EXPECT_EQ(data.nonzeros[34], tpl(5, 7, value_type{13.0})); + EXPECT_EQ(data.nonzeros[35], tpl(5, 8, value_type{14.0})); +} + + +} // namespace diff --git a/core/test/matrix/fbcsr_builder.cpp b/core/test/matrix/fbcsr_builder.cpp new file mode 100644 index 00000000000..31ccc2145bc --- /dev/null +++ b/core/test/matrix/fbcsr_builder.cpp @@ -0,0 +1,120 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_builder.hpp" + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class FbcsrBuilder : public ::testing::Test { +public: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + +protected: + FbcsrBuilder() + : exec(gko::ReferenceExecutor::create()), + mtx(Mtx::create(exec, gko::dim<2>{2, 3}, 4)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx; +}; + +TYPED_TEST_CASE(FbcsrBuilder, gko::test::ValueIndexTypes); + + +TYPED_TEST(FbcsrBuilder, ReturnsCorrectArrays) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + gko::matrix::FbcsrBuilder builder{this->mtx.get()}; + + auto builder_col_idxs = builder.get_col_idx_array().get_data(); + auto builder_values = builder.get_value_array().get_data(); + auto ref_col_idxs = this->mtx->get_col_idxs(); + auto ref_values = this->mtx->get_values(); + + ASSERT_EQ(builder_col_idxs, ref_col_idxs); + ASSERT_EQ(builder_values, ref_values); +} + + +TYPED_TEST(FbcsrBuilder, UpdatesSrowOnDestruction) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + struct mock_strategy + : public gko::matrix::matrix_strategy::strategy_type { + virtual void process(const gko::Array &, + gko::Array *) override + { + *was_called = true; + } + + virtual int64_t calc_size(const int64_t nnz) override { return 0; } + + virtual std::shared_ptr copy() override + { + return std::make_shared(*was_called); + } + + mock_strategy(bool &flag) : Mtx::strategy_type(""), was_called(&flag) {} + + bool *was_called; + }; + bool was_called{}; + this->mtx->set_strategy(std::make_shared(was_called)); + was_called = false; + + gko::matrix::FbcsrBuilder{this->mtx.get()}; + + ASSERT_TRUE(was_called); +} + + +} // 
namespace diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index a4ded75abed..842f4b75259 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -98,6 +98,7 @@ target_sources(ginkgo_cuda matrix/dense_kernels.cu matrix/diagonal_kernels.cu matrix/ell_kernels.cu + matrix/fbcsr_kernels.cu matrix/hybrid_kernels.cu matrix/sellp_kernels.cu matrix/sparsity_csr_kernels.cu diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu new file mode 100644 index 00000000000..5d67bcc90d3 --- /dev/null +++ b/cuda/matrix/fbcsr_kernels.cu @@ -0,0 +1,1446 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/matrix/fbcsr_builder.hpp" +#include "core/synthesizer/implementation_selection.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cusparse_bindings.hpp" +#include "cuda/base/math.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/types.hpp" +#include "cuda/components/atomic.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/intrinsics.cuh" +#include "cuda/components/merging.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/segment_scan.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
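+ *
+ * As in the core tests ("Assuming row-major blocks"), each nonzero block is
+ * stored contiguously in row-major order, so for block size bs the scalar of
+ * block-nonzero nz at local position (i, j) is expected to live at
+ * values[nz * bs * bs + i * bs + j].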
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +constexpr int default_block_size = 512; +constexpr int warps_in_block = 4; +constexpr int spmv_block_size = warps_in_block * config::warp_size; +constexpr int wsize = config::warp_size; +constexpr int classical_overweight = 32; + + +/** + * A compile-time list of the number items per threads for which spmv kernel + * should be compiled. + */ +using compiled_kernels = syn::value_list; + +using classical_kernels = + syn::value_list; + +using spgeam_kernels = + syn::value_list; + + +namespace host_kernel { + + +template +void merge_path_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); +// const IndexType grid_num = +// ceildiv(total, spmv_block_size * items_per_thread); +// const dim3 grid(grid_num); +// const dim3 block(spmv_block_size); +// Array row_out(exec, grid_num); +// Array val_out(exec, grid_num); +// +// for (IndexType column_id = 0; column_id < b->get_size()[1]; column_id++) { +// if (alpha == nullptr && beta == nullptr) { +// const auto b_vals = b->get_const_values() + column_id; +// auto c_vals = c->get_values() + column_id; +// kernel::abstract_merge_path_spmv +// <<>>( +// static_cast(a->get_size()[0]), +// as_cuda_type(a->get_const_values()), +// a->get_const_col_idxs(), +// as_cuda_type(a->get_const_row_ptrs()), +// as_cuda_type(a->get_const_srow()), as_cuda_type(b_vals), +// b->get_stride(), as_cuda_type(c_vals), c->get_stride(), +// as_cuda_type(row_out.get_data()), +// as_cuda_type(val_out.get_data())); +// kernel::abstract_reduce<<<1, spmv_block_size>>>( +// grid_num, as_cuda_type(val_out.get_data()), +// as_cuda_type(row_out.get_data()), as_cuda_type(c_vals), +// c->get_stride()); +// +// } else if (alpha != nullptr && beta != nullptr) { +// const auto b_vals = b->get_const_values() + column_id; +// auto c_vals = c->get_values() + column_id; +// kernel::abstract_merge_path_spmv +// <<>>( +// static_cast(a->get_size()[0]), +// as_cuda_type(alpha->get_const_values()), +// as_cuda_type(a->get_const_values()), +// a->get_const_col_idxs(), +// as_cuda_type(a->get_const_row_ptrs()), +// as_cuda_type(a->get_const_srow()), as_cuda_type(b_vals), +// b->get_stride(), as_cuda_type(beta->get_const_values()), +// as_cuda_type(c_vals), c->get_stride(), +// as_cuda_type(row_out.get_data()), +// as_cuda_type(val_out.get_data())); +// kernel::abstract_reduce<<<1, spmv_block_size>>>( +// grid_num, as_cuda_type(val_out.get_data()), +// as_cuda_type(row_out.get_data()), +// as_cuda_type(alpha->get_const_values()), as_cuda_type(c_vals), +// c->get_stride()); +// } else { +// GKO_KERNEL_NOT_FOUND; +// } +// } +//} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_merge_path_spmv, merge_path_spmv); + + +template +int compute_items_per_thread(std::shared_ptr exec) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const int version = +// (exec->get_major_version() << 4) + exec->get_minor_version(); +// // The num_item is decided to make the occupancy 100% +// // TODO: Extend this list when new GPU is released +// // Tune this parameter +// // 128 threads/block the number of items per threads +// // 3.0 3.5: 6 +// // 3.7: 14 +// // 5.0, 5.3, 6.0, 6.2: 8 +// // 5.2, 6.1, 
7.0: 12 +// int num_item = 6; +// switch (version) { +// case 0x50: +// case 0x53: +// case 0x60: +// case 0x62: +// num_item = 8; +// break; +// case 0x52: +// case 0x61: +// case 0x70: +// num_item = 12; +// break; +// case 0x37: +// num_item = 14; +// } +// // Ensure that the following is satisfied: +// // sizeof(IndexType) + sizeof(ValueType) +// // <= items_per_thread * sizeof(IndexType) +// constexpr int minimal_num = +// ceildiv(sizeof(IndexType) + sizeof(ValueType), sizeof(IndexType)); +// int items_per_thread = num_item * 4 / sizeof(IndexType); +// return std::max(minimal_num, items_per_thread); +//} + + +template +void classical_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto nwarps = exec->get_num_warps_per_sm() * +// exec->get_num_multiprocessor() * classical_overweight; +// const auto gridx = +// std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size), +// int64(nwarps / warps_in_block)); +// const dim3 grid(gridx, b->get_size()[1]); +// const dim3 block(spmv_block_size); +// +// if (alpha == nullptr && beta == nullptr) { +// kernel::abstract_classical_spmv<<>>( +// a->get_size()[0], as_cuda_type(a->get_const_values()), +// a->get_const_col_idxs(), as_cuda_type(a->get_const_row_ptrs()), +// as_cuda_type(b->get_const_values()), b->get_stride(), +// as_cuda_type(c->get_values()), c->get_stride()); +// +// } else if (alpha != nullptr && beta != nullptr) { +// kernel::abstract_classical_spmv<<>>( +// a->get_size()[0], as_cuda_type(alpha->get_const_values()), +// as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), +// as_cuda_type(a->get_const_row_ptrs()), +// as_cuda_type(b->get_const_values()), b->get_stride(), +// as_cuda_type(beta->get_const_values()), +// as_cuda_type(c->get_values()), c->get_stride()); +// } else { +// GKO_KERNEL_NOT_FOUND; +// } +//} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); + + +} // namespace host_kernel + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (a->get_strategy()->get_name() == "load_balance") { +// components::fill_array(exec, c->get_values(), +// c->get_num_stored_elements(), +// zero()); +// const IndexType nwarps = a->get_num_srow_elements(); +// if (nwarps > 0) { +// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); +// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), +// b->get_size()[1]); +// kernel::abstract_spmv<<>>( +// nwarps, static_cast(a->get_size()[0]), +// as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), +// as_cuda_type(a->get_const_row_ptrs()), +// as_cuda_type(a->get_const_srow()), +// as_cuda_type(b->get_const_values()), +// as_cuda_type(b->get_stride()), as_cuda_type(c->get_values()), +// as_cuda_type(c->get_stride())); +// } else { +// GKO_NOT_SUPPORTED(nwarps); +// } +// } else if (a->get_strategy()->get_name() == "merge_path") { +// int items_per_thread = +// host_kernel::compute_items_per_thread(exec); +// host_kernel::select_merge_path_spmv( +// compiled_kernels(), +// [&items_per_thread](int compiled_info) { +// return items_per_thread == compiled_info; +// }, +// syn::value_list(), 
syn::type_list<>(), exec, a, b, c); +// } else if (a->get_strategy()->get_name() == "classical") { +// IndexType max_length_per_row = 0; +// using Tfbcsr = matrix::Fbcsr; +// if (auto strategy = +// std::dynamic_pointer_cast( +// a->get_strategy())) { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else if (auto strategy = std::dynamic_pointer_cast< +// const typename Tfbcsr::automatical>(a->get_strategy())) +// { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else { +// GKO_NOT_SUPPORTED(a->get_strategy()); +// } +// host_kernel::select_classical_spmv( +// classical_kernels(), +// [&max_length_per_row](int compiled_info) { +// return max_length_per_row >= compiled_info; +// }, +// syn::value_list(), syn::type_list<>(), exec, a, b, c); +// } else if (a->get_strategy()->get_name() == "sparselib" || +// a->get_strategy()->get_name() == "cusparse") { +// if (cusparse::is_supported::value) { +// // TODO: add implementation for int64 and multiple RHS +// auto handle = exec->get_cusparse_handle(); +// { +// cusparse::pointer_mode_guard pm_guard(handle); +// const auto alpha = one(); +// const auto beta = zero(); +// // TODO: add implementation for int64 and multiple RHS +// if (b->get_stride() != 1 || c->get_stride() != 1) +// GKO_NOT_IMPLEMENTED; +// +//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +// auto descr = cusparse::create_mat_descr(); +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = a->get_const_col_idxs(); +// cusparse::spmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, +// a->get_size()[0], a->get_size()[1], +// a->get_num_stored_elements(), &alpha, descr, +// a->get_const_values(), row_ptrs, col_idxs, +// b->get_const_values(), &beta, c->get_values()); +// +// cusparse::destroy(descr); +//#else // CUDA_VERSION >= 11000 +// cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE; +// cusparseSpMVAlg_t alg = CUSPARSE_FBCSRMV_ALG1; +// auto row_ptrs = +// const_cast(a->get_const_row_ptrs()); +// auto col_idxs = +// const_cast(a->get_const_col_idxs()); +// auto values = const_cast(a->get_const_values()); +// auto mat = cusparse::create_fbcsr( +// a->get_size()[0], a->get_size()[1], +// a->get_num_stored_elements(), row_ptrs, col_idxs, values); +// auto b_val = const_cast(b->get_const_values()); +// auto c_val = c->get_values(); +// auto vecb = +// cusparse::create_dnvec(b->get_num_stored_elements(), +// b_val); +// auto vecc = +// cusparse::create_dnvec(c->get_num_stored_elements(), +// c_val); +// size_type buffer_size = 0; +// cusparse::spmv_buffersize(handle, trans, &alpha, +// mat, +// vecb, &beta, vecc, alg, +// &buffer_size); +// +// gko::Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// cusparse::spmv(handle, trans, &alpha, mat, vecb, +// &beta, vecc, alg, buffer); +// cusparse::destroy(vecb); +// cusparse::destroy(vecc); +// cusparse::destroy(mat); +//#endif +// } +// } else { +// GKO_NOT_IMPLEMENTED; +// } +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (a->get_strategy()->get_name() == "load_balance") { +// dense::scale(exec, beta, c); +// +// const IndexType nwarps = a->get_num_srow_elements(); +// +// if 
(nwarps > 0) { +// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); +// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), +// b->get_size()[1]); +// kernel::abstract_spmv<<>>( +// nwarps, static_cast(a->get_size()[0]), +// as_cuda_type(alpha->get_const_values()), +// as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), +// as_cuda_type(a->get_const_row_ptrs()), +// as_cuda_type(a->get_const_srow()), +// as_cuda_type(b->get_const_values()), +// as_cuda_type(b->get_stride()), as_cuda_type(c->get_values()), +// as_cuda_type(c->get_stride())); +// } else { +// GKO_NOT_SUPPORTED(nwarps); +// } +// } else if (a->get_strategy()->get_name() == "sparselib" || +// a->get_strategy()->get_name() == "cusparse") { +// if (cusparse::is_supported::value) { +// // TODO: add implementation for int64 and multiple RHS +// if (b->get_stride() != 1 || c->get_stride() != 1) +// GKO_NOT_IMPLEMENTED; +// +//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +// auto descr = cusparse::create_mat_descr(); +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = a->get_const_col_idxs(); +// cusparse::spmv(exec->get_cusparse_handle(), +// CUSPARSE_OPERATION_NON_TRANSPOSE, a->get_size()[0], +// a->get_size()[1], a->get_num_stored_elements(), +// alpha->get_const_values(), descr, +// a->get_const_values(), row_ptrs, col_idxs, +// b->get_const_values(), beta->get_const_values(), +// c->get_values()); +// +// cusparse::destroy(descr); +//#else // CUDA_VERSION >= 11000 +// cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE; +// cusparseSpMVAlg_t alg = CUSPARSE_FBCSRMV_ALG1; +// auto row_ptrs = const_cast(a->get_const_row_ptrs()); +// auto col_idxs = const_cast(a->get_const_col_idxs()); +// auto values = const_cast(a->get_const_values()); +// auto mat = cusparse::create_fbcsr(a->get_size()[0], +// a->get_size()[1], +// a->get_num_stored_elements(), +// row_ptrs, col_idxs, values); +// auto b_val = const_cast(b->get_const_values()); +// auto c_val = c->get_values(); +// auto vecb = +// cusparse::create_dnvec(b->get_num_stored_elements(), b_val); +// auto vecc = +// cusparse::create_dnvec(c->get_num_stored_elements(), c_val); +// size_type buffer_size = 0; +// cusparse::spmv_buffersize( +// exec->get_cusparse_handle(), trans, alpha->get_const_values(), +// mat, vecb, beta->get_const_values(), vecc, alg, &buffer_size); +// gko::Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// cusparse::spmv( +// exec->get_cusparse_handle(), trans, alpha->get_const_values(), +// mat, vecb, beta->get_const_values(), vecc, alg, buffer); +// cusparse::destroy(vecb); +// cusparse::destroy(vecc); +// cusparse::destroy(mat); +//#endif +// } else { +// GKO_NOT_IMPLEMENTED; +// } +// } else if (a->get_strategy()->get_name() == "classical") { +// IndexType max_length_per_row = 0; +// using Tfbcsr = matrix::Fbcsr; +// if (auto strategy = +// std::dynamic_pointer_cast( +// a->get_strategy())) { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else if (auto strategy = std::dynamic_pointer_cast< +// const typename Tfbcsr::automatical>(a->get_strategy())) +// { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else { +// GKO_NOT_SUPPORTED(a->get_strategy()); +// } +// host_kernel::select_classical_spmv( +// classical_kernels(), +// [&max_length_per_row](int compiled_info) { +// return max_length_per_row >= compiled_info; +// }, +// syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, +// beta); +// } else if 
(a->get_strategy()->get_name() == "merge_path") { +// int items_per_thread = +// host_kernel::compute_items_per_thread(exec); +// host_kernel::select_merge_path_spmv( +// compiled_kernels(), +// [&items_per_thread](int compiled_info) { +// return items_per_thread == compiled_info; +// }, +// syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, +// beta); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void spgemm(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto a_nnz = IndexType(a->get_num_stored_elements()); +// auto a_vals = a->get_const_values(); +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto b_vals = b->get_const_values(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// auto c_row_ptrs = c->get_row_ptrs(); +// +// if (cusparse::is_supported::value) { +// auto handle = exec->get_cusparse_handle(); +// cusparse::pointer_mode_guard pm_guard(handle); +// +// auto alpha = one(); +// auto a_nnz = static_cast(a->get_num_stored_elements()); +// auto b_nnz = static_cast(b->get_num_stored_elements()); +// auto null_value = static_cast(nullptr); +// auto null_index = static_cast(nullptr); +// auto zero_nnz = IndexType{}; +// auto m = IndexType(a->get_size()[0]); +// auto n = IndexType(b->get_size()[1]); +// auto k = IndexType(a->get_size()[1]); +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// +//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +// auto a_descr = cusparse::create_mat_descr(); +// auto b_descr = cusparse::create_mat_descr(); +// auto c_descr = cusparse::create_mat_descr(); +// auto d_descr = cusparse::create_mat_descr(); +// auto info = cusparse::create_spgemm_info(); +// // allocate buffer +// size_type buffer_size{}; +// cusparse::spgemm_buffer_size( +// handle, m, n, k, &alpha, a_descr, a_nnz, a_row_ptrs, a_col_idxs, +// b_descr, b_nnz, b_row_ptrs, b_col_idxs, null_value, d_descr, +// zero_nnz, null_index, null_index, info, buffer_size); +// Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// +// // count nnz +// IndexType c_nnz{}; +// cusparse::spgemm_nnz(handle, m, n, k, a_descr, a_nnz, a_row_ptrs, +// a_col_idxs, b_descr, b_nnz, b_row_ptrs, +// b_col_idxs, d_descr, zero_nnz, null_index, +// null_index, c_descr, c_row_ptrs, &c_nnz, info, +// buffer); +// +// // accumulate non-zeros +// c_col_idxs_array.resize_and_reset(c_nnz); +// c_vals_array.resize_and_reset(c_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// cusparse::spgemm(handle, m, n, k, &alpha, a_descr, a_nnz, a_vals, +// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, +// b_row_ptrs, b_col_idxs, null_value, d_descr, +// zero_nnz, null_value, null_index, null_index, +// c_descr, c_vals, c_row_ptrs, c_col_idxs, info, +// buffer); +// +// cusparse::destroy(info); +// cusparse::destroy(d_descr); +// cusparse::destroy(c_descr); +// cusparse::destroy(b_descr); +// cusparse::destroy(a_descr); +// +//#else // CUDA_VERSION >= 11000 +// const auto beta = zero(); +// auto spgemm_descr = cusparse::create_spgemm_descr(); +// auto a_descr = 
cusparse::create_fbcsr(m, k, a_nnz, +// const_cast(a_row_ptrs), +// const_cast(a_col_idxs), +// const_cast(a_vals)); +// auto b_descr = cusparse::create_fbcsr(k, n, b_nnz, +// const_cast(b_row_ptrs), +// const_cast(b_col_idxs), +// const_cast(b_vals)); +// auto c_descr = cusparse::create_fbcsr(m, n, zero_nnz, null_index, +// null_index, null_value); +// +// // estimate work +// size_type buffer1_size{}; +// cusparse::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, +// &beta, c_descr, spgemm_descr, +// buffer1_size, nullptr); +// Array buffer1{exec, buffer1_size}; +// cusparse::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, +// &beta, c_descr, spgemm_descr, +// buffer1_size, buffer1.get_data()); +// +// // compute spgemm +// size_type buffer2_size{}; +// cusparse::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, +// c_descr, spgemm_descr, buffer1.get_data(), +// buffer2_size, nullptr); +// Array buffer2{exec, buffer2_size}; +// cusparse::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, +// c_descr, spgemm_descr, buffer1.get_data(), +// buffer2_size, buffer2.get_data()); +// +// // copy data to result +// auto c_nnz = cusparse::sparse_matrix_nnz(c_descr); +// c_col_idxs_array.resize_and_reset(c_nnz); +// c_vals_array.resize_and_reset(c_nnz); +// cusparse::fbcsr_set_pointers(c_descr, c_row_ptrs, +// c_col_idxs_array.get_data(), +// c_vals_array.get_data()); +// +// cusparse::spgemm_copy(handle, &alpha, a_descr, b_descr, &beta, +// c_descr, +// spgemm_descr); +// +// cusparse::destroy(c_descr); +// cusparse::destroy(b_descr); +// cusparse::destroy(a_descr); +// cusparse::destroy(spgemm_descr); +//#endif // CUDA_VERSION >= 11000 +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); + + +namespace { + + +template +void spgeam(syn::value_list, + std::shared_ptr exec, const ValueType *alpha, + const IndexType *a_row_ptrs, const IndexType *a_col_idxs, + const ValueType *a_vals, const ValueType *beta, + const IndexType *b_row_ptrs, const IndexType *b_col_idxs, + const ValueType *b_vals, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto m = static_cast(c->get_size()[0]); +// auto c_row_ptrs = c->get_row_ptrs(); +// // count nnz for alpha * A + beta * B +// auto subwarps_per_block = default_block_size / subwarp_size; +// auto num_blocks = ceildiv(m, subwarps_per_block); +// kernel::spgeam_nnz<<>>( +// a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, c_row_ptrs); +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, m + 1); +// +// // accumulate non-zeros for alpha * A + beta * B +// matrix::FbcsrBuilder c_builder{c}; +// auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m); +// c_builder.get_col_idx_array().resize_and_reset(c_nnz); +// c_builder.get_value_array().resize_and_reset(c_nnz); +// auto c_col_idxs = c->get_col_idxs(); +// auto c_vals = c->get_values(); +// kernel::spgeam<<>>( +// as_cuda_type(alpha), a_row_ptrs, a_col_idxs, as_cuda_type(a_vals), +// as_cuda_type(beta), b_row_ptrs, b_col_idxs, as_cuda_type(b_vals), m, +// c_row_ptrs, c_col_idxs, as_cuda_type(c_vals)); +//} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); + + +} // namespace + + +template +void advanced_spgemm(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + const matrix::Dense *beta, + const matrix::Fbcsr *d, + matrix::Fbcsr *c) + 
GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (cusparse::is_supported::value) { +// auto handle = exec->get_cusparse_handle(); +// cusparse::pointer_mode_guard pm_guard(handle); +// +// auto valpha = exec->copy_val_to_host(alpha->get_const_values()); +// auto a_nnz = IndexType(a->get_num_stored_elements()); +// auto a_vals = a->get_const_values(); +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto b_nnz = IndexType(b->get_num_stored_elements()); +// auto b_vals = b->get_const_values(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// auto vbeta = exec->copy_val_to_host(beta->get_const_values()); +// auto d_nnz = IndexType(d->get_num_stored_elements()); +// auto d_vals = d->get_const_values(); +// auto d_row_ptrs = d->get_const_row_ptrs(); +// auto d_col_idxs = d->get_const_col_idxs(); +// auto m = IndexType(a->get_size()[0]); +// auto n = IndexType(b->get_size()[1]); +// auto k = IndexType(a->get_size()[1]); +// auto c_row_ptrs = c->get_row_ptrs(); +// +//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// auto a_descr = cusparse::create_mat_descr(); +// auto b_descr = cusparse::create_mat_descr(); +// auto c_descr = cusparse::create_mat_descr(); +// auto d_descr = cusparse::create_mat_descr(); +// auto info = cusparse::create_spgemm_info(); +// // allocate buffer +// size_type buffer_size{}; +// cusparse::spgemm_buffer_size( +// handle, m, n, k, &valpha, a_descr, a_nnz, a_row_ptrs, a_col_idxs, +// b_descr, b_nnz, b_row_ptrs, b_col_idxs, &vbeta, d_descr, d_nnz, +// d_row_ptrs, d_col_idxs, info, buffer_size); +// Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// +// // count nnz +// IndexType c_nnz{}; +// cusparse::spgemm_nnz(handle, m, n, k, a_descr, a_nnz, a_row_ptrs, +// a_col_idxs, b_descr, b_nnz, b_row_ptrs, +// b_col_idxs, d_descr, d_nnz, d_row_ptrs, +// d_col_idxs, c_descr, c_row_ptrs, &c_nnz, info, +// buffer); +// +// // accumulate non-zeros +// c_col_idxs_array.resize_and_reset(c_nnz); +// c_vals_array.resize_and_reset(c_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// cusparse::spgemm(handle, m, n, k, &valpha, a_descr, a_nnz, a_vals, +// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, +// b_row_ptrs, b_col_idxs, &vbeta, d_descr, d_nnz, +// d_vals, d_row_ptrs, d_col_idxs, c_descr, c_vals, +// c_row_ptrs, c_col_idxs, info, buffer); +// +// cusparse::destroy(info); +// cusparse::destroy(d_descr); +// cusparse::destroy(c_descr); +// cusparse::destroy(b_descr); +// cusparse::destroy(a_descr); +//#else // CUDA_VERSION >= 11000 +// auto null_value = static_cast(nullptr); +// auto null_index = static_cast(nullptr); +// auto one_val = one(); +// auto zero_val = zero(); +// auto zero_nnz = IndexType{}; +// auto spgemm_descr = cusparse::create_spgemm_descr(); +// auto a_descr = cusparse::create_fbcsr(m, k, a_nnz, +// const_cast(a_row_ptrs), +// const_cast(a_col_idxs), +// const_cast(a_vals)); +// auto b_descr = cusparse::create_fbcsr(k, n, b_nnz, +// const_cast(b_row_ptrs), +// const_cast(b_col_idxs), +// const_cast(b_vals)); +// auto c_descr = cusparse::create_fbcsr(m, n, zero_nnz, null_index, +// null_index, null_value); +// +// // estimate work +// size_type buffer1_size{}; +// 
cusparse::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, +// &zero_val, c_descr, spgemm_descr, +// buffer1_size, nullptr); +// Array buffer1{exec, buffer1_size}; +// cusparse::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, +// &zero_val, c_descr, spgemm_descr, +// buffer1_size, buffer1.get_data()); +// +// // compute spgemm +// size_type buffer2_size{}; +// cusparse::spgemm_compute(handle, &one_val, a_descr, b_descr, +// &zero_val, +// c_descr, spgemm_descr, buffer1.get_data(), +// buffer2_size, nullptr); +// Array buffer2{exec, buffer2_size}; +// cusparse::spgemm_compute(handle, &one_val, a_descr, b_descr, +// &zero_val, +// c_descr, spgemm_descr, buffer1.get_data(), +// buffer2_size, buffer2.get_data()); +// +// // write result to temporary storage +// auto c_tmp_nnz = cusparse::sparse_matrix_nnz(c_descr); +// Array c_tmp_row_ptrs_array(exec, m + 1); +// Array c_tmp_col_idxs_array(exec, c_tmp_nnz); +// Array c_tmp_vals_array(exec, c_tmp_nnz); +// cusparse::fbcsr_set_pointers(c_descr, c_tmp_row_ptrs_array.get_data(), +// c_tmp_col_idxs_array.get_data(), +// c_tmp_vals_array.get_data()); +// +// cusparse::spgemm_copy(handle, &one_val, a_descr, b_descr, &zero_val, +// c_descr, spgemm_descr); +// +// cusparse::destroy(c_descr); +// cusparse::destroy(b_descr); +// cusparse::destroy(a_descr); +// cusparse::destroy(spgemm_descr); +// +// auto spgeam_total_nnz = c_tmp_nnz + d->get_num_stored_elements(); +// auto nnz_per_row = spgeam_total_nnz / m; +// select_spgeam( +// spgeam_kernels(), +// [&](int compiled_subwarp_size) { +// return compiled_subwarp_size >= nnz_per_row || +// compiled_subwarp_size == config::warp_size; +// }, +// syn::value_list(), syn::type_list<>(), exec, +// alpha->get_const_values(), c_tmp_row_ptrs_array.get_const_data(), +// c_tmp_col_idxs_array.get_const_data(), +// c_tmp_vals_array.get_const_data(), beta->get_const_values(), +// d_row_ptrs, d_col_idxs, d_vals, c); +//#endif // CUDA_VERSION >= 11000 +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); + + +template +void spgeam(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *beta, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto total_nnz = +// a->get_num_stored_elements() + b->get_num_stored_elements(); +// auto nnz_per_row = total_nnz / a->get_size()[0]; +// select_spgeam( +// spgeam_kernels(), +// [&](int compiled_subwarp_size) { +// return compiled_subwarp_size >= nnz_per_row || +// compiled_subwarp_size == config::warp_size; +// }, +// syn::value_list(), syn::type_list<>(), exec, +// alpha->get_const_values(), a->get_const_row_ptrs(), +// a->get_const_col_idxs(), a->get_const_values(), +// beta->get_const_values(), b->get_const_row_ptrs(), +// b->get_const_col_idxs(), b->get_const_values(), c); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto grid_dim = ceildiv(num_rows, default_block_size); +// +// kernel::convert_row_ptrs_to_idxs<<>>( +// num_rows, as_cuda_type(ptrs), as_cuda_type(idxs)); +//} + + +template +void convert_to_coo(std::shared_ptr exec, + const 
matrix::Fbcsr *source, + matrix::Coo *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = result->get_size()[0]; +// +// auto row_idxs = result->get_row_idxs(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// +// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = result->get_size()[0]; +// const auto num_cols = result->get_size()[1]; +// const auto stride = result->get_stride(); +// const auto row_ptrs = source->get_const_row_ptrs(); +// const auto col_idxs = source->get_const_col_idxs(); +// const auto vals = source->get_const_values(); +// +// const dim3 block_size(config::warp_size, +// config::max_block_size / config::warp_size, 1); +// const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), +// ceildiv(num_rows, block_size.y), 1); +// kernel::initialize_zero_dense<<>>( +// num_rows, num_cols, stride, as_cuda_type(result->get_values())); +// +// auto grid_dim = ceildiv(num_rows, default_block_size); +// kernel::fill_in_dense<<>>( +// num_rows, as_cuda_type(row_ptrs), as_cuda_type(col_idxs), +// as_cuda_type(vals), stride, as_cuda_type(result->get_values())); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_sellp(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Sellp *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = result->get_size()[0]; +// const auto num_cols = result->get_size()[1]; +// +// auto result_values = result->get_values(); +// auto result_col_idxs = result->get_col_idxs(); +// auto slice_lengths = result->get_slice_lengths(); +// auto slice_sets = result->get_slice_sets(); +// +// const auto slice_size = (result->get_slice_size() == 0) +// ? matrix::default_slice_size +// : result->get_slice_size(); +// const auto stride_factor = (result->get_stride_factor() == 0) +// ? 
matrix::default_stride_factor +// : result->get_stride_factor(); +// const int slice_num = ceildiv(num_rows, slice_size); +// +// const auto source_values = source->get_const_values(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// const auto source_col_idxs = source->get_const_col_idxs(); +// +// auto nnz_per_row = Array(exec, num_rows); +// auto grid_dim = ceildiv(num_rows, default_block_size); +// +// if (grid_dim > 0) { +// kernel::calculate_nnz_per_row<<>>( +// num_rows, as_cuda_type(source_row_ptrs), +// as_cuda_type(nnz_per_row.get_data())); +// } +// +// grid_dim = slice_num; +// +// if (grid_dim > 0) { +// kernel::calculate_slice_lengths<<>>( +// num_rows, slice_size, stride_factor, +// as_cuda_type(nnz_per_row.get_const_data()), +// as_cuda_type(slice_lengths), as_cuda_type(slice_sets)); +// } +// +// components::prefix_sum(exec, slice_sets, slice_num + 1); +// +// grid_dim = ceildiv(num_rows, default_block_size); +// if (grid_dim > 0) { +// kernel::fill_in_sellp<<>>( +// num_rows, slice_size, as_cuda_type(source_values), +// as_cuda_type(source_row_ptrs), as_cuda_type(source_col_idxs), +// as_cuda_type(slice_lengths), as_cuda_type(slice_sets), +// as_cuda_type(result_col_idxs), as_cuda_type(result_values)); +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); + + +template +void convert_to_ell(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Ell *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto source_values = source->get_const_values(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// const auto source_col_idxs = source->get_const_col_idxs(); +// +// auto result_values = result->get_values(); +// auto result_col_idxs = result->get_col_idxs(); +// const auto stride = result->get_stride(); +// const auto max_nnz_per_row = result->get_num_stored_elements_per_row(); +// const auto num_rows = result->get_size()[0]; +// const auto num_cols = result->get_size()[1]; +// +// const auto init_grid_dim = +// ceildiv(max_nnz_per_row * num_rows, default_block_size); +// +// kernel::initialize_zero_ell<<>>( +// max_nnz_per_row, stride, as_cuda_type(result_values), +// as_cuda_type(result_col_idxs)); +// +// const auto grid_dim = +// ceildiv(num_rows * config::warp_size, default_block_size); +// +// kernel::fill_in_ell<<>>( +// num_rows, stride, as_cuda_type(source_values), +// as_cuda_type(source_row_ptrs), as_cuda_type(source_col_idxs), +// as_cuda_type(result_values), as_cuda_type(result_col_idxs)); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result, size_type stride_factor, + size_type slice_size) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// +// if (num_rows == 0) { +// *result = 0; +// return; +// } +// +// const auto slice_num = ceildiv(num_rows, slice_size); +// const auto row_ptrs = source->get_const_row_ptrs(); +// +// auto nnz_per_row = Array(exec, num_rows); +// auto grid_dim = ceildiv(num_rows, default_block_size); +// +// kernel::calculate_nnz_per_row<<>>( +// num_rows, as_cuda_type(row_ptrs), +// as_cuda_type(nnz_per_row.get_data())); +// +// grid_dim = ceildiv(slice_num * config::warp_size, default_block_size); +// auto 
max_nnz_per_slice = Array(exec, slice_num); +// +// kernel::reduce_max_nnz_per_slice<<>>( +// num_rows, slice_size, stride_factor, +// as_cuda_type(nnz_per_row.get_const_data()), +// as_cuda_type(max_nnz_per_slice.get_data())); +// +// grid_dim = ceildiv(slice_num, default_block_size); +// auto block_results = Array(exec, grid_dim); +// +// kernel::reduce_total_cols<<>>( +// slice_num, as_cuda_type(max_nnz_per_slice.get_const_data()), +// as_cuda_type(block_results.get_data())); +// +// auto d_result = Array(exec, 1); +// +// kernel::reduce_total_cols<<<1, default_block_size>>>( +// grid_dim, as_cuda_type(block_results.get_const_data()), +// as_cuda_type(d_result.get_data())); +// +// *result = exec->copy_val_to_host(d_result.get_const_data()); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (cusparse::is_supported::value) { +//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; +// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; +// +// cusparse::transpose( +// exec->get_cusparse_handle(), orig->get_size()[0], +// orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_row_ptrs(), trans->get_col_idxs(), copyValues, +// idxBase); +//#else // CUDA_VERSION >= 11000 +// cudaDataType_t cu_value = +// gko::kernels::cuda::cuda_data_type(); +// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; +// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; +// cusparseFbcsr2CscAlg_t alg = CUSPARSE_FBCSR2CSC_ALG1; +// size_type buffer_size = 0; +// cusparse::transpose_buffersize( +// exec->get_cusparse_handle(), orig->get_size()[0], +// orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, +// copyValues, idxBase, alg, &buffer_size); +// Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// cusparse::transpose( +// exec->get_cusparse_handle(), orig->get_size()[0], +// orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, +// copyValues, idxBase, alg, buffer); +//#endif +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (cusparse::is_supported::value) { +// const dim3 block_size(default_block_size, 1, 1); +// const dim3 grid_size( +// ceildiv(trans->get_num_stored_elements(), block_size.x), 1, 1); +// +//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; +// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; +// +// cusparse::transpose( +// exec->get_cusparse_handle(), orig->get_size()[0], +// 
orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_row_ptrs(), trans->get_col_idxs(), copyValues, +// idxBase); +//#else // CUDA_VERSION >= 11000 +// cudaDataType_t cu_value = +// gko::kernels::cuda::cuda_data_type(); +// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; +// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; +// cusparseFbcsr2CscAlg_t alg = CUSPARSE_FBCSR2CSC_ALG1; +// size_type buffer_size = 0; +// cusparse::transpose_buffersize( +// exec->get_cusparse_handle(), orig->get_size()[0], +// orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, +// copyValues, idxBase, alg, &buffer_size); +// Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// cusparse::transpose( +// exec->get_cusparse_handle(), orig->get_size()[0], +// orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, +// copyValues, idxBase, alg, buffer); +//#endif +// +// conjugate_kernel<<>>( +// trans->get_num_stored_elements(), +// as_cuda_type(trans->get_values())); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); + + +template +void column_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); + + +template +void inverse_row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); + + +template +void inverse_column_permute( + std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// +// auto nnz_per_row = Array(exec, num_rows); +// auto block_results = Array(exec, default_block_size); +// auto d_result = Array(exec, 1); +// +// const auto grid_dim = ceildiv(num_rows, default_block_size); +// kernel::calculate_nnz_per_row<<>>( +// num_rows, as_cuda_type(source->get_const_row_ptrs()), +// as_cuda_type(nnz_per_row.get_data())); +// +// const auto n = ceildiv(num_rows, default_block_size); +// const auto reduce_dim = n <= default_block_size ? 
n : default_block_size; +// kernel::reduce_max_nnz<<>>( +// num_rows, as_cuda_type(nnz_per_row.get_const_data()), +// as_cuda_type(block_results.get_data())); +// +// kernel::reduce_max_nnz<<<1, default_block_size>>>( +// reduce_dim, as_cuda_type(block_results.get_const_data()), +// as_cuda_type(d_result.get_data())); +// +// *result = exec->copy_val_to_host(d_result.get_const_data()); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void convert_to_hybrid(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Hybrid *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto ell_val = result->get_ell_values(); +// auto ell_col = result->get_ell_col_idxs(); +// auto coo_val = result->get_coo_values(); +// auto coo_col = result->get_coo_col_idxs(); +// auto coo_row = result->get_coo_row_idxs(); +// const auto stride = result->get_ell_stride(); +// const auto max_nnz_per_row = +// result->get_ell_num_stored_elements_per_row(); const auto num_rows = +// result->get_size()[0]; const auto coo_num_stored_elements = +// result->get_coo_num_stored_elements(); auto grid_dim = +// ceildiv(max_nnz_per_row * num_rows, default_block_size); +// +// kernel::initialize_zero_ell<<>>( +// max_nnz_per_row, stride, as_cuda_type(ell_val), +// as_cuda_type(ell_col)); +// +// grid_dim = ceildiv(num_rows, default_block_size); +// auto coo_offset = Array(exec, num_rows); +// kernel::calculate_hybrid_coo_row_nnz<<>>( +// num_rows, max_nnz_per_row, as_cuda_type(source->get_const_row_ptrs()), +// as_cuda_type(coo_offset.get_data())); +// +// components::prefix_sum(exec, coo_offset.get_data(), num_rows); +// +// grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); +// kernel::fill_in_hybrid<<>>( +// num_rows, stride, max_nnz_per_row, +// as_cuda_type(source->get_const_values()), +// as_cuda_type(source->get_const_row_ptrs()), +// as_cuda_type(source->get_const_col_idxs()), +// as_cuda_type(coo_offset.get_const_data()), as_cuda_type(ell_val), +// as_cuda_type(ell_col), as_cuda_type(coo_val), as_cuda_type(coo_col), +// as_cuda_type(coo_row)); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// auto row_ptrs = source->get_const_row_ptrs(); +// auto grid_dim = ceildiv(num_rows, default_block_size); +// +// kernel::calculate_nnz_per_row<<>>( +// num_rows, as_cuda_type(row_ptrs), as_cuda_type(result->get_data())); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void sort_by_column_index(std::shared_ptr exec, + matrix::Fbcsr *to_sort) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (cusparse::is_supported::value) { +// auto handle = exec->get_cusparse_handle(); +// auto descr = cusparse::create_mat_descr(); +// auto m = IndexType(to_sort->get_size()[0]); +// auto n = IndexType(to_sort->get_size()[1]); +// auto nnz = IndexType(to_sort->get_num_stored_elements()); +// auto row_ptrs = to_sort->get_const_row_ptrs(); +// auto col_idxs = to_sort->get_col_idxs(); +// auto vals = to_sort->get_values(); +// +// // 
copy values +// Array tmp_vals_array(exec, nnz); +// exec->copy(nnz, vals, tmp_vals_array.get_data()); +// auto tmp_vals = tmp_vals_array.get_const_data(); +// +// // init identity permutation +// Array permutation_array(exec, nnz); +// auto permutation = permutation_array.get_data(); +// cusparse::create_identity_permutation(handle, nnz, permutation); +// +// // allocate buffer +// size_type buffer_size{}; +// cusparse::fbcsrsort_buffer_size(handle, m, n, nnz, row_ptrs, col_idxs, +// buffer_size); +// Array buffer_array{exec, buffer_size}; +// auto buffer = buffer_array.get_data(); +// +// // sort column indices +// cusparse::fbcsrsort(handle, m, n, nnz, descr, row_ptrs, col_idxs, +// permutation, buffer); +// +// // sort values +//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +// cusparse::gather(handle, nnz, tmp_vals, vals, permutation); +//#else // CUDA_VERSION >= 11000 +// auto val_vec = cusparse::create_spvec(nnz, nnz, permutation, vals); +// auto tmp_vec = +// cusparse::create_dnvec(nnz, const_cast(tmp_vals)); +// cusparse::gather(handle, tmp_vec, val_vec); +//#endif +// +// cusparse::destroy(descr); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// *is_sorted = true; +// auto cpu_array = Array::view(exec->get_master(), 1, is_sorted); +// auto gpu_array = Array{exec, cpu_array}; +// auto block_size = default_block_size; +// auto num_rows = static_cast(to_check->get_size()[0]); +// auto num_blocks = ceildiv(num_rows, block_size); +// kernel::check_unsorted<<>>( +// to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), +// num_rows, gpu_array.get_data()); +// cpu_array = gpu_array; +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto nnz = orig->get_num_stored_elements(); +// const auto diag_size = diag->get_size()[0]; +// const auto num_blocks = +// ceildiv(config::warp_size * diag_size, default_block_size); +// +// const auto orig_values = orig->get_const_values(); +// const auto orig_row_ptrs = orig->get_const_row_ptrs(); +// const auto orig_col_idxs = orig->get_const_col_idxs(); +// auto diag_values = diag->get_values(); +// +// kernel::extract_diagonal<<>>( +// diag_size, nnz, as_cuda_type(orig_values), +// as_cuda_type(orig_row_ptrs), as_cuda_type(orig_col_idxs), +// as_cuda_type(diag_values)); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index 65ce218ac71..5be841b3d00 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -3,5 +3,6 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(sellp_kernels) diff --git a/cuda/test/matrix/fbcsr_kernels.cpp 
b/cuda/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..043412c6402 --- /dev/null +++ b/cuda/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,883 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/fbcsr_kernels.hpp" +#include "cuda/test/utils.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: + using Vec = gko::matrix::Dense<>; + using Mtx = gko::matrix::Fbcsr<>; + using ComplexVec = gko::matrix::Dense>; + using ComplexMtx = gko::matrix::Fbcsr>; + + Fbcsr() : mtx_size(532, 231), rand_engine(42) {} + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(std::shared_ptr strategy, + int num_vectors = 1) + { + mtx = Mtx::create(ref, strategy); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + square_mtx = Mtx::create(ref, strategy); + square_mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[0], 1)); + expected = gen_mtx(mtx_size[0], num_vectors, 1); + y = gen_mtx(mtx_size[1], num_vectors, 1); + alpha = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + dmtx = Mtx::create(cuda, strategy); + dmtx->copy_from(mtx.get()); + square_dmtx = Mtx::create(cuda, strategy); + square_dmtx->copy_from(square_mtx.get()); + dresult = Vec::create(cuda); + dresult->copy_from(expected.get()); + dy = 
Vec::create(cuda); + dy->copy_from(y.get()); + dalpha = Vec::create(cuda); + dalpha->copy_from(alpha.get()); + dbeta = Vec::create(cuda); + dbeta->copy_from(beta.get()); + } + + void set_up_apply_complex_data( + std::shared_ptr strategy) + { + complex_mtx = ComplexMtx::create(ref, strategy); + complex_mtx->copy_from( + gen_mtx(mtx_size[0], mtx_size[1], 1)); + complex_dmtx = ComplexMtx::create(cuda, strategy); + complex_dmtx->copy_from(complex_mtx.get()); + } + + struct matrix_pair { + std::unique_ptr ref; + std::unique_ptr cuda; + }; + + matrix_pair gen_unsorted_mtx() + { + constexpr int min_nnz_per_row = 2; // Must be at least 2 + auto local_mtx_ref = + gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); + for (size_t row = 0; row < mtx_size[0]; ++row) { + const auto row_ptrs = local_mtx_ref->get_const_row_ptrs(); + const auto start_row = row_ptrs[row]; + auto col_idx = local_mtx_ref->get_col_idxs() + start_row; + auto vals = local_mtx_ref->get_values() + start_row; + const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; + auto swap_idx_dist = + std::uniform_int_distribution<>(0, nnz_in_this_row - 1); + // shuffle `nnz_in_this_row / 2` times + for (size_t perm = 0; perm < nnz_in_this_row; perm += 2) { + const auto idx1 = swap_idx_dist(rand_engine); + const auto idx2 = swap_idx_dist(rand_engine); + std::swap(col_idx[idx1], col_idx[idx2]); + std::swap(vals[idx1], vals[idx2]); + } + } + auto local_mtx_cuda = Mtx::create(cuda); + local_mtx_cuda->copy_from(local_mtx_ref.get()); + + return {std::move(local_mtx_ref), std::move(local_mtx_cuda)}; + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + + std::unique_ptr mtx; + std::unique_ptr complex_mtx; + std::unique_ptr square_mtx; + std::unique_ptr expected; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + + std::unique_ptr dmtx; + std::unique_ptr complex_dmtx; + std::unique_ptr square_dmtx; + std::unique_ptr dresult; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Fbcsr, StrategyAfterCopyIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda)); +// +// ASSERT_EQ(mtx->get_strategy()->get_name(), +// dmtx->get_strategy()->get_name()); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda)); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda)); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithCusparse) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, 
AdvancedApplyIsEquivalentToRefWithCusparse) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithClassical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithClassical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithAutomatical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda)); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda), 3); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda), 3); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithClassical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithClassical) 
+GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToFbcsrMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// mtx->apply(alpha.get(), trans.get(), beta.get(), square_mtx.get()); +// dmtx->apply(dalpha.get(), d_trans.get(), dbeta.get(), square_dmtx.get()); +// +// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); +// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, SimpleApplyToFbcsrMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// mtx->apply(trans.get(), square_mtx.get()); +// dmtx->apply(d_trans.get(), square_dmtx.get()); +// +// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); +// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto a = gen_mtx(mtx_size[0], mtx_size[1], 0); +// auto b = gen_mtx(mtx_size[0], mtx_size[1], 0); +// auto da = Mtx::create(cuda); +// auto db = Mtx::create(cuda); +// da->copy_from(a.get()); +// db->copy_from(b.get()); +// auto id = gko::matrix::Identity::create(ref, +// mtx_size[1]); auto did = +// gko::matrix::Identity::create(cuda, mtx_size[1]); +// +// a->apply(alpha.get(), id.get(), beta.get(), b.get()); +// da->apply(dalpha.get(), did.get(), dbeta.get(), db.get()); +// +// GKO_ASSERT_MTX_NEAR(b, db, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(b, db); +// ASSERT_TRUE(db->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, TransposeIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda)); +// +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), +// static_cast(trans.get()), 0.0); +// 
ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, ConjugateTransposeIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_complex_data(std::make_shared(cuda)); +// +// auto trans = complex_mtx->conj_transpose(); +// auto d_trans = complex_dmtx->conj_transpose(); +// +// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), +// static_cast(trans.get()), 0.0); +// ASSERT_TRUE( +// static_cast(d_trans.get())->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, ConvertToDenseIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto dense_mtx = gko::matrix::Dense<>::create(ref); +// auto ddense_mtx = gko::matrix::Dense<>::create(cuda); +// +// mtx->convert_to(dense_mtx.get()); +// dmtx->convert_to(ddense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToDenseIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto dense_mtx = gko::matrix::Dense<>::create(ref); +// auto ddense_mtx = gko::matrix::Dense<>::create(cuda); +// +// mtx->move_to(dense_mtx.get()); +// dmtx->move_to(ddense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToEllIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto ell_mtx = gko::matrix::Ell<>::create(ref); +// auto dell_mtx = gko::matrix::Ell<>::create(cuda); +// +// mtx->convert_to(ell_mtx.get()); +// dmtx->convert_to(dell_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToEllIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto ell_mtx = gko::matrix::Ell<>::create(ref); +// auto dell_mtx = gko::matrix::Ell<>::create(cuda); +// +// mtx->move_to(ell_mtx.get()); +// dmtx->move_to(dell_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToSparsityFbcsrIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); +// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(cuda); +// +// mtx->convert_to(sparsity_mtx.get()); +// dmtx->convert_to(d_sparsity_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToSparsityFbcsrIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); +// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(cuda); +// +// mtx->move_to(sparsity_mtx.get()); +// dmtx->move_to(d_sparsity_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, CalculateMaxNnzPerRowIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported 
from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// gko::size_type max_nnz_per_row; +// gko::size_type dmax_nnz_per_row; +// +// gko::kernels::reference::fbcsr::calculate_max_nnz_per_row(ref, mtx.get(), +// &max_nnz_per_row); +// gko::kernels::cuda::fbcsr::calculate_max_nnz_per_row(cuda, dmtx.get(), +// &dmax_nnz_per_row); +// +// ASSERT_EQ(max_nnz_per_row, dmax_nnz_per_row); +//} + + +TEST_F(Fbcsr, ConvertToCooIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto coo_mtx = gko::matrix::Coo<>::create(ref); +// auto dcoo_mtx = gko::matrix::Coo<>::create(cuda); +// +// mtx->convert_to(coo_mtx.get()); +// dmtx->convert_to(dcoo_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToCooIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto coo_mtx = gko::matrix::Coo<>::create(ref); +// auto dcoo_mtx = gko::matrix::Coo<>::create(cuda); +// +// mtx->move_to(coo_mtx.get()); +// dmtx->move_to(dcoo_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToSellpIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); +// auto dsellp_mtx = gko::matrix::Sellp<>::create(cuda); +// +// mtx->convert_to(sellp_mtx.get()); +// dmtx->convert_to(dsellp_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToSellpIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); +// auto dsellp_mtx = gko::matrix::Sellp<>::create(cuda); +// +// mtx->move_to(sellp_mtx.get()); +// dmtx->move_to(dsellp_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertsEmptyToSellp) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto dempty_mtx = Mtx::create(cuda); +// auto dsellp_mtx = gko::matrix::Sellp<>::create(cuda); +// +// dempty_mtx->convert_to(dsellp_mtx.get()); +// +// ASSERT_EQ(cuda->copy_val_to_host(dsellp_mtx->get_const_slice_sets()), 0); +// ASSERT_FALSE(dsellp_mtx->get_size()); +//} + + +TEST_F(Fbcsr, CalculateTotalColsIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// gko::size_type total_cols; +// gko::size_type dtotal_cols; +// +// gko::kernels::reference::fbcsr::calculate_total_cols( +// ref, mtx.get(), &total_cols, 2, gko::matrix::default_slice_size); +// gko::kernels::cuda::fbcsr::calculate_total_cols( +// cuda, dmtx.get(), &dtotal_cols, 2, gko::matrix::default_slice_size); +// +// ASSERT_EQ(total_cols, dtotal_cols); +//} + + +TEST_F(Fbcsr, CalculatesNonzerosPerRow) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// gko::Array row_nnz(ref, mtx->get_size()[0]); +// gko::Array drow_nnz(cuda, dmtx->get_size()[0]); +// +// 
gko::kernels::reference::fbcsr::calculate_nonzeros_per_row(ref, mtx.get(), +// &row_nnz); +// gko::kernels::cuda::fbcsr::calculate_nonzeros_per_row(cuda, dmtx.get(), +// &drow_nnz); +// +// GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +//} + + +TEST_F(Fbcsr, ConvertToHybridIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid_type = gko::matrix::Hybrid<>; +// set_up_apply_data(std::make_shared()); +// auto hybrid_mtx = Hybrid_type::create( +// ref, std::make_shared(2)); +// auto dhybrid_mtx = Hybrid_type::create( +// cuda, std::make_shared(2)); +// +// mtx->convert_to(hybrid_mtx.get()); +// dmtx->convert_to(dhybrid_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToHybridIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid_type = gko::matrix::Hybrid<>; +// set_up_apply_data(std::make_shared()); +// auto hybrid_mtx = Hybrid_type::create( +// ref, std::make_shared(2)); +// auto dhybrid_mtx = Hybrid_type::create( +// cuda, std::make_shared(2)); +// +// mtx->move_to(hybrid_mtx.get()); +// dmtx->move_to(dhybrid_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, RecognizeSortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// bool is_sorted_cuda{}; +// bool is_sorted_ref{}; +// +// is_sorted_ref = mtx->is_sorted_by_column_index(); +// is_sorted_cuda = dmtx->is_sorted_by_column_index(); +// +// ASSERT_EQ(is_sorted_ref, is_sorted_cuda); +//} + + +TEST_F(Fbcsr, RecognizeUnsortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto uns_mtx = gen_unsorted_mtx(); +// bool is_sorted_cuda{}; +// bool is_sorted_ref{}; +// +// is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); +// is_sorted_cuda = uns_mtx.cuda->is_sorted_by_column_index(); +// +// ASSERT_EQ(is_sorted_ref, is_sorted_cuda); +//} + + +TEST_F(Fbcsr, SortSortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->sort_by_column_index(); +// dmtx->sort_by_column_index(); +// +// // Values must be unchanged, therefore, tolerance is `0` +// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); +//} + + +TEST_F(Fbcsr, SortUnsortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto uns_mtx = gen_unsorted_mtx(); +// +// uns_mtx.ref->sort_by_column_index(); +// uns_mtx.cuda->sort_by_column_index(); +// +// // Values must be unchanged, therefore, tolerance is `0` +// GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.cuda, 0); +//} + + +TEST_F(Fbcsr, OneAutomaticalWorksWithDifferentMatrices) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto automatical = std::make_shared(); +// auto row_len_limit = std::max(automatical->nvidia_row_len_limit, +// automatical->amd_row_len_limit); +// auto load_balance_mtx = Mtx::create(ref); +// auto classical_mtx = Mtx::create(ref); +// load_balance_mtx->copy_from( +// gen_mtx(1, row_len_limit + 1000, row_len_limit + 1)); +// classical_mtx->copy_from(gen_mtx(50, 50, 1)); +// auto 
load_balance_mtx_d = Mtx::create(cuda); +// auto classical_mtx_d = Mtx::create(cuda); +// load_balance_mtx_d->copy_from(load_balance_mtx.get()); +// classical_mtx_d->copy_from(classical_mtx.get()); +// +// load_balance_mtx_d->set_strategy(automatical); +// classical_mtx_d->set_strategy(automatical); +// +// EXPECT_EQ("load_balance", load_balance_mtx_d->get_strategy()->get_name()); +// EXPECT_EQ("classical", classical_mtx_d->get_strategy()->get_name()); +// ASSERT_NE(load_balance_mtx_d->get_strategy().get(), +// classical_mtx_d->get_strategy().get()); +//} + + +TEST_F(Fbcsr, ExtractDiagonalIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// auto diag = mtx->extract_diagonal(); +// auto ddiag = dmtx->extract_diagonal(); +// +// GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +//} + + +TEST_F(Fbcsr, InplaceAbsoluteMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda)); +// +// mtx->compute_absolute_inplace(); +// dmtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +//} + + +TEST_F(Fbcsr, OutplaceAbsoluteMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(cuda)); +// +// auto abs_mtx = mtx->compute_absolute(); +// auto dabs_mtx = dmtx->compute_absolute(); +// +// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +//} + + +TEST_F(Fbcsr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_complex_data(std::make_shared(cuda)); +// +// complex_mtx->compute_absolute_inplace(); +// complex_dmtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); +//} + + +TEST_F(Fbcsr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_complex_data(std::make_shared(cuda)); +// +// auto abs_mtx = complex_mtx->compute_absolute(); +// auto dabs_mtx = complex_dmtx->compute_absolute(); +// +// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +//} + + +} // namespace diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 14aefba1a0d..2b9ed806938 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -166,6 +166,7 @@ set(GINKGO_HIP_SOURCES matrix/dense_kernels.hip.cpp matrix/diagonal_kernels.hip.cpp matrix/ell_kernels.hip.cpp + matrix/fbcsr_kernels.hip.cpp matrix/hybrid_kernels.hip.cpp matrix/sellp_kernels.hip.cpp matrix/sparsity_csr_kernels.hip.cpp diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp new file mode 100644 index 00000000000..5263e535e7a --- /dev/null +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -0,0 +1,1263 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/matrix/fbcsr_builder.hpp" +#include "core/synthesizer/implementation_selection.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/hipsparse_bindings.hip.hpp" +#include "hip/base/math.hip.hpp" +#include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/base/types.hip.hpp" +#include "hip/components/atomic.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/intrinsics.hip.hpp" +#include "hip/components/merging.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/segment_scan.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The Compressed sparse row matrix format namespace. + * + * @ingroup fbcsr + */ +namespace fbcsr { + + +constexpr int default_block_size = 512; +constexpr int warps_in_block = 4; +constexpr int spmv_block_size = warps_in_block * config::warp_size; +constexpr int wsize = config::warp_size; +constexpr int classical_overweight = 32; + + +/** + * A compile-time list of the number items per threads for which spmv kernel + * should be compiled. 
+ */ +using compiled_kernels = syn::value_list; + +using classical_kernels = + syn::value_list; + +using spgeam_kernels = + syn::value_list; + + +namespace host_kernel { + + +template +void merge_path_spmv( + syn::value_list, + std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); +// const IndexType grid_num = +// ceildiv(total, spmv_block_size * items_per_thread); +// const dim3 grid(grid_num); +// const dim3 block(spmv_block_size); +// Array row_out(exec, grid_num); +// Array val_out(exec, grid_num); +// +// for (IndexType column_id = 0; column_id < b->get_size()[1]; column_id++) { +// if (alpha == nullptr && beta == nullptr) { +// const auto b_vals = b->get_const_values() + column_id; +// auto c_vals = c->get_values() + column_id; +// hipLaunchKernelGGL( +// HIP_KERNEL_NAME( +// kernel::abstract_merge_path_spmv), +// dim3(grid), dim3(block), 0, 0, +// static_cast(a->get_size()[0]), +// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), +// as_hip_type(a->get_const_row_ptrs()), +// as_hip_type(a->get_const_srow()), as_hip_type(b_vals), +// b->get_stride(), as_hip_type(c_vals), c->get_stride(), +// as_hip_type(row_out.get_data()), +// as_hip_type(val_out.get_data())); +// hipLaunchKernelGGL(kernel::abstract_reduce, dim3(1), +// dim3(spmv_block_size), 0, 0, grid_num, +// as_hip_type(val_out.get_data()), +// as_hip_type(row_out.get_data()), +// as_hip_type(c_vals), c->get_stride()); +// +// } else if (alpha != nullptr && beta != nullptr) { +// const auto b_vals = b->get_const_values() + column_id; +// auto c_vals = c->get_values() + column_id; +// hipLaunchKernelGGL( +// HIP_KERNEL_NAME( +// kernel::abstract_merge_path_spmv), +// dim3(grid), dim3(block), 0, 0, +// static_cast(a->get_size()[0]), +// as_hip_type(alpha->get_const_values()), +// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), +// as_hip_type(a->get_const_row_ptrs()), +// as_hip_type(a->get_const_srow()), as_hip_type(b_vals), +// b->get_stride(), as_hip_type(beta->get_const_values()), +// as_hip_type(c_vals), c->get_stride(), +// as_hip_type(row_out.get_data()), +// as_hip_type(val_out.get_data())); +// hipLaunchKernelGGL(kernel::abstract_reduce, dim3(1), +// dim3(spmv_block_size), 0, 0, grid_num, +// as_hip_type(val_out.get_data()), +// as_hip_type(row_out.get_data()), +// as_hip_type(alpha->get_const_values()), +// as_hip_type(c_vals), c->get_stride()); +// } else { +// GKO_KERNEL_NOT_FOUND; +// } +// } +//} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_merge_path_spmv, merge_path_spmv); + + +template +int compute_items_per_thread(std::shared_ptr exec) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +//#if GINKGO_HIP_PLATFORM_NVCC +// +// +// const int version = +// (exec->get_major_version() << 4) + exec->get_minor_version(); +// // The num_item is decided to make the occupancy 100% +// // TODO: Extend this list when new GPU is released +// // Tune this parameter +// // 128 threads/block the number of items per threads +// // 3.0 3.5: 6 +// // 3.7: 14 +// // 5.0, 5.3, 6.0, 6.2: 8 +// // 5.2, 6.1, 7.0: 12 +// int num_item = 6; +// switch (version) { +// case 0x50: +// case 0x53: +// case 0x60: +// case 0x62: +// num_item = 8; +// break; +// case 0x52: +// 
case 0x61: +// case 0x70: +// num_item = 12; +// break; +// case 0x37: +// num_item = 14; +// } +// +// +//#else +// +// +// // HIP uses the minimal num_item to make the code work correctly. +// // TODO: this parameter should be tuned. +// int num_item = 6; +// +// +//#endif // GINKGO_HIP_PLATFORM_NVCC +// +// +// // Ensure that the following is satisfied: +// // sizeof(IndexType) + sizeof(ValueType) +// // <= items_per_thread * sizeof(IndexType) +// constexpr int minimal_num = +// ceildiv(sizeof(IndexType) + sizeof(ValueType), sizeof(IndexType)); +// int items_per_thread = num_item * 4 / sizeof(IndexType); +// return std::max(minimal_num, items_per_thread); +//} + + +template +void classical_spmv( + syn::value_list, std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto nwarps = exec->get_num_warps_per_sm() * +// exec->get_num_multiprocessor() * classical_overweight; +// const auto gridx = +// std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size), +// int64(nwarps / warps_in_block)); +// const dim3 grid(gridx, b->get_size()[1]); +// const dim3 block(spmv_block_size); +// +// if (alpha == nullptr && beta == nullptr) { +// hipLaunchKernelGGL( +// HIP_KERNEL_NAME(kernel::abstract_classical_spmv), +// dim3(grid), dim3(block), 0, 0, a->get_size()[0], +// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), +// as_hip_type(a->get_const_row_ptrs()), +// as_hip_type(b->get_const_values()), b->get_stride(), +// as_hip_type(c->get_values()), c->get_stride()); +// +// } else if (alpha != nullptr && beta != nullptr) { +// hipLaunchKernelGGL( +// HIP_KERNEL_NAME(kernel::abstract_classical_spmv), +// dim3(grid), dim3(block), 0, 0, a->get_size()[0], +// as_hip_type(alpha->get_const_values()), +// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), +// as_hip_type(a->get_const_row_ptrs()), +// as_hip_type(b->get_const_values()), b->get_stride(), +// as_hip_type(beta->get_const_values()), +// as_hip_type(c->get_values()), c->get_stride()); +// } else { +// GKO_KERNEL_NOT_FOUND; +// } +//} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); + + +} // namespace host_kernel + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (a->get_strategy()->get_name() == "load_balance") { +// components::fill_array(exec, c->get_values(), +// c->get_num_stored_elements(), +// zero()); +// const IndexType nwarps = a->get_num_srow_elements(); +// if (nwarps > 0) { +// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); +// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), +// b->get_size()[1]); +// hipLaunchKernelGGL( +// kernel::abstract_spmv, dim3(fbcsr_grid), dim3(fbcsr_block), 0, +// 0, nwarps, static_cast(a->get_size()[0]), +// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), +// as_hip_type(a->get_const_row_ptrs()), +// as_hip_type(a->get_const_srow()), +// as_hip_type(b->get_const_values()), +// as_hip_type(b->get_stride()), as_hip_type(c->get_values()), +// as_hip_type(c->get_stride())); +// } else { +// GKO_NOT_SUPPORTED(nwarps); +// } +// } else if (a->get_strategy()->get_name() == "merge_path") { +// int items_per_thread = +// 
host_kernel::compute_items_per_thread(exec); +// host_kernel::select_merge_path_spmv( +// compiled_kernels(), +// [&items_per_thread](int compiled_info) { +// return items_per_thread == compiled_info; +// }, +// syn::value_list(), syn::type_list<>(), exec, a, b, c); +// } else if (a->get_strategy()->get_name() == "classical") { +// IndexType max_length_per_row = 0; +// using Tfbcsr = matrix::Fbcsr; +// if (auto strategy = +// std::dynamic_pointer_cast( +// a->get_strategy())) { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else if (auto strategy = std::dynamic_pointer_cast< +// const typename Tfbcsr::automatical>(a->get_strategy())) +// { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else { +// GKO_NOT_SUPPORTED(a->get_strategy()); +// } +// host_kernel::select_classical_spmv( +// classical_kernels(), +// [&max_length_per_row](int compiled_info) { +// return max_length_per_row >= compiled_info; +// }, +// syn::value_list(), syn::type_list<>(), exec, a, b, c); +// } else if (a->get_strategy()->get_name() == "sparselib" || +// a->get_strategy()->get_name() == "cusparse") { +// if (hipsparse::is_supported::value) { +// // TODO: add implementation for int64 and multiple RHS +// auto handle = exec->get_hipsparse_handle(); +// auto descr = hipsparse::create_mat_descr(); +// { +// hipsparse::pointer_mode_guard pm_guard(handle); +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = a->get_const_col_idxs(); +// auto alpha = one(); +// auto beta = zero(); +// if (b->get_stride() != 1 || c->get_stride() != 1) { +// GKO_NOT_IMPLEMENTED; +// } +// hipsparse::spmv(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, +// a->get_size()[0], a->get_size()[1], +// a->get_num_stored_elements(), &alpha, descr, +// a->get_const_values(), row_ptrs, col_idxs, +// b->get_const_values(), &beta, +// c->get_values()); +// } +// hipsparse::destroy(descr); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (a->get_strategy()->get_name() == "load_balance") { +// dense::scale(exec, beta, c); +// +// const IndexType nwarps = a->get_num_srow_elements(); +// +// if (nwarps > 0) { +// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); +// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), +// b->get_size()[1]); +// hipLaunchKernelGGL( +// kernel::abstract_spmv, dim3(fbcsr_grid), dim3(fbcsr_block), 0, +// 0, nwarps, static_cast(a->get_size()[0]), +// as_hip_type(alpha->get_const_values()), +// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), +// as_hip_type(a->get_const_row_ptrs()), +// as_hip_type(a->get_const_srow()), +// as_hip_type(b->get_const_values()), +// as_hip_type(b->get_stride()), as_hip_type(c->get_values()), +// as_hip_type(c->get_stride())); +// } else { +// GKO_NOT_SUPPORTED(nwarps); +// } +// } else if (a->get_strategy()->get_name() == "sparselib" || +// a->get_strategy()->get_name() == "cusparse") { +// if (hipsparse::is_supported::value) { +// // TODO: add implementation for int64 and multiple RHS +// auto descr = hipsparse::create_mat_descr(); +// +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = 
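// All of the strategies above were written for scalar CSR SpMV; the
// fixed-block product instead multiplies each stored bs x bs block with a
// length-bs slice of the input vector. A sequential host sketch of that
// kernel, assuming row-major blocks and block-row pointers (illustrative
// only, not the hipsparse or Ginkgo API):

#include <vector>

template <typename ValueType, typename IndexType>
void fbcsr_spmv_ref(const std::vector<IndexType>& block_row_ptrs,
                    const std::vector<IndexType>& block_col_idxs,
                    const std::vector<ValueType>& vals, const int bs,
                    const std::vector<ValueType>& x, std::vector<ValueType>& y)
{
    const IndexType num_block_rows =
        static_cast<IndexType>(block_row_ptrs.size()) - 1;
    for (IndexType brow = 0; brow < num_block_rows; ++brow) {
        for (int i = 0; i < bs; ++i) {
            y[brow * bs + i] = ValueType{};
        }
        for (IndexType ib = block_row_ptrs[brow]; ib < block_row_ptrs[brow + 1];
             ++ib) {
            const IndexType bcol = block_col_idxs[ib];
            const ValueType* block = vals.data() + ib * bs * bs;
            for (int i = 0; i < bs; ++i) {
                for (int j = 0; j < bs; ++j) {
                    // row-major block entry (i, j) scales the x slice at the
                    // block column
                    y[brow * bs + i] += block[i * bs + j] * x[bcol * bs + j];
                }
            }
        }
    }
}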
a->get_const_col_idxs(); +// +// if (b->get_stride() != 1 || c->get_stride() != 1) +// GKO_NOT_IMPLEMENTED; +// +// hipsparse::spmv(exec->get_hipsparse_handle(), +// HIPSPARSE_OPERATION_NON_TRANSPOSE, +// a->get_size()[0], a->get_size()[1], +// a->get_num_stored_elements(), +// alpha->get_const_values(), descr, +// a->get_const_values(), row_ptrs, col_idxs, +// b->get_const_values(), beta->get_const_values(), +// c->get_values()); +// +// hipsparse::destroy(descr); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +// } else if (a->get_strategy()->get_name() == "classical") { +// IndexType max_length_per_row = 0; +// using Tfbcsr = matrix::Fbcsr; +// if (auto strategy = +// std::dynamic_pointer_cast( +// a->get_strategy())) { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else if (auto strategy = std::dynamic_pointer_cast< +// const typename Tfbcsr::automatical>(a->get_strategy())) +// { +// max_length_per_row = strategy->get_max_length_per_row(); +// } else { +// GKO_NOT_SUPPORTED(a->get_strategy()); +// } +// host_kernel::select_classical_spmv( +// classical_kernels(), +// [&max_length_per_row](int compiled_info) { +// return max_length_per_row >= compiled_info; +// }, +// syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, +// beta); +// } else if (a->get_strategy()->get_name() == "merge_path") { +// int items_per_thread = +// host_kernel::compute_items_per_thread(exec); +// host_kernel::select_merge_path_spmv( +// compiled_kernels(), +// [&items_per_thread](int compiled_info) { +// return items_per_thread == compiled_info; +// }, +// syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, +// beta); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void spgemm(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (hipsparse::is_supported::value) { +// auto handle = exec->get_hipsparse_handle(); +// hipsparse::pointer_mode_guard pm_guard(handle); +// auto a_descr = hipsparse::create_mat_descr(); +// auto b_descr = hipsparse::create_mat_descr(); +// auto c_descr = hipsparse::create_mat_descr(); +// auto d_descr = hipsparse::create_mat_descr(); +// auto info = hipsparse::create_spgemm_info(); +// +// auto alpha = one(); +// auto a_nnz = static_cast(a->get_num_stored_elements()); +// auto a_vals = a->get_const_values(); +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto b_nnz = static_cast(b->get_num_stored_elements()); +// auto b_vals = b->get_const_values(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// auto null_value = static_cast(nullptr); +// auto null_index = static_cast(nullptr); +// auto zero_nnz = IndexType{}; +// auto m = static_cast(a->get_size()[0]); +// auto n = static_cast(b->get_size()[1]); +// auto k = static_cast(a->get_size()[1]); +// auto c_row_ptrs = c->get_row_ptrs(); +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// +// // allocate buffer +// size_type buffer_size{}; +// hipsparse::spgemm_buffer_size( +// handle, m, n, k, &alpha, a_descr, a_nnz, a_row_ptrs, a_col_idxs, +// b_descr, b_nnz, b_row_ptrs, b_col_idxs, null_value, d_descr, +// zero_nnz, null_index, null_index, info, 
buffer_size); +// Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// +// // count nnz +// IndexType c_nnz{}; +// hipsparse::spgemm_nnz( +// handle, m, n, k, a_descr, a_nnz, a_row_ptrs, a_col_idxs, b_descr, +// b_nnz, b_row_ptrs, b_col_idxs, d_descr, zero_nnz, null_index, +// null_index, c_descr, c_row_ptrs, &c_nnz, info, buffer); +// +// // accumulate non-zeros +// c_col_idxs_array.resize_and_reset(c_nnz); +// c_vals_array.resize_and_reset(c_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// hipsparse::spgemm(handle, m, n, k, &alpha, a_descr, a_nnz, a_vals, +// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, +// b_row_ptrs, b_col_idxs, null_value, d_descr, +// zero_nnz, null_value, null_index, null_index, +// c_descr, c_vals, c_row_ptrs, c_col_idxs, info, +// buffer); +// +// hipsparse::destroy_spgemm_info(info); +// hipsparse::destroy(d_descr); +// hipsparse::destroy(c_descr); +// hipsparse::destroy(b_descr); +// hipsparse::destroy(a_descr); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); + + +namespace { + + +template +void spgeam(syn::value_list, + std::shared_ptr exec, const ValueType *alpha, + const IndexType *a_row_ptrs, const IndexType *a_col_idxs, + const ValueType *a_vals, const ValueType *beta, + const IndexType *b_row_ptrs, const IndexType *b_col_idxs, + const ValueType *b_vals, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto m = static_cast(c->get_size()[0]); +// auto c_row_ptrs = c->get_row_ptrs(); +// // count nnz for alpha * A + beta * B +// auto subwarps_per_block = default_block_size / subwarp_size; +// auto num_blocks = ceildiv(m, subwarps_per_block); +// hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::spgeam_nnz), +// dim3(num_blocks), dim3(default_block_size), 0, 0, +// a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, +// c_row_ptrs); +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, m + 1); +// +// // accumulate non-zeros for alpha * A + beta * B +// matrix::FbcsrBuilder c_builder{c}; +// auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m); +// c_builder.get_col_idx_array().resize_and_reset(c_nnz); +// c_builder.get_value_array().resize_and_reset(c_nnz); +// auto c_col_idxs = c->get_col_idxs(); +// auto c_vals = c->get_values(); +// hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::spgeam), +// dim3(num_blocks), dim3(default_block_size), 0, 0, +// as_hip_type(alpha), a_row_ptrs, a_col_idxs, +// as_hip_type(a_vals), as_hip_type(beta), b_row_ptrs, +// b_col_idxs, as_hip_type(b_vals), m, c_row_ptrs, +// c_col_idxs, as_hip_type(c_vals)); +//} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); + + +} // namespace + + +template +void advanced_spgemm(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + const matrix::Dense *beta, + const matrix::Fbcsr *d, + matrix::Fbcsr *c) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (hipsparse::is_supported::value) { +// auto handle = exec->get_hipsparse_handle(); +// hipsparse::pointer_mode_guard pm_guard(handle); +// auto a_descr = hipsparse::create_mat_descr(); +// auto b_descr = hipsparse::create_mat_descr(); +// auto c_descr = hipsparse::create_mat_descr(); +// auto d_descr = hipsparse::create_mat_descr(); +// auto info = 
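// The spgeam helper above merges scalar CSR entries; for the fixed-block
// format the same two-way merge happens at block granularity: the
// block-column lists of A and B are merged per block-row, and wherever both
// matrices own a block the bs x bs values are added entry-wise. A simple
// host sketch that accumulates one block-row into a map keyed by block
// column (illustrative names, assumed row-major blocks):

#include <cstddef>
#include <map>
#include <vector>

template <typename ValueType, typename IndexType>
std::map<IndexType, std::vector<ValueType>> fbcsr_spgeam_block_row(
    const ValueType alpha, const std::vector<IndexType>& a_bcols,
    const std::vector<ValueType>& a_vals, const ValueType beta,
    const std::vector<IndexType>& b_bcols,
    const std::vector<ValueType>& b_vals, const int bs)
{
    std::map<IndexType, std::vector<ValueType>> out;
    const int bs2 = bs * bs;
    for (std::size_t ib = 0; ib < a_bcols.size(); ++ib) {
        auto& blk = out[a_bcols[ib]];
        blk.resize(bs2, ValueType{});
        for (int k = 0; k < bs2; ++k) {
            blk[k] += alpha * a_vals[ib * bs2 + k];
        }
    }
    for (std::size_t ib = 0; ib < b_bcols.size(); ++ib) {
        auto& blk = out[b_bcols[ib]];
        blk.resize(bs2, ValueType{});
        for (int k = 0; k < bs2; ++k) {
            blk[k] += beta * b_vals[ib * bs2 + k];
        }
    }
    return out;  // keys are the sorted block columns of alpha * A + beta * B
}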
hipsparse::create_spgemm_info(); +// +// auto a_nnz = static_cast(a->get_num_stored_elements()); +// auto a_vals = a->get_const_values(); +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto b_nnz = static_cast(b->get_num_stored_elements()); +// auto b_vals = b->get_const_values(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// auto d_vals = d->get_const_values(); +// auto d_row_ptrs = d->get_const_row_ptrs(); +// auto d_col_idxs = d->get_const_col_idxs(); +// auto null_value = static_cast(nullptr); +// auto null_index = static_cast(nullptr); +// auto one_value = one(); +// auto m = static_cast(a->get_size()[0]); +// auto n = static_cast(b->get_size()[1]); +// auto k = static_cast(a->get_size()[1]); +// +// // allocate buffer +// size_type buffer_size{}; +// hipsparse::spgemm_buffer_size( +// handle, m, n, k, &one_value, a_descr, a_nnz, a_row_ptrs, +// a_col_idxs, b_descr, b_nnz, b_row_ptrs, b_col_idxs, null_value, +// d_descr, IndexType{}, null_index, null_index, info, buffer_size); +// Array buffer_array(exec, buffer_size); +// auto buffer = buffer_array.get_data(); +// +// // count nnz +// Array c_tmp_row_ptrs_array(exec, m + 1); +// auto c_tmp_row_ptrs = c_tmp_row_ptrs_array.get_data(); +// IndexType c_nnz{}; +// hipsparse::spgemm_nnz( +// handle, m, n, k, a_descr, a_nnz, a_row_ptrs, a_col_idxs, b_descr, +// b_nnz, b_row_ptrs, b_col_idxs, d_descr, IndexType{}, null_index, +// null_index, c_descr, c_tmp_row_ptrs, &c_nnz, info, buffer); +// +// // accumulate non-zeros for A * B +// Array c_tmp_col_idxs_array(exec, c_nnz); +// Array c_tmp_vals_array(exec, c_nnz); +// auto c_tmp_col_idxs = c_tmp_col_idxs_array.get_data(); +// auto c_tmp_vals = c_tmp_vals_array.get_data(); +// hipsparse::spgemm(handle, m, n, k, &one_value, a_descr, a_nnz, a_vals, +// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, +// b_row_ptrs, b_col_idxs, null_value, d_descr, +// IndexType{}, null_value, null_index, null_index, +// c_descr, c_tmp_vals, c_tmp_row_ptrs, c_tmp_col_idxs, +// info, buffer); +// +// // destroy hipsparse context +// hipsparse::destroy_spgemm_info(info); +// hipsparse::destroy(d_descr); +// hipsparse::destroy(c_descr); +// hipsparse::destroy(b_descr); +// hipsparse::destroy(a_descr); +// +// auto total_nnz = c_nnz + d->get_num_stored_elements(); +// auto nnz_per_row = total_nnz / m; +// select_spgeam( +// spgeam_kernels(), +// [&](int compiled_subwarp_size) { +// return compiled_subwarp_size >= nnz_per_row || +// compiled_subwarp_size == config::warp_size; +// }, +// syn::value_list(), syn::type_list<>(), exec, +// alpha->get_const_values(), c_tmp_row_ptrs, c_tmp_col_idxs, +// c_tmp_vals, beta->get_const_values(), d_row_ptrs, d_col_idxs, +// d_vals, c); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); + + +template +void spgeam(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *beta, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto total_nnz = +// a->get_num_stored_elements() + b->get_num_stored_elements(); +// auto nnz_per_row = total_nnz / a->get_size()[0]; +// select_spgeam( +// spgeam_kernels(), +// [&](int compiled_subwarp_size) { +// return compiled_subwarp_size >= nnz_per_row || +// compiled_subwarp_size == config::warp_size; +// }, +// 
syn::value_list(), syn::type_list<>(), exec, +// alpha->get_const_values(), a->get_const_row_ptrs(), +// a->get_const_col_idxs(), a->get_const_values(), +// beta->get_const_values(), b->get_const_row_ptrs(), +// b->get_const_col_idxs(), b->get_const_values(), c); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto grid_dim = ceildiv(num_rows, default_block_size); +// +// hipLaunchKernelGGL(kernel::convert_row_ptrs_to_idxs, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, +// as_hip_type(ptrs), as_hip_type(idxs)); +//} + + +template +void convert_to_coo(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Coo *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = result->get_size()[0]; +// +// auto row_idxs = result->get_row_idxs(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// +// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = result->get_size()[0]; +// const auto num_cols = result->get_size()[1]; +// const auto stride = result->get_stride(); +// const auto row_ptrs = source->get_const_row_ptrs(); +// const auto col_idxs = source->get_const_col_idxs(); +// const auto vals = source->get_const_values(); +// +// const dim3 block_size(config::warp_size, +// config::max_block_size / config::warp_size, 1); +// const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), +// ceildiv(num_rows, block_size.y), 1); +// hipLaunchKernelGGL(kernel::initialize_zero_dense, dim3(init_grid_dim), +// dim3(block_size), 0, 0, num_rows, num_cols, stride, +// as_hip_type(result->get_values())); +// +// auto grid_dim = ceildiv(num_rows, default_block_size); +// hipLaunchKernelGGL( +// kernel::fill_in_dense, dim3(grid_dim), dim3(default_block_size), 0, 0, +// num_rows, as_hip_type(row_ptrs), as_hip_type(col_idxs), +// as_hip_type(vals), stride, as_hip_type(result->get_values())); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_sellp(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Sellp *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = result->get_size()[0]; +// const auto num_cols = result->get_size()[1]; +// +// auto result_values = result->get_values(); +// auto result_col_idxs = result->get_col_idxs(); +// auto slice_lengths = result->get_slice_lengths(); +// auto slice_sets = result->get_slice_sets(); +// +// const auto slice_size = (result->get_slice_size() == 0) +// ? matrix::default_slice_size +// : result->get_slice_size(); +// const auto stride_factor = (result->get_stride_factor() == 0) +// ? 
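// The dense conversion imported above fills scalar CSR entries one by one;
// for the fixed-block format each stored block is scattered as a contiguous
// bs x bs patch starting at scalar position (brow * bs, bcol * bs). A host
// sketch with a row-major dense output of leading dimension `num_cols`
// (illustrative names, assumed row-major blocks):

#include <vector>

template <typename ValueType, typename IndexType>
void fbcsr_to_dense(const std::vector<IndexType>& block_row_ptrs,
                    const std::vector<IndexType>& block_col_idxs,
                    const std::vector<ValueType>& vals, const int bs,
                    const int num_cols, std::vector<ValueType>& dense)
{
    const IndexType num_block_rows =
        static_cast<IndexType>(block_row_ptrs.size()) - 1;
    dense.assign(static_cast<std::size_t>(num_block_rows) * bs * num_cols,
                 ValueType{});
    for (IndexType brow = 0; brow < num_block_rows; ++brow) {
        for (IndexType ib = block_row_ptrs[brow]; ib < block_row_ptrs[brow + 1];
             ++ib) {
            const IndexType bcol = block_col_idxs[ib];
            for (int i = 0; i < bs; ++i) {
                for (int j = 0; j < bs; ++j) {
                    dense[(brow * bs + i) * num_cols + bcol * bs + j] =
                        vals[ib * bs * bs + i * bs + j];
                }
            }
        }
    }
}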
matrix::default_stride_factor +// : result->get_stride_factor(); +// const int slice_num = ceildiv(num_rows, slice_size); +// +// const auto source_values = source->get_const_values(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// const auto source_col_idxs = source->get_const_col_idxs(); +// +// auto nnz_per_row = Array(exec, num_rows); +// auto grid_dim = ceildiv(num_rows, default_block_size); +// +// if (grid_dim > 0) { +// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, +// as_hip_type(source_row_ptrs), +// as_hip_type(nnz_per_row.get_data())); +// } +// +// grid_dim = slice_num; +// +// if (grid_dim > 0) { +// hipLaunchKernelGGL(kernel::calculate_slice_lengths, dim3(grid_dim), +// dim3(config::warp_size), 0, 0, num_rows, +// slice_size, stride_factor, +// as_hip_type(nnz_per_row.get_const_data()), +// as_hip_type(slice_lengths), +// as_hip_type(slice_sets)); +// } +// +// components::prefix_sum(exec, slice_sets, slice_num + 1); +// +// grid_dim = ceildiv(num_rows, default_block_size); +// if (grid_dim > 0) { +// hipLaunchKernelGGL( +// kernel::fill_in_sellp, dim3(grid_dim), dim3(default_block_size), +// 0, 0, num_rows, slice_size, as_hip_type(source_values), +// as_hip_type(source_row_ptrs), as_hip_type(source_col_idxs), +// as_hip_type(slice_lengths), as_hip_type(slice_sets), +// as_hip_type(result_col_idxs), as_hip_type(result_values)); +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); + + +template +void convert_to_ell(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Ell *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto source_values = source->get_const_values(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// const auto source_col_idxs = source->get_const_col_idxs(); +// +// auto result_values = result->get_values(); +// auto result_col_idxs = result->get_col_idxs(); +// const auto stride = result->get_stride(); +// const auto max_nnz_per_row = result->get_num_stored_elements_per_row(); +// const auto num_rows = result->get_size()[0]; +// const auto num_cols = result->get_size()[1]; +// +// const auto init_grid_dim = +// ceildiv(max_nnz_per_row * num_rows, default_block_size); +// +// hipLaunchKernelGGL(kernel::initialize_zero_ell, dim3(init_grid_dim), +// dim3(default_block_size), 0, 0, max_nnz_per_row, +// stride, as_hip_type(result_values), +// as_hip_type(result_col_idxs)); +// +// const auto grid_dim = +// ceildiv(num_rows * config::warp_size, default_block_size); +// +// hipLaunchKernelGGL(kernel::fill_in_ell, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, stride, +// as_hip_type(source_values), +// as_hip_type(source_row_ptrs), +// as_hip_type(source_col_idxs), +// as_hip_type(result_values), +// as_hip_type(result_col_idxs)); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result, size_type stride_factor, + size_type slice_size) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// +// if (num_rows == 0) { +// *result = 0; +// return; +// } +// +// const auto slice_num = ceildiv(num_rows, slice_size); +// const auto row_ptrs = source->get_const_row_ptrs(); 
+// +// auto nnz_per_row = Array(exec, num_rows); +// auto grid_dim = ceildiv(num_rows, default_block_size); +// +// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, +// as_hip_type(row_ptrs), +// as_hip_type(nnz_per_row.get_data())); +// +// grid_dim = ceildiv(slice_num * config::warp_size, default_block_size); +// auto max_nnz_per_slice = Array(exec, slice_num); +// +// hipLaunchKernelGGL(kernel::reduce_max_nnz_per_slice, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, slice_size, +// stride_factor, +// as_hip_type(nnz_per_row.get_const_data()), +// as_hip_type(max_nnz_per_slice.get_data())); +// +// grid_dim = ceildiv(slice_num, default_block_size); +// auto block_results = Array(exec, grid_dim); +// +// hipLaunchKernelGGL(kernel::reduce_total_cols, dim3(grid_dim), +// dim3(default_block_size), 0, 0, slice_num, +// as_hip_type(max_nnz_per_slice.get_const_data()), +// as_hip_type(block_results.get_data())); +// +// auto d_result = Array(exec, 1); +// +// hipLaunchKernelGGL(kernel::reduce_total_cols, dim3(1), +// dim3(default_block_size), 0, 0, grid_dim, +// as_hip_type(block_results.get_const_data()), +// as_hip_type(d_result.get_data())); +// +// *result = exec->copy_val_to_host(d_result.get_const_data()); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (hipsparse::is_supported::value) { +// hipsparseAction_t copyValues = HIPSPARSE_ACTION_NUMERIC; +// hipsparseIndexBase_t idxBase = HIPSPARSE_INDEX_BASE_ZERO; +// +// hipsparse::transpose( +// exec->get_hipsparse_handle(), orig->get_size()[0], +// orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_row_ptrs(), trans->get_col_idxs(), copyValues, +// idxBase); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (hipsparse::is_supported::value) { +// const dim3 block_size(default_block_size, 1, 1); +// const dim3 grid_size( +// ceildiv(trans->get_num_stored_elements(), block_size.x), 1, 1); +// +// hipsparseAction_t copyValues = HIPSPARSE_ACTION_NUMERIC; +// hipsparseIndexBase_t idxBase = HIPSPARSE_INDEX_BASE_ZERO; +// +// hipsparse::transpose( +// exec->get_hipsparse_handle(), orig->get_size()[0], +// orig->get_size()[1], orig->get_num_stored_elements(), +// orig->get_const_values(), orig->get_const_row_ptrs(), +// orig->get_const_col_idxs(), trans->get_values(), +// trans->get_col_idxs(), trans->get_row_ptrs(), copyValues, +// idxBase); +// +// hipLaunchKernelGGL(conjugate_kernel, dim3(grid_size), +// dim3(block_size), +// 0, 0, trans->get_num_stored_elements(), +// as_hip_type(trans->get_values())); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + 
matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); + + +template +void column_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); + + +template +void inverse_row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); + + +template +void inverse_column_permute( + std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// +// auto nnz_per_row = Array(exec, num_rows); +// auto block_results = Array(exec, default_block_size); +// auto d_result = Array(exec, 1); +// +// const auto grid_dim = ceildiv(num_rows, default_block_size); +// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, +// as_hip_type(source->get_const_row_ptrs()), +// as_hip_type(nnz_per_row.get_data())); +// +// const auto n = ceildiv(num_rows, default_block_size); +// const auto reduce_dim = n <= default_block_size ? 
n : default_block_size; +// hipLaunchKernelGGL(kernel::reduce_max_nnz, dim3(reduce_dim), +// dim3(default_block_size), 0, 0, num_rows, +// as_hip_type(nnz_per_row.get_const_data()), +// as_hip_type(block_results.get_data())); +// +// hipLaunchKernelGGL(kernel::reduce_max_nnz, dim3(1), +// dim3(default_block_size), 0, 0, reduce_dim, +// as_hip_type(block_results.get_const_data()), +// as_hip_type(d_result.get_data())); +// +// *result = exec->copy_val_to_host(d_result.get_const_data()); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void convert_to_hybrid(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Hybrid *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto ell_val = result->get_ell_values(); +// auto ell_col = result->get_ell_col_idxs(); +// auto coo_val = result->get_coo_values(); +// auto coo_col = result->get_coo_col_idxs(); +// auto coo_row = result->get_coo_row_idxs(); +// const auto stride = result->get_ell_stride(); +// const auto max_nnz_per_row = +// result->get_ell_num_stored_elements_per_row(); const auto num_rows = +// result->get_size()[0]; const auto coo_num_stored_elements = +// result->get_coo_num_stored_elements(); auto grid_dim = +// ceildiv(max_nnz_per_row * num_rows, default_block_size); +// +// hipLaunchKernelGGL(kernel::initialize_zero_ell, dim3(grid_dim), +// dim3(default_block_size), 0, 0, max_nnz_per_row, +// stride, as_hip_type(ell_val), as_hip_type(ell_col)); +// +// grid_dim = ceildiv(num_rows, default_block_size); +// auto coo_offset = Array(exec, num_rows); +// hipLaunchKernelGGL(kernel::calculate_hybrid_coo_row_nnz, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, +// max_nnz_per_row, +// as_hip_type(source->get_const_row_ptrs()), +// as_hip_type(coo_offset.get_data())); +// +// components::prefix_sum(exec, coo_offset.get_data(), num_rows); +// +// grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); +// hipLaunchKernelGGL(kernel::fill_in_hybrid, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, stride, +// max_nnz_per_row, +// as_hip_type(source->get_const_values()), +// as_hip_type(source->get_const_row_ptrs()), +// as_hip_type(source->get_const_col_idxs()), +// as_hip_type(coo_offset.get_const_data()), +// as_hip_type(ell_val), as_hip_type(ell_col), +// as_hip_type(coo_val), as_hip_type(coo_col), +// as_hip_type(coo_row)); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// auto row_ptrs = source->get_const_row_ptrs(); +// auto grid_dim = ceildiv(num_rows, default_block_size); +// +// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), +// dim3(default_block_size), 0, 0, num_rows, +// as_hip_type(row_ptrs), +// as_hip_type(result->get_data())); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void sort_by_column_index(std::shared_ptr exec, + matrix::Fbcsr *to_sort) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// if (hipsparse::is_supported::value) { +// auto handle = 
exec->get_hipsparse_handle(); +// auto descr = hipsparse::create_mat_descr(); +// auto m = IndexType(to_sort->get_size()[0]); +// auto n = IndexType(to_sort->get_size()[1]); +// auto nnz = IndexType(to_sort->get_num_stored_elements()); +// auto row_ptrs = to_sort->get_const_row_ptrs(); +// auto col_idxs = to_sort->get_col_idxs(); +// auto vals = to_sort->get_values(); +// +// // copy values +// Array tmp_vals_array(exec, nnz); +// exec->copy(nnz, vals, tmp_vals_array.get_data()); +// auto tmp_vals = tmp_vals_array.get_const_data(); +// +// // init identity permutation +// Array permutation_array(exec, nnz); +// auto permutation = permutation_array.get_data(); +// hipsparse::create_identity_permutation(handle, nnz, permutation); +// +// // allocate buffer +// size_type buffer_size{}; +// hipsparse::fbcsrsort_buffer_size(handle, m, n, nnz, row_ptrs, +// col_idxs, +// buffer_size); +// Array buffer_array{exec, buffer_size}; +// auto buffer = buffer_array.get_data(); +// +// // sort column indices +// hipsparse::fbcsrsort(handle, m, n, nnz, descr, row_ptrs, col_idxs, +// permutation, buffer); +// +// // sort values +// hipsparse::gather(handle, nnz, tmp_vals, vals, permutation); +// +// hipsparse::destroy(descr); +// } else { +// GKO_NOT_IMPLEMENTED; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// *is_sorted = true; +// auto cpu_array = Array::view(exec->get_master(), 1, is_sorted); +// auto gpu_array = Array{exec, cpu_array}; +// auto block_size = default_block_size; +// auto num_rows = static_cast(to_check->get_size()[0]); +// auto num_blocks = ceildiv(num_rows, block_size); +// hipLaunchKernelGGL( +// HIP_KERNEL_NAME(kernel::check_unsorted), dim3(num_blocks), +// dim3(block_size), 0, 0, to_check->get_const_row_ptrs(), +// to_check->get_const_col_idxs(), num_rows, gpu_array.get_data()); +// cpu_array = gpu_array; +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto nnz = orig->get_num_stored_elements(); +// const auto diag_size = diag->get_size()[0]; +// const auto num_blocks = +// ceildiv(config::warp_size * diag_size, default_block_size); +// +// const auto orig_values = orig->get_const_values(); +// const auto orig_row_ptrs = orig->get_const_row_ptrs(); +// const auto orig_col_idxs = orig->get_const_col_idxs(); +// auto diag_values = diag->get_values(); +// +// hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::extract_diagonal), +// dim3(num_blocks), dim3(default_block_size), 0, 0, +// diag_size, nnz, as_hip_type(orig_values), +// as_hip_type(orig_row_ptrs), as_hip_type(orig_col_idxs), +// as_hip_type(diag_values)); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/test/matrix/CMakeLists.txt b/hip/test/matrix/CMakeLists.txt index 94e92f08f5c..4a32b5272f7 100644 --- a/hip/test/matrix/CMakeLists.txt +++ b/hip/test/matrix/CMakeLists.txt @@ -3,5 +3,6 @@ ginkgo_create_hip_test(csr_kernels) 
ginkgo_create_hip_test(dense_kernels) ginkgo_create_hip_test(diagonal_kernels) ginkgo_create_hip_test(ell_kernels) +ginkgo_create_hip_test(fbcsr_kernels) ginkgo_create_hip_test(hybrid_kernels) ginkgo_create_hip_test(sellp_kernels) diff --git a/hip/test/matrix/fbcsr_kernels.hip.cpp b/hip/test/matrix/fbcsr_kernels.hip.cpp new file mode 100644 index 00000000000..79b873cb02d --- /dev/null +++ b/hip/test/matrix/fbcsr_kernels.hip.cpp @@ -0,0 +1,866 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/fbcsr_kernels.hpp" +#include "hip/test/utils.hip.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: + using Vec = gko::matrix::Dense<>; + using Mtx = gko::matrix::Fbcsr<>; + using ComplexVec = gko::matrix::Dense>; + using ComplexMtx = gko::matrix::Fbcsr>; + + Fbcsr() : mtx_size(532, 231), rand_engine(42) {} + + void SetUp() + { + ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + hip = gko::HipExecutor::create(0, ref); + } + + void TearDown() + { + if (hip != nullptr) { + ASSERT_NO_THROW(hip->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(std::shared_ptr strategy, + int num_vectors = 1) + { + mtx = Mtx::create(ref, strategy); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + square_mtx = Mtx::create(ref, strategy); + square_mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[0], 1)); + expected = gen_mtx(mtx_size[0], num_vectors, 1); + y = gen_mtx(mtx_size[1], num_vectors, 1); + alpha = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + dmtx = Mtx::create(hip, strategy); + dmtx->copy_from(mtx.get()); + square_dmtx = Mtx::create(hip, strategy); + square_dmtx->copy_from(square_mtx.get()); + dresult = Vec::create(hip); + dresult->copy_from(expected.get()); + dy = Vec::create(hip); + dy->copy_from(y.get()); + dalpha = Vec::create(hip); + dalpha->copy_from(alpha.get()); + dbeta = Vec::create(hip); + dbeta->copy_from(beta.get()); + } + + void set_up_apply_complex_data( + std::shared_ptr strategy) + { + complex_mtx = ComplexMtx::create(ref, strategy); + complex_mtx->copy_from( + gen_mtx(mtx_size[0], mtx_size[1], 1)); + complex_dmtx = ComplexMtx::create(hip, strategy); + complex_dmtx->copy_from(complex_mtx.get()); + } + + struct matrix_pair { + std::unique_ptr ref; + std::unique_ptr hip; + }; + + matrix_pair gen_unsorted_mtx() + { + constexpr int min_nnz_per_row = 2; // Must be at least 2 + auto local_mtx_ref = + gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); + for (size_t row = 0; row < mtx_size[0]; ++row) { + const auto row_ptrs = local_mtx_ref->get_const_row_ptrs(); + const auto start_row = row_ptrs[row]; + auto col_idx = local_mtx_ref->get_col_idxs() + start_row; + auto vals = local_mtx_ref->get_values() + start_row; + const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; + auto swap_idx_dist = + std::uniform_int_distribution<>(0, nnz_in_this_row - 1); + // shuffle `nnz_in_this_row / 2` times + for (size_t perm = 0; perm < nnz_in_this_row; perm += 2) { + const auto idx1 = swap_idx_dist(rand_engine); + const auto idx2 = swap_idx_dist(rand_engine); + std::swap(col_idx[idx1], col_idx[idx2]); + std::swap(vals[idx1], vals[idx2]); + } + } + auto local_mtx_hip = Mtx::create(hip); + local_mtx_hip->copy_from(local_mtx_ref.get()); + + return {std::move(local_mtx_ref), std::move(local_mtx_hip)}; + } + + std::shared_ptr ref; + std::shared_ptr hip; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + + std::unique_ptr mtx; + std::unique_ptr complex_mtx; + std::unique_ptr square_mtx; + 
std::unique_ptr expected; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + + std::unique_ptr dmtx; + std::unique_ptr complex_dmtx; + std::unique_ptr square_dmtx; + std::unique_ptr dresult; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Fbcsr, StrategyAfterCopyIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// ASSERT_EQ(mtx->get_strategy()->get_name(), +// dmtx->get_strategy()->get_name()); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithHipsparse) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithHipsparse) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithClassical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithClassical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), 
dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithAutomatical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip), 3); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip), 3); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithClassical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithClassical) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithMergePath) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(), 3); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToFbcsrMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// mtx->apply(alpha.get(), trans.get(), beta.get(), square_mtx.get()); +// dmtx->apply(dalpha.get(), d_trans.get(), dbeta.get(), square_dmtx.get()); +// +// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); +// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, SimpleApplyToFbcsrMatrixIsEquivalentToRef) 
+GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// mtx->apply(trans.get(), square_mtx.get()); +// dmtx->apply(d_trans.get(), square_dmtx.get()); +// +// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); +// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// auto a = gen_mtx(mtx_size[0], mtx_size[1], 0); +// auto b = gen_mtx(mtx_size[0], mtx_size[1], 0); +// auto da = Mtx::create(hip); +// auto db = Mtx::create(hip); +// da->copy_from(a.get()); +// db->copy_from(b.get()); +// auto id = gko::matrix::Identity::create(ref, +// mtx_size[1]); auto did = +// gko::matrix::Identity::create(hip, mtx_size[1]); +// +// a->apply(alpha.get(), id.get(), beta.get(), b.get()); +// da->apply(dalpha.get(), did.get(), dbeta.get(), db.get()); +// +// GKO_ASSERT_MTX_NEAR(b, db, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(b, db); +// ASSERT_TRUE(db->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, TransposeIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), +// static_cast(trans.get()), 0.0); +// ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, ConvertToDenseIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto dense_mtx = gko::matrix::Dense<>::create(ref); +// auto ddense_mtx = gko::matrix::Dense<>::create(hip); +// +// mtx->convert_to(dense_mtx.get()); +// dmtx->convert_to(ddense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToDenseIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto dense_mtx = gko::matrix::Dense<>::create(ref); +// auto ddense_mtx = gko::matrix::Dense<>::create(hip); +// +// mtx->move_to(dense_mtx.get()); +// dmtx->move_to(ddense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToEllIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto ell_mtx = gko::matrix::Ell<>::create(ref); +// auto dell_mtx = gko::matrix::Ell<>::create(hip); +// +// mtx->convert_to(ell_mtx.get()); +// dmtx->convert_to(dell_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToEllIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto ell_mtx = gko::matrix::Ell<>::create(ref); +// auto dell_mtx = gko::matrix::Ell<>::create(hip); +// +// mtx->move_to(ell_mtx.get()); +// dmtx->move_to(dell_mtx.get()); +// +// 
GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToSparsityFbcsrIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); +// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(hip); +// +// mtx->convert_to(sparsity_mtx.get()); +// dmtx->convert_to(d_sparsity_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToSparsityFbcsrIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); +// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(hip); +// +// mtx->move_to(sparsity_mtx.get()); +// dmtx->move_to(d_sparsity_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, CalculateMaxNnzPerRowIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// gko::size_type max_nnz_per_row; +// gko::size_type dmax_nnz_per_row; +// +// gko::kernels::reference::fbcsr::calculate_max_nnz_per_row(ref, mtx.get(), +// &max_nnz_per_row); +// gko::kernels::hip::fbcsr::calculate_max_nnz_per_row(hip, dmtx.get(), +// &dmax_nnz_per_row); +// +// ASSERT_EQ(max_nnz_per_row, dmax_nnz_per_row); +//} + + +TEST_F(Fbcsr, ConvertToCooIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto coo_mtx = gko::matrix::Coo<>::create(ref); +// auto dcoo_mtx = gko::matrix::Coo<>::create(hip); +// +// mtx->convert_to(coo_mtx.get()); +// dmtx->convert_to(dcoo_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToCooIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto coo_mtx = gko::matrix::Coo<>::create(ref); +// auto dcoo_mtx = gko::matrix::Coo<>::create(hip); +// +// mtx->move_to(coo_mtx.get()); +// dmtx->move_to(dcoo_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToSellpIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); +// auto dsellp_mtx = gko::matrix::Sellp<>::create(hip); +// +// mtx->convert_to(sellp_mtx.get()); +// dmtx->convert_to(dsellp_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToSellpIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); +// auto dsellp_mtx = gko::matrix::Sellp<>::create(hip); +// +// mtx->move_to(sellp_mtx.get()); +// dmtx->move_to(dsellp_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertsEmptyToSellp) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code 
imported from matrix/csr if needed +// auto dempty_mtx = Mtx::create(hip); +// auto dsellp_mtx = gko::matrix::Sellp<>::create(hip); +// +// dempty_mtx->convert_to(dsellp_mtx.get()); +// +// ASSERT_EQ(hip->copy_val_to_host(dsellp_mtx->get_const_slice_sets()), 0); +// ASSERT_FALSE(dsellp_mtx->get_size()); +//} + + +TEST_F(Fbcsr, CalculateTotalColsIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// gko::size_type total_cols; +// gko::size_type dtotal_cols; +// +// gko::kernels::reference::fbcsr::calculate_total_cols( +// ref, mtx.get(), &total_cols, 2, gko::matrix::default_slice_size); +// gko::kernels::hip::fbcsr::calculate_total_cols( +// hip, dmtx.get(), &dtotal_cols, 2, gko::matrix::default_slice_size); +// +// ASSERT_EQ(total_cols, dtotal_cols); +//} + + +TEST_F(Fbcsr, CalculatesNonzerosPerRow) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared()); +// gko::Array row_nnz(ref, mtx->get_size()[0]); +// gko::Array drow_nnz(hip, dmtx->get_size()[0]); +// +// gko::kernels::reference::fbcsr::calculate_nonzeros_per_row(ref, mtx.get(), +// &row_nnz); +// gko::kernels::hip::fbcsr::calculate_nonzeros_per_row(hip, dmtx.get(), +// &drow_nnz); +// +// GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +//} + + +TEST_F(Fbcsr, ConvertToHybridIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid_type = gko::matrix::Hybrid<>; +// set_up_apply_data(std::make_shared()); +// auto hybrid_mtx = Hybrid_type::create( +// ref, std::make_shared(2)); +// auto dhybrid_mtx = Hybrid_type::create( +// hip, std::make_shared(2)); +// +// mtx->convert_to(hybrid_mtx.get()); +// dmtx->convert_to(dhybrid_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToHybridIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid_type = gko::matrix::Hybrid<>; +// set_up_apply_data(std::make_shared()); +// auto hybrid_mtx = Hybrid_type::create( +// ref, std::make_shared(2)); +// auto dhybrid_mtx = Hybrid_type::create( +// hip, std::make_shared(2)); +// +// mtx->move_to(hybrid_mtx.get()); +// dmtx->move_to(dhybrid_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, RecognizeSortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// bool is_sorted_hip{}; +// bool is_sorted_ref{}; +// +// is_sorted_ref = mtx->is_sorted_by_column_index(); +// is_sorted_hip = dmtx->is_sorted_by_column_index(); +// +// ASSERT_EQ(is_sorted_ref, is_sorted_hip); +//} + + +TEST_F(Fbcsr, RecognizeUnsortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto uns_mtx = gen_unsorted_mtx(); +// bool is_sorted_hip{}; +// bool is_sorted_ref{}; +// +// is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); +// is_sorted_hip = uns_mtx.hip->is_sorted_by_column_index(); +// +// ASSERT_EQ(is_sorted_ref, is_sorted_hip); +//} + + +TEST_F(Fbcsr, SortSortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// 
set_up_apply_data(std::make_shared(hip)); +// +// mtx->sort_by_column_index(); +// dmtx->sort_by_column_index(); +// +// // Values must be unchanged, therefore, tolerance is `0` +// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); +//} + + +TEST_F(Fbcsr, SortUnsortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto uns_mtx = gen_unsorted_mtx(); +// +// uns_mtx.ref->sort_by_column_index(); +// uns_mtx.hip->sort_by_column_index(); +// +// // Values must be unchanged, therefore, tolerance is `0` +// GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.hip, 0); +//} + + +TEST_F(Fbcsr, OneAutomaticalWorksWithDifferentMatrices) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto automatical = std::make_shared(hip); +// auto row_len_limit = std::max(automatical->nvidia_row_len_limit, +// automatical->amd_row_len_limit); +// auto load_balance_mtx = Mtx::create(ref); +// auto classical_mtx = Mtx::create(ref); +// load_balance_mtx->copy_from( +// gen_mtx(1, row_len_limit + 1000, row_len_limit + 1)); +// classical_mtx->copy_from(gen_mtx(50, 50, 1)); +// auto load_balance_mtx_d = Mtx::create(hip); +// auto classical_mtx_d = Mtx::create(hip); +// load_balance_mtx_d->copy_from(load_balance_mtx.get()); +// classical_mtx_d->copy_from(classical_mtx.get()); +// +// load_balance_mtx_d->set_strategy(automatical); +// classical_mtx_d->set_strategy(automatical); +// +// EXPECT_EQ("load_balance", load_balance_mtx_d->get_strategy()->get_name()); +// EXPECT_EQ("classical", classical_mtx_d->get_strategy()->get_name()); +// ASSERT_NE(load_balance_mtx_d->get_strategy().get(), +// classical_mtx_d->get_strategy().get()); +//} + + +TEST_F(Fbcsr, ExtractDiagonalIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// auto diag = mtx->extract_diagonal(); +// auto ddiag = dmtx->extract_diagonal(); +// +// GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +//} + + +TEST_F(Fbcsr, InplaceAbsoluteMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// mtx->compute_absolute_inplace(); +// dmtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +//} + + +TEST_F(Fbcsr, OutplaceAbsoluteMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(std::make_shared(hip)); +// +// auto abs_mtx = mtx->compute_absolute(); +// auto dabs_mtx = dmtx->compute_absolute(); +// +// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +//} + + +TEST_F(Fbcsr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_complex_data(std::make_shared(hip)); +// +// complex_mtx->compute_absolute_inplace(); +// complex_dmtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); +//} + + +TEST_F(Fbcsr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_complex_data(std::make_shared(hip)); +// +// auto abs_mtx = complex_mtx->compute_absolute(); +// auto dabs_mtx = complex_dmtx->compute_absolute(); +// +// 
GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +//} + + +} // namespace diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp new file mode 100644 index 00000000000..ac70964dfbc --- /dev/null +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -0,0 +1,569 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_FBCSR_HPP_ +#define GKO_CORE_MATRIX_FBCSR_HPP_ + + +#include +#include +#include + +#include "matrix_strategies.hpp" + + +namespace gko { +namespace matrix { + + +template +class Dense; + +template +class Coo; + +template +class Ell; + +template +class Hybrid; + +template +class Sellp; + +template +class SparsityCsr; + +template +class Fbcsr; + +template +class FbcsrBuilder; + + +/** + * FBCSR is a matrix format which stores only the nonzero coefficients by + * compressing each row of the matrix (compressed sparse row format). + * However, unlike Csr, each non-zero location stores a small dense block of + * entries having a constant size. This reduces the number of integers that need + * to be stored in order to refer to a given non-zero entry, and enables + * efficient implementation of certain block methods. + * + * The entries within each dense block are stored row-major. + * + * @note The total number of rows and the number of columns are expected to be + * divisible by the block size. + * + * The nonzero elements are stored in a 1D array row-wise, and accompanied + * with a row pointer array which stores the starting index of each block-row. + * An additional block-column index array is used to identify the block-column + * of each nonzero block. 
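+ *
+ * As a small illustration with hypothetical values, consider a 4x4 matrix
+ * with block size 2 storing three 2x2 blocks: two in the first block-row
+ * (block-columns 0 and 1) and one in the second block-row (block-column 1).
+ * Its arrays would then contain
+ *
+ * ```cpp
+ * // one entry per block-row, plus one
+ * int row_ptrs[] = {0, 2, 3};
+ * // block-column index of each stored block
+ * int col_idxs[] = {0, 1, 1};
+ * // bs*bs values per block, each block stored row-major
+ * double values[] = {1, 2, 3, 4,     // block (0,0)
+ *                    5, 6, 7, 8,     // block (0,1)
+ *                    9, 10, 11, 12}; // block (1,1)
+ * ```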
+ * + * The Fbcsr LinOp supports different operations: + * + * ```cpp + * matrix::Fbcsr *A, *B, *C; // matrices + * matrix::Dense *b, *x; // vectors tall-and-skinny matrices + * matrix::Dense *alpha, *beta; // scalars of dimension 1x1 + * matrix::Identity *I; // identity matrix + * + * // Applying to Dense matrices computes an SpMV/SpMM product + * A->apply(b, x) // x = A*b + * A->apply(alpha, b, beta, x) // x = alpha*A*b + beta*x + * + * // Applying to Fbcsr matrices computes a SpGEMM product of two sparse + * matrices A->apply(B, C) // C = A*B A->apply(alpha, B, beta, C) + * // C = alpha*A*B + beta*C + * + * // Applying to an Identity matrix computes a SpGEAM sparse matrix addition + * A->apply(alpha, I, beta, B) // B = alpha*A + beta*B + * ``` + * Both the SpGEMM and SpGEAM operation require the input matrices to be sorted + * by column index, otherwise the algorithms will produce incorrect results. + * + * @tparam ValueType precision of matrix elements + * @tparam IndexType precision of matrix indexes + * + * @ingroup fbcsr + * @ingroup mat_formats + * @ingroup LinOp + */ +template +class Fbcsr : public EnableLinOp>, + public EnableCreateMethod>, + public ConvertibleTo, IndexType>>, + public ConvertibleTo>, + public ConvertibleTo>, + // public ConvertibleTo>, + // public ConvertibleTo>, + // public ConvertibleTo>, + public ConvertibleTo>, + public DiagonalExtractable, + public ReadableFromMatrixData, + public WritableToMatrixData, + public Transposable, + public Permutable, + public EnableAbsoluteComputation< + remove_complex>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Coo; + friend class Dense; + // friend class Ell; + // friend class Hybrid; + // friend class Sellp; + friend class SparsityCsr; + friend class FbcsrBuilder; + friend class Fbcsr, IndexType>; + +public: + using value_type = ValueType; + using index_type = IndexType; + using transposed_type = Fbcsr; + using mat_data = matrix_data; + using absolute_type = remove_complex; + + using strategy_type = + matrix_strategy::strategy_type>; + + + void convert_to(Fbcsr *result) const override + { + bool same_executor = this->get_executor() == result->get_executor(); + // NOTE: as soon as strategies are improved, this can be reverted + result->values_ = this->values_; + result->col_idxs_ = this->col_idxs_; + result->row_ptrs_ = this->row_ptrs_; + result->startrow_ = this->startrow_; + result->set_size(this->get_size()); + result->bs_ = this->bs_; + if (!same_executor) { + convert_strategy_helper(result); + } else { + result->set_strategy(std::move(this->get_strategy()->copy())); + } + // END NOTE + } + + void move_to(Fbcsr *result) override + { + bool same_executor = this->get_executor() == result->get_executor(); + EnableLinOp::move_to(result); + if (!same_executor) { + matrix_strategy::strategy_rebuild_helper(result); + } + } + friend class Fbcsr, IndexType>; + + void convert_to( + Fbcsr, IndexType> *result) const override; + + void move_to(Fbcsr, IndexType> *result) override; + + void convert_to(Dense *other) const override; + + void move_to(Dense *other) override; + + void convert_to(Coo *result) const override; + + void move_to(Coo *result) override; + + // void convert_to(Ell *result) const override; + + // void move_to(Ell *result) override; + + // void convert_to(Hybrid *result) const override; + + // void move_to(Hybrid *result) override; + + // void convert_to(Sellp *result) const override; + + // void move_to(Sellp *result) override; + + void convert_to(SparsityCsr *result) 
const override; + + void move_to(SparsityCsr *result) override; + + /// Convert COO data into block CSR + /** @warning Unlike Csr::read, here explicit non-zeros are NOT dropped. + */ + void read(const mat_data &data) override; + + void write(mat_data &data) const override; + + std::unique_ptr transpose() const override; + + std::unique_ptr conj_transpose() const override; + + std::unique_ptr row_permute( + const Array *permutation_indices) const override; + + std::unique_ptr column_permute( + const Array *permutation_indices) const override; + + std::unique_ptr inverse_row_permute( + const Array *inverse_permutation_indices) const override; + + std::unique_ptr inverse_column_permute( + const Array *inverse_permutation_indices) const override; + + std::unique_ptr> extract_diagonal() const override; + + std::unique_ptr compute_absolute() const override; + + void compute_absolute_inplace() override; + + /** + * Sorts all (value, col_idx) pairs in each row by column index + */ + void sort_by_column_index(); + + /* + * Tests if all row entry pairs (value, col_idx) are sorted by column index + * + * @returns True if all row entry pairs (value, col_idx) are sorted by + * column index + */ + bool is_sorted_by_column_index() const; + + /** + * Returns the values of the matrix. + * + * @return the values of the matrix. + */ + value_type *get_values() noexcept { return values_.get_data(); } + + /// @see Fbcsr::get_const_values() + const value_type *get_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * @copydoc Fbcsr::get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type *get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns the column indexes of the matrix. + * + * @return the column indexes of the matrix. + */ + index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + + /// @see Fbcsr::get_const_col_idxs() + const index_type *get_col_idxs() const noexcept + { + return col_idxs_.get_const_data(); + } + + /** + * @copydoc Fbcsr::get_col_idxs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type *get_const_col_idxs() const noexcept + { + return col_idxs_.get_const_data(); + } + + /** + * Returns the row pointers of the matrix. + * + * @return the row pointers of the matrix. + */ + index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); } + + /// @see Fbcsr::get_const_row_ptrs() + const index_type *get_row_ptrs() const noexcept + { + return row_ptrs_.get_const_data(); + } + + /** + * @copydoc Fbcsr::get_row_ptrs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type *get_const_row_ptrs() const noexcept + { + return row_ptrs_.get_const_data(); + } + + /** + * Returns the starting rows. + * + * @return the starting rows. 
+ */ + index_type *get_srow() noexcept { return startrow_.get_data(); } + + const index_type *get_srow() const noexcept + { + return startrow_.get_const_data(); + } + + /** + * @copydoc Fbcsr::get_srow() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type *get_const_srow() const noexcept + { + return startrow_.get_const_data(); + } + + /** + * Returns the number of the srow stored elements (involved warps) + * + * @return the number of the srow stored elements (involved warps) + */ + size_type get_num_srow_elements() const noexcept + { + return startrow_.get_num_elems(); + } + + /** + * Returns the number of elements explicitly stored in the matrix. + * + * @return the number of elements explicitly stored in the matrix + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + /** Returns the strategy + * + * @return the strategy + */ + std::shared_ptr get_strategy() const noexcept + { + return strategy_; + } + + /** + * Set the strategy + * + * @param strategy the fbcsr strategy + */ + void set_strategy(std::shared_ptr strategy) + { + strategy_ = std::move(strategy->copy()); + this->make_srow(); + } + + int get_block_size() const { return bs_; } + + void set_block_size(const int block_size) { bs_ = block_size; } + +protected: + using classical = matrix_strategy::classical>; + + /** + * Creates an uninitialized FBCSR matrix with a block size of 1. + * + * @param exec Executor associated to the matrix + * @param strategy the strategy of FBCSR + */ + Fbcsr(std::shared_ptr exec, + std::shared_ptr strategy) + : Fbcsr(std::move(exec), dim<2>{}, {}, 1, std::move(strategy)) + {} + + /** + * Creates an uninitialized FBCSR matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_nonzeros number of nonzeros + * @param block_size Size of the small dense square blocks + * @param strategy the strategy of FBCSR + */ + Fbcsr(std::shared_ptr exec, const dim<2> &size = dim<2>{}, + size_type num_nonzeros = {}, int block_size = 1, + std::shared_ptr strategy = + std::make_shared()); + + /** + * Creates a FBCSR matrix from already allocated (and initialized) row + * pointer, column index and value arrays. + * + * @tparam ValuesArray type of `values` array + * @tparam ColIdxsArray type of `col_idxs` array + * @tparam RowPtrsArray type of `row_ptrs` array + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param block_size + * @param values array of matrix values + * @param col_idxs array of column indexes + * @param row_ptrs array of row pointers + * + * @note If one of `row_ptrs`, `col_idxs` or `values` is not an rvalue, not + * an array of IndexType, IndexType and ValueType, respectively, or + * is on the wrong executor, an internal copy of that array will be + * created, and the original array data will not be used in the + * matrix. 
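+ *
+ * A brief usage sketch with hypothetical data (an executor `exec` is
+ * assumed), creating the matrix through the corresponding create()
+ * overload; the arrays follow the block layout described in the class
+ * documentation:
+ *
+ * ```cpp
+ * // 4x4 matrix with block size 2 and three stored 2x2 blocks
+ * auto mtx = gko::matrix::Fbcsr<double, gko::int32>::create(
+ *     exec, gko::dim<2>{4, 4}, 2,
+ *     gko::Array<double>{exec, {1., 2., 3., 4., 5., 6., 7., 8.,
+ *                               9., 10., 11., 12.}},
+ *     gko::Array<gko::int32>{exec, {0, 1, 1}},
+ *     gko::Array<gko::int32>{exec, {0, 2, 3}});
+ * ```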
+ */ + template + Fbcsr( + std::shared_ptr exec, const dim<2> &size, + int block_size, ValuesArray &&values, ColIdxsArray &&col_idxs, + RowPtrsArray &&row_ptrs, + std::shared_ptr strategy = std::make_shared()) + : EnableLinOp(exec, size), + bs_{block_size}, + values_{exec, std::forward(values)}, + col_idxs_{exec, std::forward(col_idxs)}, + row_ptrs_{exec, std::forward(row_ptrs)}, + startrow_(exec), + strategy_(strategy->copy()) + { + GKO_ASSERT_EQ(values_.get_num_elems(), + col_idxs_.get_num_elems() * bs_ * bs_); + GKO_ASSERT_EQ(this->get_size()[0] / bs_ + 1, row_ptrs_.get_num_elems()); + this->make_srow(); + } + + void apply_impl(const LinOp *b, LinOp *x) const override; + + void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, + LinOp *x) const override; + + // TODO clean this up as soon as we improve strategy_type + template + void convert_strategy_helper(FbcsrType *result) const + { + auto strat = this->get_strategy().get(); + std::shared_ptr> + new_strat; + using classical = matrix_strategy::classical; + using load_balance = matrix_strategy::load_balance; + using automatic = matrix_strategy::automatic; + + if (dynamic_cast(strat)) { + new_strat = std::make_shared(); + } else { + auto rexec = result->get_executor(); + auto cuda_exec = + std::dynamic_pointer_cast(rexec); + auto hip_exec = std::dynamic_pointer_cast(rexec); + auto lb = dynamic_cast(strat); + if (cuda_exec) { + if (lb) { + new_strat = std::make_shared(cuda_exec); + } else { + new_strat = std::make_shared(cuda_exec); + } + } else if (hip_exec) { + if (lb) { + new_strat = std::make_shared(hip_exec); + } else { + new_strat = std::make_shared(hip_exec); + } + } else { + // Try to preserve this executor's configuration + auto this_cuda_exec = + std::dynamic_pointer_cast( + this->get_executor()); + auto this_hip_exec = + std::dynamic_pointer_cast( + this->get_executor()); + if (this_cuda_exec) { + if (lb) { + new_strat = + std::make_shared(this_cuda_exec); + } else { + new_strat = std::make_shared(this_cuda_exec); + } + } else if (this_hip_exec) { + if (lb) { + new_strat = + std::make_shared(this_hip_exec); + } else { + new_strat = std::make_shared(this_hip_exec); + } + } else { + // We had a load balance or automatic strategy from a non + // HIP or Cuda executor and are moving to a non HIP or Cuda + // executor. + // FIXME this creates a long delay + if (lb) { + new_strat = std::make_shared(); + } else { + new_strat = std::make_shared(); + } + } + } + } + result->set_strategy(new_strat); + } + + /** + * Computes srow. It should be run after changing any row_ptrs_ value. + */ + void make_srow() + { + startrow_.resize_and_reset( + strategy_->calc_size(values_.get_num_elems() / bs_ / bs_)); + strategy_->process(row_ptrs_, &startrow_); + } + +private: + int bs_; ///< Block size + Array values_; + Array col_idxs_; + Array row_ptrs_; + Array startrow_; + std::shared_ptr strategy_; +}; + + +} // namespace matrix +} // namespace gko + + +#endif // GKO_CORE_MATRIX_FBCSR_HPP_ diff --git a/include/ginkgo/core/matrix/matrix_strategies.hpp b/include/ginkgo/core/matrix/matrix_strategies.hpp new file mode 100644 index 00000000000..c774209f7d7 --- /dev/null +++ b/include/ginkgo/core/matrix/matrix_strategies.hpp @@ -0,0 +1,503 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_MATRIX_STRATEGY_HPP_ +#define GKO_CORE_MATRIX_MATRIX_STRATEGY_HPP_ + + +#include +#include + + +namespace gko { +namespace matrix { + + +namespace matrix_strategy { + + +template +class automatic; + +/** + * strategy_type is to decide how to set the fbcsr algorithm. + * + * The practical strategy method should inherit strategy_type and implement + * its `process`, `calc_size` function and the corresponding device kernel. + */ +template +class strategy_type { + friend class automatic; + +public: + using index_type = typename MtxType::index_type; + + /** + * Creates a strategy_type. + * + * @param name the name of strategy + */ + strategy_type(std::string name) : name_(name) {} + + /** + * Returns the name of strategy + * + * @return the name of strategy + */ + std::string get_name() { return name_; } + + /** + * Computes srow according to row pointers. + * + * @param mtx_row_ptrs the row pointers of the matrix + * @param mtx_srow the srow of the matrix + */ + virtual void process(const Array &mtx_row_ptrs, + Array *mtx_srow) = 0; + + /** + * Computes the srow size according to the number of nonzeros. + * + * @param nnz the number of nonzeros + * + * @return the size of srow + */ + virtual int64_t calc_size(const int64_t nnz) = 0; + + /** + * Copy a strategy. This is a workaround until strategies are revamped, + * since strategies like `automatic` do not work when actually shared. + */ + virtual std::shared_ptr copy() = 0; + +protected: + void set_name(std::string name) { name_ = name; } + +private: + std::string name_; +}; + +/** + * classical is a strategy_type which uses the same number of threads on + * each block-row. Classical strategy uses multithreads to calculate on parts of + * rows and then do a reduction of these threads results. The number of + * threads per row depends on the max number of stored elements per row. + */ +template +class classical : public strategy_type { +public: + using index_type = typename strategy_type::index_type; + + /** + * Creates a classical strategy. 
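+ *
+ * A usage sketch (with a hypothetical executor `exec` and hypothetical
+ * value/index types), e.g. when creating an Fbcsr matrix with this
+ * strategy:
+ *
+ * ```cpp
+ * using Mtx = gko::matrix::Fbcsr<double, gko::int32>;
+ * auto strategy =
+ *     std::make_shared<gko::matrix::matrix_strategy::classical<Mtx>>();
+ * auto mtx = Mtx::create(exec, strategy);
+ * ```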
+ */ + classical() : strategy_type("classical"), max_length_per_row_(0) {} + + void process(const Array &mtx_row_ptrs, + Array *mtx_srow) override + { + auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); + Array row_ptrs_host(host_mtx_exec); + const bool is_mtx_on_host{host_mtx_exec == mtx_row_ptrs.get_executor()}; + const index_type *row_ptrs{}; + if (is_mtx_on_host) { + row_ptrs = mtx_row_ptrs.get_const_data(); + } else { + row_ptrs_host = mtx_row_ptrs; + row_ptrs = row_ptrs_host.get_const_data(); + } + auto num_rows = mtx_row_ptrs.get_num_elems() - 1; + max_length_per_row_ = 0; + for (index_type i = 1; i < num_rows + 1; i++) { + max_length_per_row_ = + std::max(max_length_per_row_, row_ptrs[i] - row_ptrs[i - 1]); + } + } + + int64_t calc_size(const int64_t nnz) override { return 0; } + + index_type get_max_length_per_row() const noexcept + { + return max_length_per_row_; + } + + std::shared_ptr> copy() override + { + return std::make_shared>(); + } + +private: + index_type max_length_per_row_; +}; + +/** + * load_balance is a strategy_type which uses the load balance algorithm. + */ +template +class load_balance : public strategy_type { +public: + using index_type = typename strategy_type::index_type; + + /** + * Creates a load_balance strategy. + */ + load_balance() + : load_balance(std::move( + gko::CudaExecutor::create(0, gko::OmpExecutor::create()))) + {} + + /** + * Creates a load_balance strategy with CUDA executor. + * + * @param exec the CUDA executor + */ + load_balance(std::shared_ptr exec) + : load_balance(exec->get_num_warps(), exec->get_warp_size()) + {} + + /** + * Creates a load_balance strategy with HIP executor. + * + * @param exec the HIP executor + */ + load_balance(std::shared_ptr exec) + : load_balance(exec->get_num_warps(), exec->get_warp_size(), false) + {} + + /** + * Creates a load_balance strategy with specified parameters + * + * @param nwarps The number of warps in the executor + * @param warp_size The warp size of the executor + * @param cuda_params Whether Nvidia-based warp parameters should be used. + * + * @note The warp_size must be the size of full warp. When using this + * constructor, set_strategy needs to be called with correct + * parameters which is replaced during the conversion. 
+ */ + load_balance(int64_t nwarps, int warp_size = 32, bool cuda_params = true) + : strategy_type("load_balance"), + nwarps_(nwarps), + warp_size_(warp_size), + cuda_params_(cuda_params) + {} + + void process(const Array &mtx_row_ptrs, + Array *mtx_srow) override + { + auto nwarps = mtx_srow->get_num_elems(); + + if (nwarps > 0) { + auto host_srow_exec = mtx_srow->get_executor()->get_master(); + auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); + const bool is_srow_on_host{host_srow_exec == + mtx_srow->get_executor()}; + const bool is_mtx_on_host{host_mtx_exec == + mtx_row_ptrs.get_executor()}; + Array row_ptrs_host(host_mtx_exec); + Array srow_host(host_srow_exec); + const index_type *row_ptrs{}; + index_type *srow{}; + if (is_srow_on_host) { + srow = mtx_srow->get_data(); + } else { + srow_host = *mtx_srow; + srow = srow_host.get_data(); + } + if (is_mtx_on_host) { + row_ptrs = mtx_row_ptrs.get_const_data(); + } else { + row_ptrs_host = mtx_row_ptrs; + row_ptrs = row_ptrs_host.get_const_data(); + } + for (size_type i = 0; i < nwarps; i++) { + srow[i] = 0; + } + const size_type num_rows = mtx_row_ptrs.get_num_elems() - 1; + const index_type num_elems = row_ptrs[num_rows]; + for (size_type i = 0; i < num_rows; i++) { + const auto num = + (ceildiv(row_ptrs[i + 1], warp_size_) * nwarps); + const auto den = ceildiv(num_elems, warp_size_); + auto bucket = ceildiv(num, den); + if (bucket < nwarps) { + srow[bucket]++; + } + } + // find starting row for thread i + for (size_type i = 1; i < nwarps; i++) { + srow[i] += srow[i - 1]; + } + if (!is_srow_on_host) { + *mtx_srow = srow_host; + } + } + } + + int64_t calc_size(const int64_t nnz) override + { + if (warp_size_ > 0) { + int multiple = 8; + if (nnz >= 2e8) { + multiple = 2048; + } else if (nnz >= 2e7) { + multiple = 512; + } else if (nnz >= 2e6) { + multiple = 128; + } else if (nnz >= 2e5) { + multiple = 32; + } + +#if GINKGO_HIP_PLATFORM_HCC + if (!cuda_params_) { + multiple = 8; + if (nnz >= 1e7) { + multiple = 64; + } else if (nnz >= 1e6) { + multiple = 16; + } + } +#endif // GINKGO_HIP_PLATFORM_HCC + + auto nwarps = nwarps_ * multiple; + return min(ceildiv(nnz, warp_size_), int64_t(nwarps)); + } else { + return 0; + } + } + + std::shared_ptr> copy() override + { + return std::make_shared>(nwarps_, warp_size_, + cuda_params_); + } + +private: + int64_t nwarps_; + int warp_size_; + bool cuda_params_; +}; + +template +class automatic : public strategy_type { +public: + using index_type = typename strategy_type::index_type; + + /* Use imbalance strategy when the maximum number of nonzero per row is + * more than 1024 on NVIDIA hardware */ + const index_type nvidia_row_len_limit = 1024; + /* Use imbalance strategy when the matrix has more more than 1e6 on + * NVIDIA hardware */ + const index_type nvidia_nnz_limit = 1e6; + /* Use imbalance strategy when the maximum number of nonzero per row is + * more than 768 on AMD hardware */ + const index_type amd_row_len_limit = 768; + /* Use imbalance strategy when the matrix has more more than 1e8 on AMD + * hardware */ + const index_type amd_nnz_limit = 1e8; + + /** + * Creates an automatic strategy. + */ + automatic() + : automatic(std::move( + gko::CudaExecutor::create(0, gko::OmpExecutor::create()))) + {} + + /** + * Creates an automatic strategy with CUDA executor. + * + * @param exec the CUDA executor + */ + automatic(std::shared_ptr exec) + : automatic(exec->get_num_warps(), exec->get_warp_size()) + {} + + /** + * Creates an automatic strategy with HIP executor. 
+ * + * @param exec the HIP executor + */ + automatic(std::shared_ptr exec) + : automatic(exec->get_num_warps(), exec->get_warp_size(), false) + {} + + /** + * Creates an automatic strategy with specified parameters + * + * @param nwarps the number of warps in the executor + * @param warp_size the warp size of the executor + * @param cuda_strategy whether the `cuda_strategy` needs to be used. + * + * @note The warp_size must be the size of full warp. When using this + * constructor, set_strategy needs to be called with correct + * parameters which is replaced during the conversion. + */ + automatic(int64_t nwarps, int warp_size = 32, bool cuda_strategy = true) + : strategy_type("automatic"), + nwarps_(nwarps), + warp_size_(warp_size), + cuda_strategy_(cuda_strategy), + max_length_per_row_(0) + {} + + void process(const Array &mtx_row_ptrs, + Array *mtx_srow) override + { + // if the number of stored elements is larger than or + // the maximum number of stored elements per row is larger than + // , use load_balance otherwise use classical + index_type nnz_limit = nvidia_nnz_limit; + index_type row_len_limit = nvidia_row_len_limit; +#if GINKGO_HIP_PLATFORM_HCC + if (!cuda_strategy_) { + nnz_limit = amd_nnz_limit; + row_len_limit = amd_row_len_limit; + } +#endif // GINKGO_HIP_PLATFORM_HCC + auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); + const bool is_mtx_on_host{host_mtx_exec == mtx_row_ptrs.get_executor()}; + Array row_ptrs_host(host_mtx_exec); + const index_type *row_ptrs{}; + if (is_mtx_on_host) { + row_ptrs = mtx_row_ptrs.get_const_data(); + } else { + row_ptrs_host = mtx_row_ptrs; + row_ptrs = row_ptrs_host.get_const_data(); + } + const auto num_rows = mtx_row_ptrs.get_num_elems() - 1; + if (row_ptrs[num_rows] > nnz_limit) { + load_balance actual_strategy(nwarps_, warp_size_, + cuda_strategy_); + if (is_mtx_on_host) { + actual_strategy.process(mtx_row_ptrs, mtx_srow); + } else { + actual_strategy.process(row_ptrs_host, mtx_srow); + } + this->set_name(actual_strategy.get_name()); + } else { + index_type maxnum = 0; + for (index_type i = 1; i < num_rows + 1; i++) { + maxnum = std::max(maxnum, row_ptrs[i] - row_ptrs[i - 1]); + } + if (maxnum > row_len_limit) { + load_balance actual_strategy(nwarps_, warp_size_, + cuda_strategy_); + if (is_mtx_on_host) { + actual_strategy.process(mtx_row_ptrs, mtx_srow); + } else { + actual_strategy.process(row_ptrs_host, mtx_srow); + } + this->set_name(actual_strategy.get_name()); + } else { + classical actual_strategy; + if (is_mtx_on_host) { + actual_strategy.process(mtx_row_ptrs, mtx_srow); + max_length_per_row_ = + actual_strategy.get_max_length_per_row(); + } else { + actual_strategy.process(row_ptrs_host, mtx_srow); + max_length_per_row_ = + actual_strategy.get_max_length_per_row(); + } + this->set_name(actual_strategy.get_name()); + } + } + } + + int64_t calc_size(const int64_t nnz) override + { + return std::make_shared>(nwarps_, warp_size_, + cuda_strategy_) + ->calc_size(nnz); + } + + index_type get_max_length_per_row() const noexcept + { + return max_length_per_row_; + } + + std::shared_ptr> copy() override + { + return std::make_shared>(nwarps_, warp_size_, + cuda_strategy_); + } + +private: + int64_t nwarps_; + int warp_size_; + bool cuda_strategy_; + index_type max_length_per_row_; +}; + + +/** + * When strategy is load_balance or automatic, rebuild the strategy + * according to executor's property. + * + * @param result the matrix. 
+ */ +template +void strategy_rebuild_helper(MtxType *const result) +{ + // TODO (script:fbcsr): change the code imported from matrix/csr if needed + // using load_balance = typename Fbcsr::load_balance; + // using automatic = typename Fbcsr::automatic; + auto strategy = result->get_strategy(); + auto executor = result->get_executor(); + if (std::dynamic_pointer_cast>(strategy)) { + if (auto exec = + std::dynamic_pointer_cast(executor)) { + result->set_strategy(std::make_shared>(exec)); + } else if (auto exec = std::dynamic_pointer_cast( + executor)) { + result->set_strategy(std::make_shared>(exec)); + } + } else if (std::dynamic_pointer_cast>(strategy)) { + if (auto exec = + std::dynamic_pointer_cast(executor)) { + result->set_strategy(std::make_shared>(exec)); + } else if (auto exec = std::dynamic_pointer_cast( + executor)) { + result->set_strategy(std::make_shared>(exec)); + } + } +} + + +} // namespace matrix_strategy + + +} // namespace matrix +} // namespace gko + +#endif diff --git a/include/ginkgo/core/matrix/sparsity_csr.hpp b/include/ginkgo/core/matrix/sparsity_csr.hpp index 087c0538b7d..b5f16f5ae3e 100644 --- a/include/ginkgo/core/matrix/sparsity_csr.hpp +++ b/include/ginkgo/core/matrix/sparsity_csr.hpp @@ -48,6 +48,9 @@ namespace matrix { template class Csr; +template +class Fbcsr; + /** * SparsityCsr is a matrix format which stores only the sparsity pattern of a @@ -77,6 +80,7 @@ class SparsityCsr friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class Csr; + friend class Fbcsr; public: using EnableLinOp::convert_to; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 3ae5bcfdd96..30408b8970b 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -81,8 +81,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include +#include #include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 80c6262c30c..acfe33d3ef6 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -20,6 +20,7 @@ target_sources(ginkgo_omp matrix/dense_kernels.cpp matrix/diagonal_kernels.cpp matrix/ell_kernels.cpp + matrix/fbcsr_kernels.cpp matrix/hybrid_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp diff --git a/omp/components/fbcsr_spgeam.hpp b/omp/components/fbcsr_spgeam.hpp new file mode 100644 index 00000000000..e4a06532ed3 --- /dev/null +++ b/omp/components/fbcsr_spgeam.hpp @@ -0,0 +1,31 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..3ad97e0e6bc --- /dev/null +++ b/omp/matrix/fbcsr_kernels.cpp @@ -0,0 +1,884 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/base/allocator.hpp" +#include "core/base/iterator_factory.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/fbcsr_builder.hpp" +#include "omp/components/fbcsr_spgeam.hpp" +#include "omp/components/format_conversion.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The Compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = a->get_const_col_idxs(); +// auto vals = a->get_const_values(); +// +//#pragma omp parallel for +// for (size_type row = 0; row < a->get_size()[0]; ++row) { +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) = zero(); +// } +// for (size_type k = row_ptrs[row]; +// k < static_cast(row_ptrs[row + 1]); ++k) { +// auto val = vals[k]; +// auto col = col_idxs[k]; +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) += val * b->at(col, j); +// } +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = a->get_const_col_idxs(); +// auto vals = a->get_const_values(); +// auto valpha = alpha->at(0, 0); +// auto vbeta = beta->at(0, 0); +// +//#pragma omp parallel for +// for (size_type row = 0; row < a->get_size()[0]; ++row) { +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) *= vbeta; +// } +// for (size_type k = row_ptrs[row]; +// k < static_cast(row_ptrs[row + 1]); ++k) { +// auto val = vals[k]; +// auto col = col_idxs[k]; +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) += valpha * val * b->at(col, j); +// } +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void spgemm_insert_row(unordered_set &cols, + const matrix::Fbcsr *c, + size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = c->get_const_row_ptrs(); +// auto col_idxs = c->get_const_col_idxs(); +// cols.insert(col_idxs + row_ptrs[row], col_idxs + row_ptrs[row + 1]); +//} + + +template +void spgemm_insert_row2(unordered_set &cols, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// for (size_type a_nz = a_row_ptrs[row]; +// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { +// auto a_col = a_col_idxs[a_nz]; +// auto b_row = a_col; +// cols.insert(b_col_idxs + b_row_ptrs[b_row], +// b_col_idxs + b_row_ptrs[b_row + 1]); +// } +//} + + +template +void spgemm_accumulate_row(map &cols, + const matrix::Fbcsr *c, + ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = c->get_const_row_ptrs(); +// auto col_idxs = c->get_const_col_idxs(); +// auto vals = c->get_const_values(); +// for (size_type c_nz = row_ptrs[row]; c_nz < size_type(row_ptrs[row + 1]); +// ++c_nz) { +// auto c_col = col_idxs[c_nz]; +// auto c_val = vals[c_nz]; +// cols[c_col] += scale * c_val; +// } +//} + + +template +void 
spgemm_accumulate_row2(map &cols, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto a_vals = a->get_const_values(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// auto b_vals = b->get_const_values(); +// for (size_type a_nz = a_row_ptrs[row]; +// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { +// auto a_col = a_col_idxs[a_nz]; +// auto a_val = a_vals[a_nz]; +// auto b_row = a_col; +// for (size_type b_nz = b_row_ptrs[b_row]; +// b_nz < size_type(b_row_ptrs[b_row + 1]); ++b_nz) { +// auto b_col = b_col_idxs[b_nz]; +// auto b_val = b_vals[b_nz]; +// cols[b_col] += scale * a_val * b_val; +// } +// } +//} + + +template +void spgemm(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = a->get_size()[0]; +// +// // first sweep: count nnz for each row +// auto c_row_ptrs = c->get_row_ptrs(); +// +// unordered_set local_col_idxs(exec); +//#pragma omp parallel for firstprivate(local_col_idxs) +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_col_idxs.clear(); +// spgemm_insert_row2(local_col_idxs, a, b, a_row); +// c_row_ptrs[a_row] = local_col_idxs.size(); +// } +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); +// +// // second sweep: accumulate non-zeros +// auto new_nnz = c_row_ptrs[num_rows]; +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// c_col_idxs_array.resize_and_reset(new_nnz); +// c_vals_array.resize_and_reset(new_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// +// map local_row_nzs(exec); +//#pragma omp parallel for firstprivate(local_row_nzs) +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_row_nzs.clear(); +// spgemm_accumulate_row2(local_row_nzs, a, b, one(), a_row); +// // store result +// auto c_nz = c_row_ptrs[a_row]; +// for (auto pair : local_row_nzs) { +// c_col_idxs[c_nz] = pair.first; +// c_vals[c_nz] = pair.second; +// ++c_nz; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); + + +template +void advanced_spgemm(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + const matrix::Dense *beta, + const matrix::Fbcsr *d, + matrix::Fbcsr *c) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = a->get_size()[0]; +// auto valpha = alpha->at(0, 0); +// auto vbeta = beta->at(0, 0); +// +// // first sweep: count nnz for each row +// auto c_row_ptrs = c->get_row_ptrs(); +// +// unordered_set local_col_idxs(exec); +//#pragma omp parallel for firstprivate(local_col_idxs) +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_col_idxs.clear(); +// spgemm_insert_row(local_col_idxs, d, a_row); +// spgemm_insert_row2(local_col_idxs, a, b, a_row); +// c_row_ptrs[a_row] = local_col_idxs.size(); +// } +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); +// +// // second sweep: accumulate 
non-zeros +// auto new_nnz = c_row_ptrs[num_rows]; +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// c_col_idxs_array.resize_and_reset(new_nnz); +// c_vals_array.resize_and_reset(new_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// +// map local_row_nzs(exec); +//#pragma omp parallel for firstprivate(local_row_nzs) +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_row_nzs.clear(); +// spgemm_accumulate_row(local_row_nzs, d, vbeta, a_row); +// spgemm_accumulate_row2(local_row_nzs, a, b, valpha, a_row); +// // store result +// auto c_nz = c_row_ptrs[a_row]; +// for (auto pair : local_row_nzs) { +// c_col_idxs[c_nz] = pair.first; +// c_vals[c_nz] = pair.second; +// ++c_nz; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); + + +template +void spgeam(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *beta, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = a->get_size()[0]; +// auto valpha = alpha->at(0, 0); +// auto vbeta = beta->at(0, 0); +// +// // first sweep: count nnz for each row +// auto c_row_ptrs = c->get_row_ptrs(); +// +// abstract_spgeam( +// a, b, [](IndexType) { return IndexType{}; }, +// [](IndexType, IndexType, ValueType, ValueType, IndexType &nnz) { +// ++nnz; +// }, +// [&](IndexType row, IndexType nnz) { c_row_ptrs[row] = nnz; }); +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); +// +// // second sweep: accumulate non-zeros +// auto new_nnz = c_row_ptrs[num_rows]; +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// c_col_idxs_array.resize_and_reset(new_nnz); +// c_vals_array.resize_and_reset(new_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// +// abstract_spgeam( +// a, b, [&](IndexType row) { return c_row_ptrs[row]; }, +// [&](IndexType, IndexType col, ValueType a_val, ValueType b_val, +// IndexType &nz) { +// c_vals[nz] = valpha * a_val + vbeta * b_val; +// c_col_idxs[nz] = col; +// ++nz; +// }, +// [](IndexType, IndexType) {}); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// convert_ptrs_to_idxs(ptrs, num_rows, idxs); +//} + + +template +void convert_to_coo(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Coo *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = result->get_size()[0]; +// +// auto row_idxs = result->get_row_idxs(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// +// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; +//{ +// 
TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = source->get_size()[0]; +// auto num_cols = source->get_size()[1]; +// auto row_ptrs = source->get_const_row_ptrs(); +// auto col_idxs = source->get_const_col_idxs(); +// auto vals = source->get_const_values(); +// +//#pragma omp parallel for +// for (size_type row = 0; row < num_rows; ++row) { +// for (size_type col = 0; col < num_cols; ++col) { +// result->at(row, col) = zero(); +// } +// for (size_type i = row_ptrs[row]; +// i < static_cast(row_ptrs[row + 1]); ++i) { +// result->at(row, col_idxs[i]) = vals[i]; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_sellp(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Sellp *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); + + +template +void convert_to_ell(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Ell *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); + + +template +inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, + const IndexType *col_idxs, + const ValueType *fbcsr_vals, + IndexType *row_idxs, IndexType *col_ptrs, + ValueType *csc_vals, + UnaryOperator op) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// for (size_type row = 0; row < num_rows; ++row) { +// for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; ++i) { +// const auto dest_idx = col_ptrs[col_idxs[i]]++; +// row_idxs[dest_idx] = row; +// csc_vals[dest_idx] = op(fbcsr_vals[i]); +// } +// } +//} + + +template +void transpose_and_transform(std::shared_ptr exec, + matrix::Fbcsr *trans, + const matrix::Fbcsr *orig, + UnaryOperator op) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto trans_row_ptrs = trans->get_row_ptrs(); +// auto orig_row_ptrs = orig->get_const_row_ptrs(); +// auto trans_col_idxs = trans->get_col_idxs(); +// auto orig_col_idxs = orig->get_const_col_idxs(); +// auto trans_vals = trans->get_values(); +// auto orig_vals = orig->get_const_values(); +// +// auto orig_num_cols = orig->get_size()[1]; +// auto orig_num_rows = orig->get_size()[0]; +// auto orig_nnz = orig_row_ptrs[orig_num_rows]; +// +// trans_row_ptrs[0] = 0; +// convert_unsorted_idxs_to_ptrs(orig_col_idxs, orig_nnz, trans_row_ptrs + 1, +// orig_num_cols); +// +// convert_fbcsr_to_csc(orig_num_rows, orig_row_ptrs, orig_col_idxs, +// orig_vals, +// trans_col_idxs, trans_row_ptrs + 1, trans_vals, op); +//} + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// transpose_and_transform(exec, trans, orig, +// [](const ValueType x) { return x; }); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// transpose_and_transform(exec, trans, orig, +// [](const ValueType x) { return conj(x); }); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + 
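The OMP kernels in this file are stubs whose commented-out bodies still follow the scalar CSR layout from which they were generated. For orientation only, the following is a minimal, untested sketch of how the inner loops change once row_ptrs and col_idxs index bs x bs blocks stored row-major in values (the layout described for fixed_block.hpp); the function name and the raw-pointer interface are placeholders for illustration and are not part of this patch.

template <typename ValueType, typename IndexType>
void block_spmv_sketch(const IndexType num_block_rows, const int bs,
                       const IndexType *const row_ptrs,
                       const IndexType *const col_idxs,
                       const ValueType *const values,
                       const ValueType *const b, ValueType *const c)
{
    // Each block-row updates bs consecutive entries of c; the k-th stored
    // block starts at values + k * bs * bs and is stored row-major.
    for (IndexType brow = 0; brow < num_block_rows; ++brow) {
        for (int i = 0; i < bs; ++i) {
            c[brow * bs + i] = ValueType{};
        }
        for (IndexType k = row_ptrs[brow]; k < row_ptrs[brow + 1]; ++k) {
            const IndexType bcol = col_idxs[k];
            const ValueType *const block = values + k * bs * bs;
            // Dense bs x bs block times the matching segment of b.
            for (int i = 0; i < bs; ++i) {
                for (int j = 0; j < bs; ++j) {
                    c[brow * bs + i] += block[i * bs + j] * b[bcol * bs + j];
                }
            }
        }
    }
}

An OpenMP variant of this sketch could simply parallelize the outer block-row loop, since different block-rows write disjoint segments of c.
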
+template +void calculate_total_cols(std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result, size_type stride_factor, + size_type slice_size) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void convert_to_hybrid(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Hybrid *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = result->get_size()[0]; +// auto num_cols = result->get_size()[1]; +// auto strategy = result->get_strategy(); +// auto ell_lim = strategy->get_ell_num_stored_elements_per_row(); +// auto coo_lim = strategy->get_coo_nnz(); +// auto coo_val = result->get_coo_values(); +// auto coo_col = result->get_coo_col_idxs(); +// auto coo_row = result->get_coo_row_idxs(); +// const auto max_nnz_per_row = +// result->get_ell_num_stored_elements_per_row(); +// +//// Initial Hybrid Matrix +//#pragma omp parallel for +// for (size_type i = 0; i < max_nnz_per_row; i++) { +// for (size_type j = 0; j < result->get_ell_stride(); j++) { +// result->ell_val_at(j, i) = zero(); +// result->ell_col_at(j, i) = 0; +// } +// } +// +// const auto fbcsr_row_ptrs = source->get_const_row_ptrs(); +// const auto fbcsr_vals = source->get_const_values(); +// auto coo_offset = Array(exec, num_rows); +// auto coo_offset_val = coo_offset.get_data(); +// +// coo_offset_val[0] = 0; +//#pragma omp parallel for +// for (size_type i = 1; i < num_rows; i++) { +// auto temp = fbcsr_row_ptrs[i] - fbcsr_row_ptrs[i - 1]; +// coo_offset_val[i] = (temp > max_nnz_per_row) * (temp - +// max_nnz_per_row); +// } +// +// auto workspace = Array(exec, num_rows); +// auto workspace_val = workspace.get_data(); +// for (size_type i = 1; i < num_rows; i <<= 1) { +//#pragma omp parallel for +// for (size_type j = i; j < num_rows; j++) { +// workspace_val[j] = coo_offset_val[j] + coo_offset_val[j - i]; +// } +//#pragma omp parallel for +// for (size_type j = i; j < num_rows; j++) { +// coo_offset_val[j] = workspace_val[j]; +// } +// } +// +//#pragma omp parallel for +// for (IndexType row = 0; row < num_rows; row++) { +// size_type ell_idx = 0; +// size_type fbcsr_idx = fbcsr_row_ptrs[row]; +// size_type coo_idx = coo_offset_val[row]; +// while (fbcsr_idx < fbcsr_row_ptrs[row + 1]) { +// const auto val = fbcsr_vals[fbcsr_idx]; +// if (ell_idx < ell_lim) { +// result->ell_val_at(row, ell_idx) = val; +// result->ell_col_at(row, ell_idx) = +// source->get_const_col_idxs()[fbcsr_idx]; +// ell_idx++; +// } else { +// coo_val[coo_idx] = val; +// coo_col[coo_idx] = source->get_const_col_idxs()[fbcsr_idx]; +// coo_row[coo_idx] = row; +// coo_idx++; +// } +// fbcsr_idx++; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); + + +template +void row_permute_impl(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// auto orig_row_ptrs = orig->get_const_row_ptrs(); +// auto orig_col_idxs = orig->get_const_col_idxs(); +// auto 
orig_vals = orig->get_const_values(); +// auto rp_row_ptrs = row_permuted->get_row_ptrs(); +// auto rp_col_idxs = row_permuted->get_col_idxs(); +// auto rp_vals = row_permuted->get_values(); +// size_type num_rows = orig->get_size()[0]; +// size_type num_nnz = orig->get_num_stored_elements(); +// +// size_type cur_ptr = 0; +// rp_row_ptrs[0] = cur_ptr; +// vector orig_num_nnz_per_row(num_rows, 0, exec); +//#pragma omp parallel for +// for (size_type row = 0; row < num_rows; ++row) { +// orig_num_nnz_per_row[row] = orig_row_ptrs[row + 1] - +// orig_row_ptrs[row]; +// } +// for (size_type row = 0; row < num_rows; ++row) { +// rp_row_ptrs[row + 1] = +// rp_row_ptrs[row] + orig_num_nnz_per_row[perm[row]]; +// } +// rp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; +//#pragma omp parallel for +// for (size_type row = 0; row < num_rows; ++row) { +// auto new_row = perm[row]; +// auto new_k = orig_row_ptrs[new_row]; +// for (size_type k = rp_row_ptrs[row]; +// k < size_type(rp_row_ptrs[row + 1]); ++k) { +// rp_col_idxs[k] = orig_col_idxs[new_k]; +// rp_vals[k] = orig_vals[new_k]; +// new_k++; +// } +// } +//} + + +template +void row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// row_permute_impl(exec, permutation_indices, orig, row_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); + + +template +void inverse_row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// Array inv_perm(*permutation_indices); +// auto iperm = inv_perm.get_data(); +//#pragma omp parallel for +// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { +// iperm[perm[ind]] = ind; +// } +// +// row_permute_impl(exec, &inv_perm, orig, row_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); + + +template +void column_permute_impl(const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// auto orig_row_ptrs = orig->get_const_row_ptrs(); +// auto orig_col_idxs = orig->get_const_col_idxs(); +// auto orig_vals = orig->get_const_values(); +// auto cp_row_ptrs = column_permuted->get_row_ptrs(); +// auto cp_col_idxs = column_permuted->get_col_idxs(); +// auto cp_vals = column_permuted->get_values(); +// auto num_nnz = orig->get_num_stored_elements(); +// size_type num_rows = orig->get_size()[0]; +// size_type num_cols = orig->get_size()[1]; +// +//#pragma omp parallel for +// for (size_type row = 0; row < num_rows; ++row) { +// cp_row_ptrs[row] = orig_row_ptrs[row]; +// for (size_type k = orig_row_ptrs[row]; +// k < size_type(orig_row_ptrs[row + 1]); ++k) { +// cp_col_idxs[k] = perm[orig_col_idxs[k]]; +// cp_vals[k] = orig_vals[k]; +// } +// } +// cp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; +//} + + +template +void column_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO 
(script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// Array inv_perm(*permutation_indices); +// auto iperm = inv_perm.get_data(); +//#pragma omp parallel for +// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { +// iperm[perm[ind]] = ind; +// } +// column_permute_impl(&inv_perm, orig, column_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); + + +template +void inverse_column_permute( + std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// column_permute_impl(permutation_indices, orig, column_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto row_ptrs = source->get_const_row_ptrs(); +// auto row_nnz_val = result->get_data(); +// +//#pragma omp parallel for +// for (size_type i = 0; i < result->get_num_elems(); i++) { +// row_nnz_val[i] = row_ptrs[i + 1] - row_ptrs[i]; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void sort_by_column_index(std::shared_ptr exec, + matrix::Fbcsr *to_sort) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto values = to_sort->get_values(); +// auto row_ptrs = to_sort->get_row_ptrs(); +// auto col_idxs = to_sort->get_col_idxs(); +// const auto number_rows = to_sort->get_size()[0]; +//#pragma omp parallel for +// for (size_type i = 0; i < number_rows; ++i) { +// auto start_row_idx = row_ptrs[i]; +// auto row_nnz = row_ptrs[i + 1] - start_row_idx; +// auto helper = detail::IteratorFactory( +// col_idxs + start_row_idx, values + start_row_idx, row_nnz); +// std::sort(helper.begin(), helper.end()); +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto row_ptrs = to_check->get_const_row_ptrs(); +// const auto col_idxs = to_check->get_const_col_idxs(); +// const auto size = to_check->get_size(); +// bool local_is_sorted = true; +//#pragma omp parallel for reduction(&& : local_is_sorted) +// for (size_type i = 0; i < size[0]; ++i) { +// // Skip comparison if any thread detects that it is not sorted +// if (local_is_sorted) { +// for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { +// if (col_idxs[idx - 1] > col_idxs[idx]) { +// local_is_sorted = false; +// break; +// } +// } +// } +// } +// *is_sorted = local_is_sorted; +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto row_ptrs = orig->get_const_row_ptrs(); +// const auto col_idxs = 
orig->get_const_col_idxs(); +// const auto values = orig->get_const_values(); +// const auto diag_size = diag->get_size()[0]; +// auto diag_values = diag->get_values(); +// +//#pragma omp parallel for +// for (size_type row = 0; row < diag_size; ++row) { +// for (size_type idx = row_ptrs[row]; idx < row_ptrs[row + 1]; ++idx) { +// if (col_idxs[idx] == row) { +// diag_values[row] = values[idx]; +// break; +// } +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/omp/test/matrix/CMakeLists.txt b/omp/test/matrix/CMakeLists.txt index 0c9fd00eb30..ff8ed2d0118 100644 --- a/omp/test/matrix/CMakeLists.txt +++ b/omp/test/matrix/CMakeLists.txt @@ -3,6 +3,7 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(sellp_kernels) ginkgo_create_test(sparsity_csr_kernels) diff --git a/omp/test/matrix/fbcsr_kernels.cpp b/omp/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..a1826bfa899 --- /dev/null +++ b/omp/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,662 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/fbcsr_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: + using Arr = gko::Array; + using Mtx = gko::matrix::Fbcsr<>; + using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; + using ComplexMtx = gko::matrix::Fbcsr>; + + Fbcsr() : mtx_size(532, 231), rand_engine(42) {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + omp = gko::OmpExecutor::create(); + } + + void TearDown() + { + if (omp != nullptr) { + ASSERT_NO_THROW(omp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(int num_vectors = 1) + { + mtx = Mtx::create(ref); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + complex_mtx = ComplexMtx::create(ref); + complex_mtx->copy_from( + gen_mtx(mtx_size[0], mtx_size[1], 1)); + square_mtx = Mtx::create(ref); + square_mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[0], 1)); + expected = gen_mtx(mtx_size[0], num_vectors, 1); + y = gen_mtx(mtx_size[1], num_vectors, 1); + alpha = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + dmtx = Mtx::create(omp); + dmtx->copy_from(mtx.get()); + complex_dmtx = ComplexMtx::create(omp); + complex_dmtx->copy_from(complex_mtx.get()); + square_dmtx = Mtx::create(omp); + square_dmtx->copy_from(square_mtx.get()); + dresult = Vec::create(omp); + dresult->copy_from(expected.get()); + dy = Vec::create(omp); + dy->copy_from(y.get()); + dalpha = Vec::create(omp); + dalpha->copy_from(alpha.get()); + dbeta = Vec::create(omp); + dbeta->copy_from(beta.get()); + + std::vector tmp(mtx->get_size()[0], 0); + auto rng = std::default_random_engine{}; + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rng); + std::vector tmp2(mtx->get_size()[1], 0); + std::iota(tmp2.begin(), tmp2.end(), 0); + std::shuffle(tmp2.begin(), tmp2.end(), rng); + rpermute_idxs = + std::unique_ptr(new Arr{ref, tmp.begin(), tmp.end()}); + drpermute_idxs = + std::unique_ptr(new Arr{omp, tmp.begin(), tmp.end()}); + cpermute_idxs = + std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); + dcpermute_idxs = + std::unique_ptr(new Arr{omp, tmp2.begin(), tmp2.end()}); + } + + struct matrix_pair { + std::unique_ptr ref; + std::unique_ptr omp; + }; + + matrix_pair gen_unsorted_mtx() + { + constexpr int min_nnz_per_row = 2; // Must be at least 2 + auto local_mtx_ref = + gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); + for (size_t row = 0; row < mtx_size[0]; ++row) { + const auto row_ptrs = local_mtx_ref->get_const_row_ptrs(); + const auto start_row = row_ptrs[row]; + auto col_idx = local_mtx_ref->get_col_idxs() + start_row; + auto vals = local_mtx_ref->get_values() + start_row; + const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; + auto swap_idx_dist = + std::uniform_int_distribution<>(0, nnz_in_this_row - 1); + // shuffle `nnz_in_this_row / 2` times + for (size_t perm = 0; perm < nnz_in_this_row; perm += 2) { + const auto idx1 = swap_idx_dist(rand_engine); + const auto idx2 = swap_idx_dist(rand_engine); + std::swap(col_idx[idx1], 
col_idx[idx2]); + std::swap(vals[idx1], vals[idx2]); + } + } + auto local_mtx_omp = Mtx::create(omp); + local_mtx_omp->copy_from(local_mtx_ref.get()); + + return {std::move(local_mtx_ref), std::move(local_mtx_omp)}; + } + + std::shared_ptr ref; + std::shared_ptr omp; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + + std::unique_ptr mtx; + std::unique_ptr complex_mtx; + std::unique_ptr square_mtx; + std::unique_ptr expected; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + + std::unique_ptr dmtx; + std::unique_ptr complex_dmtx; + std::unique_ptr square_dmtx; + std::unique_ptr dresult; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; + std::unique_ptr rpermute_idxs; + std::unique_ptr drpermute_idxs; + std::unique_ptr cpermute_idxs; + std::unique_ptr dcpermute_idxs; +}; + + +TEST_F(Fbcsr, SimpleApplyIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(3); +// +// mtx->apply(y.get(), expected.get()); +// dmtx->apply(dy.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, AdvancedApplyToFbcsrMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// mtx->apply(alpha.get(), trans.get(), beta.get(), square_mtx.get()); +// dmtx->apply(dalpha.get(), d_trans.get(), dbeta.get(), square_dmtx.get()); +// +// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); +// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, SimpleApplyToFbcsrMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// mtx->apply(trans.get(), square_mtx.get()); +// dmtx->apply(d_trans.get(), square_dmtx.get()); +// +// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); +// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto a = gen_mtx(mtx_size[0], mtx_size[1], 0); +// auto b = gen_mtx(mtx_size[0], mtx_size[1], 0); +// auto da = Mtx::create(omp); +// auto db = Mtx::create(omp); +// da->copy_from(a.get()); +// db->copy_from(b.get()); +// auto id = gko::matrix::Identity::create(ref, +// mtx_size[1]); auto did = +// 
gko::matrix::Identity::create(omp, mtx_size[1]); +// +// a->apply(alpha.get(), id.get(), beta.get(), b.get()); +// da->apply(dalpha.get(), did.get(), dbeta.get(), db.get()); +// +// GKO_ASSERT_MTX_NEAR(b, db, 1e-14); +// GKO_ASSERT_MTX_EQ_SPARSITY(b, db); +// ASSERT_TRUE(db->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(3); +// +// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); +// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); +// +// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +//} + + +TEST_F(Fbcsr, TransposeIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// auto trans = mtx->transpose(); +// auto d_trans = dmtx->transpose(); +// +// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), +// static_cast(trans.get()), 0.0); +// ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, ConjugateTransposeIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// auto trans = complex_mtx->conj_transpose(); +// auto d_trans = complex_dmtx->conj_transpose(); +// +// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), +// static_cast(trans.get()), 0.0); +// ASSERT_TRUE( +// static_cast(d_trans.get())->is_sorted_by_column_index()); +//} + + +TEST_F(Fbcsr, ConvertToCooIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto coo_mtx = gko::matrix::Coo<>::create(ref); +// auto dcoo_mtx = gko::matrix::Coo<>::create(omp); +// +// mtx->convert_to(coo_mtx.get()); +// dmtx->convert_to(dcoo_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToCooIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto coo_mtx = gko::matrix::Coo<>::create(ref); +// auto dcoo_mtx = gko::matrix::Coo<>::create(omp); +// +// mtx->move_to(coo_mtx.get()); +// dmtx->move_to(dcoo_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToDenseIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto dense_mtx = gko::matrix::Dense<>::create(ref); +// auto ddense_mtx = gko::matrix::Dense<>::create(omp); +// +// mtx->convert_to(dense_mtx.get()); +// dmtx->convert_to(ddense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(ddense_mtx.get(), dense_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToDenseIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto dense_mtx = gko::matrix::Dense<>::create(ref); +// auto ddense_mtx = gko::matrix::Dense<>::create(omp); +// +// mtx->move_to(dense_mtx.get()); +// dmtx->move_to(ddense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(ddense_mtx.get(), dense_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, ConvertToSparsityFbcsrIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto sparsity_mtx = 
gko::matrix::SparsityFbcsr<>::create(ref); +// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(omp); +// +// mtx->convert_to(sparsity_mtx.get()); +// dmtx->convert_to(d_sparsity_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(d_sparsity_mtx.get(), sparsity_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToSparsityFbcsrIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); +// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(omp); +// +// mtx->move_to(sparsity_mtx.get()); +// dmtx->move_to(d_sparsity_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(d_sparsity_mtx.get(), sparsity_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, CalculatesNonzerosPerRow) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// gko::Array row_nnz(ref, mtx->get_size()[0]); +// gko::Array drow_nnz(omp, dmtx->get_size()[0]); +// +// gko::kernels::reference::fbcsr::calculate_nonzeros_per_row(ref, mtx.get(), +// &row_nnz); +// gko::kernels::omp::fbcsr::calculate_nonzeros_per_row(omp, dmtx.get(), +// &drow_nnz); +// +// GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +//} + + +TEST_F(Fbcsr, ConvertToHybridIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid_type = gko::matrix::Hybrid<>; +// set_up_apply_data(); +// auto hybrid_mtx = Hybrid_type::create( +// ref, std::make_shared(2)); +// auto dhybrid_mtx = Hybrid_type::create( +// omp, std::make_shared(2)); +// +// mtx->convert_to(hybrid_mtx.get()); +// dmtx->convert_to(dhybrid_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, MoveToHybridIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid_type = gko::matrix::Hybrid<>; +// set_up_apply_data(); +// auto hybrid_mtx = Hybrid_type::create( +// ref, std::make_shared(2)); +// auto dhybrid_mtx = Hybrid_type::create( +// omp, std::make_shared(2)); +// +// mtx->move_to(hybrid_mtx.get()); +// dmtx->move_to(dhybrid_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); +//} + + +TEST_F(Fbcsr, IsRowPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto r_permute = mtx->row_permute(rpermute_idxs.get()); +// auto dr_permute = dmtx->row_permute(drpermute_idxs.get()); +// +// GKO_ASSERT_MTX_NEAR(static_cast(r_permute.get()), +// static_cast(dr_permute.get()), 0); +//} + + +TEST_F(Fbcsr, IsColPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto c_permute = mtx->column_permute(cpermute_idxs.get()); +// auto dc_permute = dmtx->column_permute(dcpermute_idxs.get()); +// +// GKO_ASSERT_MTX_NEAR(static_cast(c_permute.get()), +// static_cast(dc_permute.get()), 0); +//} + + +TEST_F(Fbcsr, IsInverseRowPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto inverse_r_permute = mtx->inverse_row_permute(rpermute_idxs.get()); +// auto d_inverse_r_permute = +// dmtx->inverse_row_permute(drpermute_idxs.get()); +// +// GKO_ASSERT_MTX_NEAR(static_cast(inverse_r_permute.get()), +// 
static_cast(d_inverse_r_permute.get()), 0); +//} + + +TEST_F(Fbcsr, IsInverseColPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// auto inverse_c_permute = mtx->inverse_column_permute(cpermute_idxs.get()); +// auto d_inverse_c_permute = +// dmtx->inverse_column_permute(dcpermute_idxs.get()); +// +// GKO_ASSERT_MTX_NEAR(static_cast(inverse_c_permute.get()), +// static_cast(d_inverse_c_permute.get()), 0); +//} + + +TEST_F(Fbcsr, RecognizeSortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// bool is_sorted_omp{}; +// bool is_sorted_ref{}; +// +// is_sorted_ref = mtx->is_sorted_by_column_index(); +// is_sorted_omp = dmtx->is_sorted_by_column_index(); +// +// ASSERT_EQ(is_sorted_ref, is_sorted_omp); +//} + + +TEST_F(Fbcsr, RecognizeUnsortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto uns_mtx = gen_unsorted_mtx(); +// bool is_sorted_omp{}; +// bool is_sorted_ref{}; +// +// is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); +// is_sorted_omp = uns_mtx.omp->is_sorted_by_column_index(); +// +// ASSERT_EQ(is_sorted_ref, is_sorted_omp); +//} + + +TEST_F(Fbcsr, SortSortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// mtx->sort_by_column_index(); +// dmtx->sort_by_column_index(); +// +// // Values must be unchanged, therefore, tolerance is `0` +// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); +//} + + +TEST_F(Fbcsr, SortUnsortedMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto uns_mtx = gen_unsorted_mtx(); +// +// uns_mtx.ref->sort_by_column_index(); +// uns_mtx.omp->sort_by_column_index(); +// +// // Values must be unchanged, therefore, tolerance is `0` +// GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.omp, 0); +//} + + +TEST_F(Fbcsr, ExtractDiagonalIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// auto diag = mtx->extract_diagonal(); +// auto ddiag = dmtx->extract_diagonal(); +// +// GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +//} + + +TEST_F(Fbcsr, InplaceAbsoluteMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// mtx->compute_absolute_inplace(); +// dmtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +//} + + +TEST_F(Fbcsr, OutplaceAbsoluteMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// auto abs_mtx = mtx->compute_absolute(); +// auto dabs_mtx = dmtx->compute_absolute(); +// +// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +//} + + +TEST_F(Fbcsr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// complex_mtx->compute_absolute_inplace(); +// complex_dmtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); +//} + + +TEST_F(Fbcsr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) +GKO_NOT_IMPLEMENTED; 
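// A minimal sketch, not part of this patch, of the block-wise product that the
// FBCSR spmv kernels exercised by these apply tests would compute once they are
// implemented. It assumes the bs x bs blocks are stored contiguously and
// row-major in `vals` and handles a single right-hand side; every name here
// (bs, num_brows, row_ptrs, col_idxs, vals, b, c) is illustrative only and not
// the real kernel interface.
template <typename ValueType, typename IndexType>
void fbcsr_spmv_sketch(const int bs, const IndexType num_brows,
                       const IndexType *row_ptrs, const IndexType *col_idxs,
                       const ValueType *vals, const ValueType *b, ValueType *c)
{
    for (IndexType ibrow = 0; ibrow < num_brows; ++ibrow) {
        // zero the output rows belonging to this block-row
        for (int i = 0; i < bs; ++i) {
            c[ibrow * bs + i] = ValueType{};
        }
        for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; ++inz) {
            const IndexType jbcol = col_idxs[inz];       // block-column index
            const ValueType *const block = vals + inz * bs * bs;
            for (int i = 0; i < bs; ++i) {               // row inside the block
                for (int j = 0; j < bs; ++j) {           // column inside the block
                    c[ibrow * bs + i] += block[i * bs + j] * b[jbcol * bs + j];
                }
            }
        }
    }
}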
+//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// set_up_apply_data(); +// +// auto abs_mtx = complex_mtx->compute_absolute(); +// auto dabs_mtx = complex_dmtx->compute_absolute(); +// +// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +//} + + +} // namespace diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 573b555506a..e84453b8664 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -18,6 +18,7 @@ target_sources(ginkgo_reference matrix/dense_kernels.cpp matrix/diagonal_kernels.cpp matrix/ell_kernels.cpp + matrix/fbcsr_kernels.cpp matrix/hybrid_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp diff --git a/reference/components/fbcsr_spgeam.hpp b/reference/components/fbcsr_spgeam.hpp new file mode 100644 index 00000000000..e4a06532ed3 --- /dev/null +++ b/reference/components/fbcsr_spgeam.hpp @@ -0,0 +1,31 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..af0f19a8947 --- /dev/null +++ b/reference/matrix/fbcsr_kernels.cpp @@ -0,0 +1,968 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/base/allocator.hpp" +#include "core/base/iterator_factory.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/fbcsr_builder.hpp" +#include "reference/components/fbcsr_spgeam.hpp" +#include "reference/components/format_conversion.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The Compressed sparse row matrix format namespace. + * @ref Fbcsr + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = a->get_const_col_idxs(); +// auto vals = a->get_const_values(); +// +// for (size_type row = 0; row < a->get_size()[0]; ++row) { +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) = zero(); +// } +// for (size_type k = row_ptrs[row]; +// k < static_cast(row_ptrs[row + 1]); ++k) { +// auto val = vals[k]; +// auto col = col_idxs[k]; +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) += val * b->at(col, j); +// } +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = a->get_const_row_ptrs(); +// auto col_idxs = a->get_const_col_idxs(); +// auto vals = a->get_const_values(); +// auto valpha = alpha->at(0, 0); +// auto vbeta = beta->at(0, 0); +// +// for (size_type row = 0; row < a->get_size()[0]; ++row) { +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) *= vbeta; +// } +// for (size_type k = row_ptrs[row]; +// k < static_cast(row_ptrs[row + 1]); ++k) { +// auto val = vals[k]; +// auto col = col_idxs[k]; +// for (size_type j = 0; j < c->get_size()[1]; ++j) { +// c->at(row, j) += valpha * val * b->at(col, j); +// } +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void spgemm_insert_row(unordered_set 
&cols, + const matrix::Fbcsr *c, + size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = c->get_const_row_ptrs(); +// auto col_idxs = c->get_const_col_idxs(); +// cols.insert(col_idxs + row_ptrs[row], col_idxs + row_ptrs[row + 1]); +//} + + +template +void spgemm_insert_row2(unordered_set &cols, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// for (size_type a_nz = a_row_ptrs[row]; +// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { +// auto a_col = a_col_idxs[a_nz]; +// auto b_row = a_col; +// cols.insert(b_col_idxs + b_row_ptrs[b_row], +// b_col_idxs + b_row_ptrs[b_row + 1]); +// } +//} + + +template +void spgemm_accumulate_row(map &cols, + const matrix::Fbcsr *c, + ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto row_ptrs = c->get_const_row_ptrs(); +// auto col_idxs = c->get_const_col_idxs(); +// auto vals = c->get_const_values(); +// for (size_type c_nz = row_ptrs[row]; c_nz < size_type(row_ptrs[row + 1]); +// ++c_nz) { +// auto c_col = col_idxs[c_nz]; +// auto c_val = vals[c_nz]; +// cols[c_col] += scale * c_val; +// } +//} + + +template +void spgemm_accumulate_row2(map &cols, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto a_row_ptrs = a->get_const_row_ptrs(); +// auto a_col_idxs = a->get_const_col_idxs(); +// auto a_vals = a->get_const_values(); +// auto b_row_ptrs = b->get_const_row_ptrs(); +// auto b_col_idxs = b->get_const_col_idxs(); +// auto b_vals = b->get_const_values(); +// for (size_type a_nz = a_row_ptrs[row]; +// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { +// auto a_col = a_col_idxs[a_nz]; +// auto a_val = a_vals[a_nz]; +// auto b_row = a_col; +// for (size_type b_nz = b_row_ptrs[b_row]; +// b_nz < size_type(b_row_ptrs[b_row + 1]); ++b_nz) { +// auto b_col = b_col_idxs[b_nz]; +// auto b_val = b_vals[b_nz]; +// cols[b_col] += scale * a_val * b_val; +// } +// } +//} + + +template +void spgemm(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = a->get_size()[0]; +// +// // first sweep: count nnz for each row +// auto c_row_ptrs = c->get_row_ptrs(); +// +// unordered_set local_col_idxs(exec); +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_col_idxs.clear(); +// spgemm_insert_row2(local_col_idxs, a, b, a_row); +// c_row_ptrs[a_row] = local_col_idxs.size(); +// } +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); +// +// // second sweep: accumulate non-zeros +// auto new_nnz = c_row_ptrs[num_rows]; +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// c_col_idxs_array.resize_and_reset(new_nnz); +// c_vals_array.resize_and_reset(new_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = 
c_vals_array.get_data(); +// +// map local_row_nzs(exec); +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_row_nzs.clear(); +// spgemm_accumulate_row2(local_row_nzs, a, b, one(), a_row); +// // store result +// auto c_nz = c_row_ptrs[a_row]; +// for (auto pair : local_row_nzs) { +// c_col_idxs[c_nz] = pair.first; +// c_vals[c_nz] = pair.second; +// ++c_nz; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); + + +template +void advanced_spgemm(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + const matrix::Dense *beta, + const matrix::Fbcsr *d, + matrix::Fbcsr *c) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = a->get_size()[0]; +// auto valpha = alpha->at(0, 0); +// auto vbeta = beta->at(0, 0); +// +// // first sweep: count nnz for each row +// auto c_row_ptrs = c->get_row_ptrs(); +// +// unordered_set local_col_idxs(exec); +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_col_idxs.clear(); +// spgemm_insert_row(local_col_idxs, d, a_row); +// spgemm_insert_row2(local_col_idxs, a, b, a_row); +// c_row_ptrs[a_row] = local_col_idxs.size(); +// } +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); +// +// // second sweep: accumulate non-zeros +// auto new_nnz = c_row_ptrs[num_rows]; +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// c_col_idxs_array.resize_and_reset(new_nnz); +// c_vals_array.resize_and_reset(new_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// +// map local_row_nzs(exec); +// for (size_type a_row = 0; a_row < num_rows; ++a_row) { +// local_row_nzs.clear(); +// spgemm_accumulate_row(local_row_nzs, d, vbeta, a_row); +// spgemm_accumulate_row2(local_row_nzs, a, b, valpha, a_row); +// // store result +// auto c_nz = c_row_ptrs[a_row]; +// for (auto pair : local_row_nzs) { +// c_col_idxs[c_nz] = pair.first; +// c_vals[c_nz] = pair.second; +// ++c_nz; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); + + +template +void spgeam(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *beta, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = a->get_size()[0]; +// auto valpha = alpha->at(0, 0); +// auto vbeta = beta->at(0, 0); +// +// // first sweep: count nnz for each row +// auto c_row_ptrs = c->get_row_ptrs(); +// +// abstract_spgeam( +// a, b, [](IndexType) { return IndexType{}; }, +// [](IndexType, IndexType, ValueType, ValueType, IndexType &nnz) { +// ++nnz; +// }, +// [&](IndexType row, IndexType nnz) { c_row_ptrs[row] = nnz; }); +// +// // build row pointers +// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); +// +// // second sweep: accumulate non-zeros +// auto new_nnz = c_row_ptrs[num_rows]; +// matrix::FbcsrBuilder c_builder{c}; +// auto &c_col_idxs_array = c_builder.get_col_idx_array(); +// auto &c_vals_array = c_builder.get_value_array(); +// c_col_idxs_array.resize_and_reset(new_nnz); +// c_vals_array.resize_and_reset(new_nnz); +// auto c_col_idxs = c_col_idxs_array.get_data(); +// auto c_vals = c_vals_array.get_data(); +// +// 
abstract_spgeam( +// a, b, [&](IndexType row) { return c_row_ptrs[row]; }, +// [&](IndexType, IndexType col, ValueType a_val, ValueType b_val, +// IndexType &nz) { +// c_vals[nz] = valpha * a_val + vbeta * b_val; +// c_col_idxs[nz] = col; +// ++nz; +// }, +// [](IndexType, IndexType) {}); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// convert_ptrs_to_idxs(ptrs, num_rows, idxs); +//} + + +template +void convert_to_coo(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Coo *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = result->get_size()[0]; +// +// auto row_idxs = result->get_row_idxs(); +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// +// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = source->get_size()[0]; +// auto num_cols = source->get_size()[1]; +// auto row_ptrs = source->get_const_row_ptrs(); +// auto col_idxs = source->get_const_col_idxs(); +// auto vals = source->get_const_values(); +// +// for (size_type row = 0; row < num_rows; ++row) { +// for (size_type col = 0; col < num_cols; ++col) { +// result->at(row, col) = zero(); +// } +// for (size_type i = row_ptrs[row]; +// i < static_cast(row_ptrs[row + 1]); ++i) { +// result->at(row, col_idxs[i]) = vals[i]; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_sellp(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Sellp *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = result->get_size()[0]; +// auto num_cols = result->get_size()[1]; +// auto vals = result->get_values(); +// auto col_idxs = result->get_col_idxs(); +// auto slice_lengths = result->get_slice_lengths(); +// auto slice_sets = result->get_slice_sets(); +// auto slice_size = (result->get_slice_size() == 0) +// ? matrix::default_slice_size +// : result->get_slice_size(); +// auto stride_factor = (result->get_stride_factor() == 0) +// ? matrix::default_stride_factor +// : result->get_stride_factor(); +// +// const auto source_row_ptrs = source->get_const_row_ptrs(); +// const auto source_col_idxs = source->get_const_col_idxs(); +// const auto source_values = source->get_const_values(); +// +// int slice_num = ceildiv(num_rows, slice_size); +// slice_sets[0] = 0; +// for (size_type slice = 0; slice < slice_num; slice++) { +// if (slice > 0) { +// slice_sets[slice] = +// slice_sets[slice - 1] + slice_lengths[slice - 1]; +// } +// slice_lengths[slice] = 0; +// for (size_type row = 0; row < slice_size; row++) { +// size_type global_row = slice * slice_size + row; +// if (global_row >= num_rows) { +// break; +// } +// slice_lengths[slice] = +// (slice_lengths[slice] > +// source_row_ptrs[global_row + 1] - +// source_row_ptrs[global_row]) +// ? 
slice_lengths[slice] +// : source_row_ptrs[global_row + 1] - +// source_row_ptrs[global_row]; +// } +// slice_lengths[slice] = +// stride_factor * ceildiv(slice_lengths[slice], stride_factor); +// for (size_type row = 0; row < slice_size; row++) { +// size_type global_row = slice * slice_size + row; +// if (global_row >= num_rows) { +// break; +// } +// size_type sellp_ind = slice_sets[slice] * slice_size + row; +// for (size_type fbcsr_ind = source_row_ptrs[global_row]; +// fbcsr_ind < source_row_ptrs[global_row + 1]; fbcsr_ind++) { +// vals[sellp_ind] = source_values[fbcsr_ind]; +// col_idxs[sellp_ind] = source_col_idxs[fbcsr_ind]; +// sellp_ind += slice_size; +// } +// for (size_type i = sellp_ind; +// i < +// (slice_sets[slice] + slice_lengths[slice]) * slice_size + +// row; i += slice_size) { +// col_idxs[i] = 0; +// vals[i] = zero(); +// } +// } +// } +// if (slice_num > 0) { +// slice_sets[slice_num] = +// slice_sets[slice_num - 1] + slice_lengths[slice_num - 1]; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result, size_type stride_factor, + size_type slice_size) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// size_type total_cols = 0; +// const auto num_rows = source->get_size()[0]; +// const auto slice_num = ceildiv(num_rows, slice_size); +// +// const auto row_ptrs = source->get_const_row_ptrs(); +// +// for (size_type slice = 0; slice < slice_num; slice++) { +// IndexType max_nnz_per_row_in_this_slice = 0; +// for (size_type row = 0; +// row < slice_size && row + slice * slice_size < num_rows; row++) { +// size_type global_row = slice * slice_size + row; +// max_nnz_per_row_in_this_slice = +// max(row_ptrs[global_row + 1] - row_ptrs[global_row], +// max_nnz_per_row_in_this_slice); +// } +// total_cols += ceildiv(max_nnz_per_row_in_this_slice, stride_factor) * +// stride_factor; +// } +// +// *result = total_cols; +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void convert_to_ell(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Ell *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// const auto num_cols = source->get_size()[1]; +// const auto vals = source->get_const_values(); +// const auto col_idxs = source->get_const_col_idxs(); +// const auto row_ptrs = source->get_const_row_ptrs(); +// +// const auto num_stored_elements_per_row = +// result->get_num_stored_elements_per_row(); +// +// for (size_type row = 0; row < num_rows; row++) { +// for (size_type i = 0; i < num_stored_elements_per_row; i++) { +// result->val_at(row, i) = zero(); +// result->col_at(row, i) = 0; +// } +// for (size_type col_idx = 0; col_idx < row_ptrs[row + 1] - +// row_ptrs[row]; +// col_idx++) { +// result->val_at(row, col_idx) = vals[row_ptrs[row] + col_idx]; +// result->col_at(row, col_idx) = col_idxs[row_ptrs[row] + col_idx]; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); + + +template +inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, + const IndexType *col_idxs, + const ValueType *fbcsr_vals, + IndexType *row_idxs, IndexType *col_ptrs, + ValueType *csc_vals, + UnaryOperator op) 
GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// for (size_type row = 0; row < num_rows; ++row) { +// for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; ++i) { +// const auto dest_idx = col_ptrs[col_idxs[i]]++; +// row_idxs[dest_idx] = row; +// csc_vals[dest_idx] = op(fbcsr_vals[i]); +// } +// } +//} + + +template +void transpose_and_transform(std::shared_ptr exec, + matrix::Fbcsr *trans, + const matrix::Fbcsr *orig, + UnaryOperator op) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto trans_row_ptrs = trans->get_row_ptrs(); +// auto orig_row_ptrs = orig->get_const_row_ptrs(); +// auto trans_col_idxs = trans->get_col_idxs(); +// auto orig_col_idxs = orig->get_const_col_idxs(); +// auto trans_vals = trans->get_values(); +// auto orig_vals = orig->get_const_values(); +// +// auto orig_num_cols = orig->get_size()[1]; +// auto orig_num_rows = orig->get_size()[0]; +// auto orig_nnz = orig_row_ptrs[orig_num_rows]; +// +// trans_row_ptrs[0] = 0; +// convert_idxs_to_ptrs(orig_col_idxs, orig_nnz, trans_row_ptrs + 1, +// orig_num_cols); +// +// convert_fbcsr_to_csc(orig_num_rows, orig_row_ptrs, orig_col_idxs, +// orig_vals, +// trans_col_idxs, trans_row_ptrs + 1, trans_vals, op); +//} + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// transpose_and_transform(exec, trans, orig, +// [](const ValueType x) { return x; }); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// transpose_and_transform(exec, trans, orig, +// [](const ValueType x) { return conj(x); }); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto num_rows = source->get_size()[0]; +// const auto row_ptrs = source->get_const_row_ptrs(); +// IndexType max_nnz = 0; +// +// for (size_type i = 0; i < num_rows; i++) { +// max_nnz = std::max(row_ptrs[i + 1] - row_ptrs[i], max_nnz); +// } +// +// *result = max_nnz; +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void convert_to_hybrid(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Hybrid *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto num_rows = result->get_size()[0]; +// auto num_cols = result->get_size()[1]; +// auto strategy = result->get_strategy(); +// auto ell_lim = strategy->get_ell_num_stored_elements_per_row(); +// auto coo_lim = strategy->get_coo_nnz(); +// auto coo_val = result->get_coo_values(); +// auto coo_col = result->get_coo_col_idxs(); +// auto coo_row = result->get_coo_row_idxs(); +// +// // Initial Hybrid Matrix +// for (size_type i = 0; i < result->get_ell_num_stored_elements_per_row(); +// i++) { +// for (size_type j = 0; j < result->get_ell_stride(); j++) { +// result->ell_val_at(j, i) = zero(); +// 
result->ell_col_at(j, i) = 0; +// } +// } +// for (size_type i = 0; i < result->get_coo_num_stored_elements(); i++) { +// coo_val[i] = zero(); +// coo_col[i] = 0; +// coo_row[i] = 0; +// } +// +// const auto fbcsr_row_ptrs = source->get_const_row_ptrs(); +// const auto fbcsr_vals = source->get_const_values(); +// size_type fbcsr_idx = 0; +// size_type coo_idx = 0; +// for (IndexType row = 0; row < num_rows; row++) { +// size_type ell_idx = 0; +// while (fbcsr_idx < fbcsr_row_ptrs[row + 1]) { +// const auto val = fbcsr_vals[fbcsr_idx]; +// if (ell_idx < ell_lim) { +// result->ell_val_at(row, ell_idx) = val; +// result->ell_col_at(row, ell_idx) = +// source->get_const_col_idxs()[fbcsr_idx]; +// ell_idx++; +// } else { +// coo_val[coo_idx] = val; +// coo_col[coo_idx] = source->get_const_col_idxs()[fbcsr_idx]; +// coo_row[coo_idx] = row; +// coo_idx++; +// } +// fbcsr_idx++; +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); + + +template +void row_permute_impl(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// auto orig_row_ptrs = orig->get_const_row_ptrs(); +// auto orig_col_idxs = orig->get_const_col_idxs(); +// auto orig_vals = orig->get_const_values(); +// auto rp_row_ptrs = row_permuted->get_row_ptrs(); +// auto rp_col_idxs = row_permuted->get_col_idxs(); +// auto rp_vals = row_permuted->get_values(); +// size_type num_rows = orig->get_size()[0]; +// size_type num_nnz = orig->get_num_stored_elements(); +// +// size_type cur_ptr = 0; +// rp_row_ptrs[0] = cur_ptr; +// vector orig_num_nnz_per_row(num_rows, 0, exec); +// for (size_type row = 0; row < num_rows; ++row) { +// orig_num_nnz_per_row[row] = orig_row_ptrs[row + 1] - +// orig_row_ptrs[row]; +// } +// for (size_type row = 0; row < num_rows; ++row) { +// rp_row_ptrs[row + 1] = +// rp_row_ptrs[row] + orig_num_nnz_per_row[perm[row]]; +// } +// rp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; +// for (size_type row = 0; row < num_rows; ++row) { +// auto new_row = perm[row]; +// auto new_k = orig_row_ptrs[new_row]; +// for (size_type k = rp_row_ptrs[row]; +// k < size_type(rp_row_ptrs[row + 1]); ++k) { +// rp_col_idxs[k] = orig_col_idxs[new_k]; +// rp_vals[k] = orig_vals[new_k]; +// new_k++; +// } +// } +//} + + +template +void row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// row_permute_impl(exec, permutation_indices, orig, row_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); + + +template +void inverse_row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// Array inv_perm(*permutation_indices); +// auto iperm = inv_perm.get_data(); +// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { +// iperm[perm[ind]] = ind; +// } +// +// row_permute_impl(exec, &inv_perm, orig, row_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + 
GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); + + +template +void column_permute_impl(const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// auto orig_row_ptrs = orig->get_const_row_ptrs(); +// auto orig_col_idxs = orig->get_const_col_idxs(); +// auto orig_vals = orig->get_const_values(); +// auto cp_row_ptrs = column_permuted->get_row_ptrs(); +// auto cp_col_idxs = column_permuted->get_col_idxs(); +// auto cp_vals = column_permuted->get_values(); +// auto num_nnz = orig->get_num_stored_elements(); +// size_type num_rows = orig->get_size()[0]; +// size_type num_cols = orig->get_size()[1]; +// +// for (size_type row = 0; row < num_rows; ++row) { +// cp_row_ptrs[row] = orig_row_ptrs[row]; +// for (size_type k = orig_row_ptrs[row]; +// k < size_type(orig_row_ptrs[row + 1]); ++k) { +// cp_col_idxs[k] = perm[orig_col_idxs[k]]; +// cp_vals[k] = orig_vals[k]; +// } +// } +// cp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; +//} + + +template +void column_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto perm = permutation_indices->get_const_data(); +// Array inv_perm(*permutation_indices); +// auto iperm = inv_perm.get_data(); +// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { +// iperm[perm[ind]] = ind; +// } +// column_permute_impl(&inv_perm, orig, column_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); + + +template +void inverse_column_permute( + std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// column_permute_impl(permutation_indices, orig, column_permuted); +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto row_ptrs = source->get_const_row_ptrs(); +// auto row_nnz_val = result->get_data(); +// for (size_type i = 0; i < result->get_num_elems(); i++) { +// row_nnz_val[i] = row_ptrs[i + 1] - row_ptrs[i]; +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void sort_by_column_index(std::shared_ptr exec, + matrix::Fbcsr *to_sort) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto values = to_sort->get_values(); +// auto row_ptrs = to_sort->get_row_ptrs(); +// auto col_idxs = to_sort->get_col_idxs(); +// const auto number_rows = to_sort->get_size()[0]; +// for (size_type i = 0; i < number_rows; ++i) { +// auto start_row_idx = row_ptrs[i]; +// auto row_nnz = row_ptrs[i + 1] - start_row_idx; +// auto helper = detail::IteratorFactory( +// col_idxs + start_row_idx, values + start_row_idx, row_nnz); +// std::sort(helper.begin(), helper.end()); +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + 
GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto row_ptrs = to_check->get_const_row_ptrs(); +// const auto col_idxs = to_check->get_const_col_idxs(); +// const auto size = to_check->get_size(); +// for (size_type i = 0; i < size[0]; ++i) { +// for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { +// if (col_idxs[idx - 1] > col_idxs[idx]) { +// *is_sorted = false; +// return; +// } +// } +// } +// *is_sorted = true; +// return; +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// const auto row_ptrs = orig->get_const_row_ptrs(); +// const auto col_idxs = orig->get_const_col_idxs(); +// const auto values = orig->get_const_values(); +// const auto diag_size = diag->get_size()[0]; +// auto diag_values = diag->get_values(); +// +// for (size_type row = 0; row < diag_size; ++row) { +// for (size_type idx = row_ptrs[row]; idx < row_ptrs[row + 1]; ++idx) { +// if (col_idxs[idx] == row) { +// diag_values[row] = values[idx]; +// break; +// } +// } +// } +//} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 81298ba86cd..c826a9eaca7 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -3,6 +3,7 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(identity) ginkgo_create_test(permutation) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..b5fd2162da5 --- /dev/null +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,1550 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/fbcsr_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +namespace matstr = gko::matrix::matrix_strategy; + + +constexpr int mat_bs = 1; + + +template +class Fbcsr : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Coo = gko::matrix::Coo; + using Mtx = gko::matrix::Fbcsr; + using Sellp = gko::matrix::Sellp; + using SparsityCsr = gko::matrix::SparsityCsr; + using Ell = gko::matrix::Ell; + using Hybrid = gko::matrix::Hybrid; + using Vec = gko::matrix::Dense; + + Fbcsr() + : exec(gko::ReferenceExecutor::create()), + mtx(Mtx::create(exec, gko::dim<2>{2, 3}, 4, mat_bs, + std::make_shared>(2))), + mtx2(Mtx::create(exec, gko::dim<2>{2, 3}, 5, mat_bs, + std::make_shared>())), + mtx3_sorted(Mtx::create(exec, gko::dim<2>(3, 3), 7, mat_bs, + std::make_shared>())), + mtx3_unsorted(Mtx::create(exec, gko::dim<2>(3, 3), 7, mat_bs, + std::make_shared>())) + { + this->create_mtx(mtx.get()); + this->create_mtx2(mtx2.get()); + this->create_mtx3(mtx3_sorted.get(), mtx3_unsorted.get()); + } + + void create_mtx(Mtx *m) + { + value_type *v = m->get_values(); + index_type *c = m->get_col_idxs(); + index_type *r = m->get_row_ptrs(); + auto *s = m->get_srow(); + /* + * 1 3 2 + * 0 5 0 + */ + r[0] = 0; + r[1] = 3; + r[2] = 4; + c[0] = 0; + c[1] = 1; + c[2] = 2; + c[3] = 1; + v[0] = 1.0; + v[1] = 3.0; + v[2] = 2.0; + v[3] = 5.0; + s[0] = 0; + } + + void create_mtx2(Mtx *m) + { + value_type *v = m->get_values(); + index_type *c = m->get_col_idxs(); + index_type *r = m->get_row_ptrs(); + // It keeps an explict zero + /* + * 1 3 2 + * {0} 5 0 + */ + r[0] = 0; + r[1] = 3; + r[2] = 5; + c[0] = 0; + c[1] = 1; + c[2] = 2; + c[3] = 0; + c[4] = 1; + v[0] = 1.0; + v[1] = 3.0; + v[2] = 2.0; + v[3] = 0.0; + v[4] = 5.0; + } + + void create_mtx3(Mtx *sorted, Mtx *unsorted) + { + auto vals_s = sorted->get_values(); + auto cols_s = sorted->get_col_idxs(); + auto rows_s = sorted->get_row_ptrs(); + auto vals_u = unsorted->get_values(); + auto cols_u = unsorted->get_col_idxs(); + auto rows_u = unsorted->get_row_ptrs(); + /* For both versions (sorted and unsorted), this matrix is stored: + * 0 2 1 + * 3 1 8 + * 2 0 3 + * The unsorted matrix will have the (value, column) pair per row not + * sorted, which we still consider a valid FBCSR format. 
+ */ + rows_s[0] = 0; + rows_s[1] = 2; + rows_s[2] = 5; + rows_s[3] = 7; + rows_u[0] = 0; + rows_u[1] = 2; + rows_u[2] = 5; + rows_u[3] = 7; + + vals_s[0] = 2.; + vals_s[1] = 1.; + vals_s[2] = 3.; + vals_s[3] = 1.; + vals_s[4] = 8.; + vals_s[5] = 2.; + vals_s[6] = 3.; + // Each row is stored rotated once to the left + vals_u[0] = 1.; + vals_u[1] = 2.; + vals_u[2] = 1.; + vals_u[3] = 8.; + vals_u[4] = 3.; + vals_u[5] = 3.; + vals_u[6] = 2.; + + cols_s[0] = 1; + cols_s[1] = 2; + cols_s[2] = 0; + cols_s[3] = 1; + cols_s[4] = 2; + cols_s[5] = 0; + cols_s[6] = 2; + // The same applies for the columns + cols_u[0] = 2; + cols_u[1] = 1; + cols_u[2] = 1; + cols_u[3] = 2; + cols_u[4] = 0; + cols_u[5] = 2; + cols_u[6] = 0; + } + + void assert_equal_to_mtx(const Coo *m) + { + auto v = m->get_const_values(); + auto c = m->get_const_col_idxs(); + auto r = m->get_const_row_idxs(); + + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 0); + EXPECT_EQ(r[2], 0); + EXPECT_EQ(r[3], 1); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], value_type{1.0}); + EXPECT_EQ(v[1], value_type{3.0}); + EXPECT_EQ(v[2], value_type{2.0}); + EXPECT_EQ(v[3], value_type{5.0}); + } + + void assert_equal_to_mtx(const Sellp *m) + { + auto v = m->get_const_values(); + auto c = m->get_const_col_idxs(); + auto slice_sets = m->get_const_slice_sets(); + auto slice_lengths = m->get_const_slice_lengths(); + auto stride_factor = m->get_stride_factor(); + auto slice_size = m->get_slice_size(); + + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(stride_factor, 1); + ASSERT_EQ(slice_size, 64); + EXPECT_EQ(slice_sets[0], 0); + EXPECT_EQ(slice_lengths[0], 3); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[64], 1); + EXPECT_EQ(c[65], 0); + EXPECT_EQ(c[128], 2); + EXPECT_EQ(c[129], 0); + EXPECT_EQ(v[0], value_type{1.0}); + EXPECT_EQ(v[1], value_type{5.0}); + EXPECT_EQ(v[64], value_type{3.0}); + EXPECT_EQ(v[65], value_type{0.0}); + EXPECT_EQ(v[128], value_type{2.0}); + EXPECT_EQ(v[129], value_type{0.0}); + } + + void assert_equal_to_mtx(const SparsityCsr *m) + { + auto *c = m->get_const_col_idxs(); + auto *r = m->get_const_row_ptrs(); + + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_nonzeros(), 4); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 3); + EXPECT_EQ(r[2], 4); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + } + + void assert_equal_to_mtx(const Ell *m) + { + auto v = m->get_const_values(); + auto c = m->get_const_col_idxs(); + + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 6); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 1); + EXPECT_EQ(c[3], 0); + EXPECT_EQ(c[4], 2); + EXPECT_EQ(c[5], 0); + EXPECT_EQ(v[0], value_type{1.0}); + EXPECT_EQ(v[1], value_type{5.0}); + EXPECT_EQ(v[2], value_type{3.0}); + EXPECT_EQ(v[3], value_type{0.0}); + EXPECT_EQ(v[4], value_type{2.0}); + EXPECT_EQ(v[5], value_type{0.0}); + } + + void assert_equal_to_mtx(const Hybrid *m) + { + auto v = m->get_const_coo_values(); + auto c = m->get_const_coo_col_idxs(); + auto r = m->get_const_coo_row_idxs(); + auto n = m->get_ell_num_stored_elements_per_row(); + auto p = m->get_ell_stride(); + + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_ell_num_stored_elements(), 0); + ASSERT_EQ(m->get_coo_num_stored_elements(), 4); + EXPECT_EQ(n, 0); + EXPECT_EQ(p, 2); + EXPECT_EQ(r[0], 0); + 
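        // All four entries of mtx are stored in the COO part here (the ELL part
        // holds zero elements per row, as asserted above), so the row indices,
        // column indices and values checked are simply those of
        // mtx = [1 3 2; 0 5 0] in row-major order.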
EXPECT_EQ(r[1], 0); + EXPECT_EQ(r[2], 0); + EXPECT_EQ(r[3], 1); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], value_type{1.0}); + EXPECT_EQ(v[1], value_type{3.0}); + EXPECT_EQ(v[2], value_type{2.0}); + EXPECT_EQ(v[3], value_type{5.0}); + } + + void assert_equal_to_mtx2(const Hybrid *m) + { + auto v = m->get_const_coo_values(); + auto c = m->get_const_coo_col_idxs(); + auto r = m->get_const_coo_row_idxs(); + auto n = m->get_ell_num_stored_elements_per_row(); + auto p = m->get_ell_stride(); + auto ell_v = m->get_const_ell_values(); + auto ell_c = m->get_const_ell_col_idxs(); + + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + // Test Coo values + ASSERT_EQ(m->get_coo_num_stored_elements(), 1); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(c[0], 2); + EXPECT_EQ(v[0], value_type{2.0}); + // Test Ell values + ASSERT_EQ(m->get_ell_num_stored_elements(), 4); + EXPECT_EQ(n, 2); + EXPECT_EQ(p, 2); + EXPECT_EQ(ell_v[0], value_type{1}); + EXPECT_EQ(ell_v[1], value_type{0}); + EXPECT_EQ(ell_v[2], value_type{3}); + EXPECT_EQ(ell_v[3], value_type{5}); + EXPECT_EQ(ell_c[0], 0); + EXPECT_EQ(ell_c[1], 0); + EXPECT_EQ(ell_c[2], 1); + EXPECT_EQ(ell_c[3], 1); + } + + std::shared_ptr exec; + std::unique_ptr mtx; + std::unique_ptr mtx2; + std::unique_ptr mtx3_sorted; + std::unique_ptr mtx3_unsorted; +}; + +TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); + + +TYPED_TEST(Fbcsr, AppliesToDenseVector) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// using T = typename TestFixture::value_type; +// auto x = gko::initialize({2.0, 1.0, 4.0}, this->exec); +// auto y = Vec::create(this->exec, gko::dim<2>{2, 1}); +// +// this->mtx->apply(x.get(), y.get()); +// +// EXPECT_EQ(y->at(0), T{13.0}); +// EXPECT_EQ(y->at(1), T{5.0}); +//} + + +TYPED_TEST(Fbcsr, AppliesToDenseMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// using T = typename TestFixture::value_type; +// auto x = gko::initialize( +// {I{2.0, 3.0}, I{1.0, -1.5}, I{4.0, 2.5}}, this->exec); +// auto y = Vec::create(this->exec, gko::dim<2>{2}); +// +// this->mtx->apply(x.get(), y.get()); +// +// EXPECT_EQ(y->at(0, 0), T{13.0}); +// EXPECT_EQ(y->at(1, 0), T{5.0}); +// EXPECT_EQ(y->at(0, 1), T{3.5}); +// EXPECT_EQ(y->at(1, 1), T{-7.5}); +//} + + +TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// using T = typename TestFixture::value_type; +// auto alpha = gko::initialize({-1.0}, this->exec); +// auto beta = gko::initialize({2.0}, this->exec); +// auto x = gko::initialize({2.0, 1.0, 4.0}, this->exec); +// auto y = gko::initialize({1.0, 2.0}, this->exec); +// +// this->mtx->apply(alpha.get(), x.get(), beta.get(), y.get()); +// +// EXPECT_EQ(y->at(0), T{-11.0}); +// EXPECT_EQ(y->at(1), T{-1.0}); +//} + + +TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// using T = typename TestFixture::value_type; +// auto alpha = gko::initialize({-1.0}, this->exec); +// auto beta = gko::initialize({2.0}, this->exec); +// auto x = gko::initialize( +// {I{2.0, 3.0}, I{1.0, -1.5}, I{4.0, 2.5}}, this->exec); +// auto y = +// 
gko::initialize({I{1.0, 0.5}, I{2.0, -1.5}}, this->exec); +// +// this->mtx->apply(alpha.get(), x.get(), beta.get(), y.get()); +// +// EXPECT_EQ(y->at(0, 0), T{-11.0}); +// EXPECT_EQ(y->at(1, 0), T{-1.0}); +// EXPECT_EQ(y->at(0, 1), T{-2.5}); +// EXPECT_EQ(y->at(1, 1), T{4.5}); +//} + + +TYPED_TEST(Fbcsr, AppliesToFbcsrMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using T = typename TestFixture::value_type; +// this->mtx->apply(this->mtx3_unsorted.get(), this->mtx2.get()); +// +// ASSERT_EQ(this->mtx2->get_size(), gko::dim<2>(2, 3)); +// ASSERT_EQ(this->mtx2->get_num_stored_elements(), 6); +// ASSERT_TRUE(this->mtx2->is_sorted_by_column_index()); +// auto r = this->mtx2->get_const_row_ptrs(); +// auto c = this->mtx2->get_const_col_idxs(); +// auto v = this->mtx2->get_const_values(); +// // 13 5 31 +// // 15 5 40 +// EXPECT_EQ(r[0], 0); +// EXPECT_EQ(r[1], 3); +// EXPECT_EQ(r[2], 6); +// EXPECT_EQ(c[0], 0); +// EXPECT_EQ(c[1], 1); +// EXPECT_EQ(c[2], 2); +// EXPECT_EQ(c[3], 0); +// EXPECT_EQ(c[4], 1); +// EXPECT_EQ(c[5], 2); +// EXPECT_EQ(v[0], T{13}); +// EXPECT_EQ(v[1], T{5}); +// EXPECT_EQ(v[2], T{31}); +// EXPECT_EQ(v[3], T{15}); +// EXPECT_EQ(v[4], T{5}); +// EXPECT_EQ(v[5], T{40}); +//} + + +TYPED_TEST(Fbcsr, AppliesLinearCombinationToFbcsrMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// using T = typename TestFixture::value_type; +// auto alpha = gko::initialize({-1.0}, this->exec); +// auto beta = gko::initialize({2.0}, this->exec); +// +// this->mtx->apply(alpha.get(), this->mtx3_unsorted.get(), beta.get(), +// this->mtx2.get()); +// +// ASSERT_EQ(this->mtx2->get_size(), gko::dim<2>(2, 3)); +// ASSERT_EQ(this->mtx2->get_num_stored_elements(), 6); +// ASSERT_TRUE(this->mtx2->is_sorted_by_column_index()); +// auto r = this->mtx2->get_const_row_ptrs(); +// auto c = this->mtx2->get_const_col_idxs(); +// auto v = this->mtx2->get_const_values(); +// // -11 1 -27 +// // -15 5 -40 +// EXPECT_EQ(r[0], 0); +// EXPECT_EQ(r[1], 3); +// EXPECT_EQ(r[2], 6); +// EXPECT_EQ(c[0], 0); +// EXPECT_EQ(c[1], 1); +// EXPECT_EQ(c[2], 2); +// EXPECT_EQ(c[3], 0); +// EXPECT_EQ(c[4], 1); +// EXPECT_EQ(c[5], 2); +// EXPECT_EQ(v[0], T{-11}); +// EXPECT_EQ(v[1], T{1}); +// EXPECT_EQ(v[2], T{-27}); +// EXPECT_EQ(v[3], T{-15}); +// EXPECT_EQ(v[4], T{5}); +// EXPECT_EQ(v[5], T{-40}); +//} + + +TYPED_TEST(Fbcsr, AppliesLinearCombinationToIdentityMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using T = typename TestFixture::value_type; +// using Vec = typename TestFixture::Vec; +// using Mtx = typename TestFixture::Mtx; +// auto alpha = gko::initialize({-3.0}, this->exec); +// auto beta = gko::initialize({2.0}, this->exec); +// auto a = gko::initialize( +// {I{2.0, 0.0, 3.0}, I{0.0, 1.0, -1.5}, I{0.0, -2.0, 0.0}, +// I{5.0, 0.0, 0.0}, I{1.0, 0.0, 4.0}, I{2.0, -2.0, 0.0}, +// I{0.0, 0.0, 0.0}}, +// this->exec); +// auto b = gko::initialize( +// {I{2.0, -2.0, 0.0}, I{1.0, 0.0, 4.0}, I{2.0, 0.0, 3.0}, +// I{0.0, 1.0, -1.5}, I{1.0, 0.0, 0.0}, I{0.0, 0.0, 0.0}, +// I{0.0, 0.0, 0.0}}, +// this->exec); +// auto expect = gko::initialize( +// {I{-2.0, -4.0, -9.0}, I{2.0, -3.0, 12.5}, I{4.0, 6.0, 6.0}, +// I{-15.0, 2.0, -3.0}, I{-1.0, 0.0, -12.0}, I{-6.0, 6.0, 0.0}, +// I{0.0, 0.0, 0.0}}, +// this->exec); +// auto id = gko::matrix::Identity::create(this->exec, a->get_size()[1]); +// 
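// (Applying to an Identity matrix with scalars reduces to the scaled addition
//  b = alpha * a + beta * b, so with alpha = -3 and beta = 2 the `expect`
//  matrix above is -3 * a + 2 * b entry-wise.)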
+// a->apply(gko::lend(alpha), gko::lend(id), gko::lend(beta), gko::lend(b)); +// +// GKO_ASSERT_MTX_NEAR(b, expect, r::value); +// GKO_ASSERT_MTX_EQ_SPARSITY(b, expect); +// ASSERT_TRUE(b->is_sorted_by_column_index()); +//} + + +TYPED_TEST(Fbcsr, ApplyFailsOnWrongInnerDimension) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// auto x = Vec::create(this->exec, gko::dim<2>{2}); +// auto y = Vec::create(this->exec, gko::dim<2>{2}); +// +// ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +//} + + +TYPED_TEST(Fbcsr, ApplyFailsOnWrongNumberOfRows) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// auto x = Vec::create(this->exec, gko::dim<2>{3, 2}); +// auto y = Vec::create(this->exec, gko::dim<2>{3, 2}); +// +// ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +//} + + +TYPED_TEST(Fbcsr, ApplyFailsOnWrongNumberOfCols) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Vec = typename TestFixture::Vec; +// auto x = Vec::create(this->exec, gko::dim<2>{3}); +// auto y = Vec::create(this->exec, gko::dim<2>{2}); +// +// ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +//} + + +TYPED_TEST(Fbcsr, ConvertsToPrecision) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using OtherType = typename gko::next_precision; +// using Fbcsr = typename TestFixture::Mtx; +// using OtherFbcsr = gko::matrix::Fbcsr; +// auto tmp = OtherFbcsr::create(this->exec); +// auto res = Fbcsr::create(this->exec); +// // If OtherType is more precise: 0, otherwise r +// auto residual = r::value < r::value +// ? gko::remove_complex{0} +// : gko::remove_complex{r::value}; +// +// // use mtx2 as mtx's strategy would involve creating a CudaExecutor +// this->mtx2->convert_to(tmp.get()); +// tmp->convert_to(res.get()); +// +// GKO_ASSERT_MTX_NEAR(this->mtx2, res, residual); +// ASSERT_EQ(typeid(*this->mtx2->get_strategy()), +// typeid(*res->get_strategy())); +//} + + +TYPED_TEST(Fbcsr, MovesToPrecision) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using OtherType = typename gko::next_precision; +// using Fbcsr = typename TestFixture::Mtx; +// using OtherFbcsr = gko::matrix::Fbcsr; +// auto tmp = OtherFbcsr::create(this->exec); +// auto res = Fbcsr::create(this->exec); +// // If OtherType is more precise: 0, otherwise r +// auto residual = r::value < r::value +// ? 
gko::remove_complex{0} +// : gko::remove_complex{r::value}; +// +// // use mtx2 as mtx's strategy would involve creating a CudaExecutor +// this->mtx2->move_to(tmp.get()); +// tmp->move_to(res.get()); +// +// GKO_ASSERT_MTX_NEAR(this->mtx2, res, residual); +// ASSERT_EQ(typeid(*this->mtx2->get_strategy()), +// typeid(*res->get_strategy())); +//} + + +TYPED_TEST(Fbcsr, ConvertsToDense) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Dense = typename TestFixture::Vec; +// auto dense_mtx = Dense::create(this->mtx->get_executor()); +// auto dense_other = gko::initialize( +// 4, {{1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}, this->exec); +// +// this->mtx->convert_to(dense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(dense_mtx, dense_other, 0.0); +//} + + +TYPED_TEST(Fbcsr, MovesToDense) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Dense = typename TestFixture::Vec; +// auto dense_mtx = Dense::create(this->mtx->get_executor()); +// auto dense_other = gko::initialize( +// 4, {{1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}, this->exec); +// +// this->mtx->move_to(dense_mtx.get()); +// +// GKO_ASSERT_MTX_NEAR(dense_mtx, dense_other, 0.0); +//} + + +TYPED_TEST(Fbcsr, ConvertsToCoo) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Coo = typename TestFixture::Coo; +// auto coo_mtx = Coo::create(this->mtx->get_executor()); +// +// this->mtx->convert_to(coo_mtx.get()); +// +// this->assert_equal_to_mtx(coo_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, MovesToCoo) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Coo = typename TestFixture::Coo; +// auto coo_mtx = Coo::create(this->mtx->get_executor()); +// +// this->mtx->move_to(coo_mtx.get()); +// +// this->assert_equal_to_mtx(coo_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, ConvertsToSellp) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Sellp = typename TestFixture::Sellp; +// auto sellp_mtx = Sellp::create(this->mtx->get_executor()); +// +// this->mtx->convert_to(sellp_mtx.get()); +// +// this->assert_equal_to_mtx(sellp_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, MovesToSellp) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Sellp = typename TestFixture::Sellp; +// using Fbcsr = typename TestFixture::Mtx; +// auto sellp_mtx = Sellp::create(this->mtx->get_executor()); +// auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); +// +// fbcsr_ref->copy_from(this->mtx.get()); +// fbcsr_ref->move_to(sellp_mtx.get()); +// +// this->assert_equal_to_mtx(sellp_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, ConvertsToSparsityFbcsr) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using SparsityFbcsr = typename TestFixture::SparsityFbcsr; +// auto sparsity_mtx = SparsityFbcsr::create(this->mtx->get_executor()); +// +// this->mtx->convert_to(sparsity_mtx.get()); +// +// this->assert_equal_to_mtx(sparsity_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, MovesToSparsityFbcsr) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using SparsityFbcsr = typename TestFixture::SparsityFbcsr; +// using Fbcsr = typename TestFixture::Mtx; +// auto sparsity_mtx = SparsityFbcsr::create(this->mtx->get_executor()); +// auto fbcsr_ref = 
Fbcsr::create(this->mtx->get_executor()); +// +// fbcsr_ref->copy_from(this->mtx.get()); +// fbcsr_ref->move_to(sparsity_mtx.get()); +// +// this->assert_equal_to_mtx(sparsity_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, ConvertsToHybridAutomatically) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid = typename TestFixture::Hybrid; +// auto hybrid_mtx = Hybrid::create(this->mtx->get_executor()); +// +// this->mtx->convert_to(hybrid_mtx.get()); +// +// this->assert_equal_to_mtx(hybrid_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, MovesToHybridAutomatically) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid = typename TestFixture::Hybrid; +// using Fbcsr = typename TestFixture::Mtx; +// auto hybrid_mtx = Hybrid::create(this->mtx->get_executor()); +// auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); +// +// fbcsr_ref->copy_from(this->mtx.get()); +// fbcsr_ref->move_to(hybrid_mtx.get()); +// +// this->assert_equal_to_mtx(hybrid_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, ConvertsToHybridByColumn2) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid = typename TestFixture::Hybrid; +// auto hybrid_mtx = +// Hybrid::create(this->mtx2->get_executor(), +// std::make_shared(2)); +// +// this->mtx2->convert_to(hybrid_mtx.get()); +// +// this->assert_equal_to_mtx2(hybrid_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, MovesToHybridByColumn2) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Hybrid = typename TestFixture::Hybrid; +// using Fbcsr = typename TestFixture::Mtx; +// auto hybrid_mtx = +// Hybrid::create(this->mtx2->get_executor(), +// std::make_shared(2)); +// auto fbcsr_ref = Fbcsr::create(this->mtx2->get_executor()); +// +// fbcsr_ref->copy_from(this->mtx2.get()); +// fbcsr_ref->move_to(hybrid_mtx.get()); +// +// this->assert_equal_to_mtx2(hybrid_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToPrecision) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using OtherType = typename gko::next_precision; +// using Fbcsr = typename TestFixture::Mtx; +// using OtherFbcsr = gko::matrix::Fbcsr; +// auto empty = OtherFbcsr::create(this->exec); +// empty->get_row_ptrs()[0] = 0; +// auto res = Fbcsr::create(this->exec); +// +// empty->convert_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_EQ(*res->get_const_row_ptrs(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, MovesEmptyToPrecision) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using OtherType = typename gko::next_precision; +// using Fbcsr = typename TestFixture::Mtx; +// using OtherFbcsr = gko::matrix::Fbcsr; +// auto empty = OtherFbcsr::create(this->exec); +// empty->get_row_ptrs()[0] = 0; +// auto res = Fbcsr::create(this->exec); +// +// empty->move_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_EQ(*res->get_const_row_ptrs(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToDense) +GKO_NOT_IMPLEMENTED; +//{ +// TODO 
(script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Dense = gko::matrix::Dense; +// auto empty = Fbcsr::create(this->exec); +// auto res = Dense::create(this->exec); +// +// empty->convert_to(res.get()); +// +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, MovesEmptyToDense) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Dense = gko::matrix::Dense; +// auto empty = Fbcsr::create(this->exec); +// auto res = Dense::create(this->exec); +// +// empty->move_to(res.get()); +// +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToCoo) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Coo = gko::matrix::Coo; +// auto empty = Fbcsr::create(this->exec); +// auto res = Coo::create(this->exec); +// +// empty->convert_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, MovesEmptyToCoo) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Coo = gko::matrix::Coo; +// auto empty = Fbcsr::create(this->exec); +// auto res = Coo::create(this->exec); +// +// empty->move_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToEll) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Ell = gko::matrix::Ell; +// auto empty = Fbcsr::create(this->exec); +// auto res = Ell::create(this->exec); +// +// empty->convert_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, MovesEmptyToEll) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Ell = gko::matrix::Ell; +// auto empty = Fbcsr::create(this->exec); +// auto res = Ell::create(this->exec); +// +// empty->move_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToSellp) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Sellp = gko::matrix::Sellp; +// auto empty = Fbcsr::create(this->exec); +// auto res = Sellp::create(this->exec); +// +// empty->convert_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// 
ASSERT_EQ(*res->get_const_slice_sets(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, MovesEmptyToSellp) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Sellp = gko::matrix::Sellp; +// auto empty = Fbcsr::create(this->exec); +// auto res = Sellp::create(this->exec); +// +// empty->move_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_EQ(*res->get_const_slice_sets(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToSparsityFbcsr) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using SparsityFbcsr = gko::matrix::SparsityFbcsr; +// auto empty = Fbcsr::create(this->exec); +// empty->get_row_ptrs()[0] = 0; +// auto res = SparsityFbcsr::create(this->exec); +// +// empty->convert_to(res.get()); +// +// ASSERT_EQ(res->get_num_nonzeros(), 0); +// ASSERT_EQ(*res->get_const_row_ptrs(), 0); +//} + + +TYPED_TEST(Fbcsr, MovesEmptyToSparsityFbcsr) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using SparsityFbcsr = gko::matrix::SparsityFbcsr; +// auto empty = Fbcsr::create(this->exec); +// empty->get_row_ptrs()[0] = 0; +// auto res = SparsityFbcsr::create(this->exec); +// +// empty->move_to(res.get()); +// +// ASSERT_EQ(res->get_num_nonzeros(), 0); +// ASSERT_EQ(*res->get_const_row_ptrs(), 0); +//} + + +TYPED_TEST(Fbcsr, ConvertsEmptyToHybrid) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Hybrid = gko::matrix::Hybrid; +// auto empty = Fbcsr::create(this->exec); +// auto res = Hybrid::create(this->exec); +// +// empty->convert_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, MovesEmptyToHybrid) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using ValueType = typename TestFixture::value_type; +// using IndexType = typename TestFixture::index_type; +// using Fbcsr = typename TestFixture::Mtx; +// using Hybrid = gko::matrix::Hybrid; +// auto empty = Fbcsr::create(this->exec); +// auto res = Hybrid::create(this->exec); +// +// empty->move_to(res.get()); +// +// ASSERT_EQ(res->get_num_stored_elements(), 0); +// ASSERT_FALSE(res->get_size()); +//} + + +TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// gko::Array row_nnz(this->exec, this->mtx->get_size()[0]); +// +// gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( +// this->exec, this->mtx.get(), &row_nnz); +// +// auto row_nnz_val = row_nnz.get_data(); +// ASSERT_EQ(row_nnz_val[0], 3); +// ASSERT_EQ(row_nnz_val[1], 1); +//} + + +TYPED_TEST(Fbcsr, 
CalculatesTotalCols) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// gko::size_type total_cols; +// gko::size_type stride_factor = gko::matrix::default_stride_factor; +// gko::size_type slice_size = gko::matrix::default_slice_size; +// +// gko::kernels::reference::fbcsr::calculate_total_cols( +// this->exec, this->mtx.get(), &total_cols, stride_factor, slice_size); +// +// ASSERT_EQ(total_cols, 3); +//} + + +TYPED_TEST(Fbcsr, ConvertsToEll) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Ell = typename TestFixture::Ell; +// using Dense = typename TestFixture::Vec; +// auto ell_mtx = Ell::create(this->mtx->get_executor()); +// auto dense_mtx = Dense::create(this->mtx->get_executor()); +// auto ref_dense_mtx = Dense::create(this->mtx->get_executor()); +// +// this->mtx->convert_to(ell_mtx.get()); +// +// this->assert_equal_to_mtx(ell_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, MovesToEll) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Ell = typename TestFixture::Ell; +// using Dense = typename TestFixture::Vec; +// auto ell_mtx = Ell::create(this->mtx->get_executor()); +// auto dense_mtx = Dense::create(this->mtx->get_executor()); +// auto ref_dense_mtx = Dense::create(this->mtx->get_executor()); +// +// this->mtx->move_to(ell_mtx.get()); +// +// this->assert_equal_to_mtx(ell_mtx.get()); +//} + + +TYPED_TEST(Fbcsr, SquareMtxIsTransposable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// // clang-format off +// auto mtx2 = gko::initialize( +// {{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}, +// {0.0, 1.5, 2.0}}, this->exec); +// // clang-format on +// +// auto trans = mtx2->transpose(); +// auto trans_as_fbcsr = static_cast(trans.get()); +// +// // clang-format off +// GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, +// l({{1.0, 0.0, 0.0}, +// {3.0, 5.0, 1.5}, +// {2.0, 0.0, 2.0}}), 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// auto trans = this->mtx->transpose(); +// auto trans_as_fbcsr = static_cast(trans.get()); +// +// // clang-format off +// GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, +// l({{1.0, 0.0}, +// {3.0, 5.0}, +// {2.0, 0.0}}), 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, SquareMatrixIsRowPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = typename TestFixture::index_type; +// // clang-format off +// auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}, +// {0.0, 1.5, 2.0}}, this->exec); +// // clang-format on +// gko::Array permute_idxs{this->exec, {1, 2, 0}}; +// +// auto row_permute = p_mtx->row_permute(&permute_idxs); +// +// auto row_permute_fbcsr = static_cast(row_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(row_permute_fbcsr, +// l({{0.0, 5.0, 0.0}, +// {0.0, 1.5, 2.0}, +// {1.0, 3.0, 2.0}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, NonSquareMatrixIsRowPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = 
typename TestFixture::index_type; +// // clang-format off +// auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}}, this->exec); +// // clang-format on +// gko::Array permute_idxs{this->exec, {1, 0}}; +// +// auto row_permute = p_mtx->row_permute(&permute_idxs); +// +// auto row_permute_fbcsr = static_cast(row_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(row_permute_fbcsr, +// l({{0.0, 5.0, 0.0}, +// {1.0, 3.0, 2.0}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, SquareMatrixIsColPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = typename TestFixture::index_type; +// // clang-format off +// auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}, +// {0.0, 1.5, 2.0}}, this->exec); +// // clang-format on +// gko::Array permute_idxs{this->exec, {1, 2, 0}}; +// +// auto c_permute = p_mtx->column_permute(&permute_idxs); +// +// auto c_permute_fbcsr = static_cast(c_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(c_permute_fbcsr, +// l({{3.0, 2.0, 1.0}, +// {5.0, 0.0, 0.0}, +// {1.5, 2.0, 0.0}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, NonSquareMatrixIsColPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = typename TestFixture::index_type; +// // clang-format off +// auto p_mtx = gko::initialize({{1.0, 0.0, 2.0}, +// {0.0, 5.0, 0.0}}, this->exec); +// // clang-format on +// gko::Array permute_idxs{this->exec, {1, 2, 0}}; +// +// auto c_permute = p_mtx->column_permute(&permute_idxs); +// +// auto c_permute_fbcsr = static_cast(c_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(c_permute_fbcsr, +// l({{0.0, 2.0, 1.0}, +// {5.0, 0.0, 0.0}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, SquareMatrixIsInverseRowPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = typename TestFixture::index_type; +// // clang-format off +// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}, +// {0.0, 1.5, 2.0}}, this->exec); +// // clang-format on +// gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; +// +// auto inverse_row_permute = +// inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs); +// +// auto inverse_row_permute_fbcsr = +// static_cast(inverse_row_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(inverse_row_permute_fbcsr, +// l({{0.0, 1.5, 2.0}, +// {1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseRowPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = typename TestFixture::index_type; +// // clang-format off +// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}}, this->exec); +// // clang-format on +// gko::Array inverse_permute_idxs{this->exec, {1, 0}}; +// +// auto inverse_row_permute = +// inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs); +// +// auto inverse_row_permute_fbcsr = +// static_cast(inverse_row_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(inverse_row_permute_fbcsr, +// 
l({{0.0, 5.0, 0.0}, +// {1.0, 3.0, 2.0}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, SquareMatrixIsInverseColPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = typename TestFixture::index_type; +// // clang-format off +// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}, +// {0.0, 1.5, 2.0}}, this->exec); +// // clang-format on +// gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; +// +// auto inverse_c_permute = +// inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs); +// +// auto inverse_c_permute_fbcsr = static_cast(inverse_c_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(inverse_c_permute_fbcsr, +// l({{2.0, 1.0, 3.0}, +// {0.0, 0.0, 5.0}, +// {2.0, 0.0, 1.5}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseColPermutable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using index_type = typename TestFixture::index_type; +// // clang-format off +// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, +// {0.0, 5.0, 0.0}}, this->exec); +// // clang-format on +// gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; +// +// auto inverse_c_permute = +// inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs); +// +// auto inverse_c_permute_fbcsr = static_cast(inverse_c_permute.get()); +// // clang-format off +// GKO_ASSERT_MTX_NEAR(inverse_c_permute_fbcsr, +// l({{2.0, 1.0, 3.0}, +// {0.0, 0.0, 5.0}}), +// 0.0); +// // clang-format on +//} + + +TYPED_TEST(Fbcsr, RecognizeSortedMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// ASSERT_TRUE(this->mtx->is_sorted_by_column_index()); +// ASSERT_TRUE(this->mtx2->is_sorted_by_column_index()); +// ASSERT_TRUE(this->mtx3_sorted->is_sorted_by_column_index()); +//} + + +TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// ASSERT_FALSE(this->mtx3_unsorted->is_sorted_by_column_index()); +//} + + +TYPED_TEST(Fbcsr, SortSortedMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto matrix = this->mtx3_sorted->clone(); +// +// matrix->sort_by_column_index(); +// +// GKO_ASSERT_MTX_NEAR(matrix, this->mtx3_sorted, 0.0); +//} + + +TYPED_TEST(Fbcsr, SortUnsortedMatrix) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto matrix = this->mtx3_unsorted->clone(); +// +// matrix->sort_by_column_index(); +// +// GKO_ASSERT_MTX_NEAR(matrix, this->mtx3_sorted, 0.0); +//} + + +TYPED_TEST(Fbcsr, ExtractsDiagonal) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using T = typename TestFixture::value_type; +// auto matrix = this->mtx3_unsorted->clone(); +// auto diag = matrix->extract_diagonal(); +// +// ASSERT_EQ(diag->get_size()[0], 3); +// ASSERT_EQ(diag->get_size()[1], 3); +// ASSERT_EQ(diag->get_values()[0], T{0.}); +// ASSERT_EQ(diag->get_values()[1], T{1.}); +// ASSERT_EQ(diag->get_values()[2], T{3.}); +//} + + +TYPED_TEST(Fbcsr, InplaceAbsolute) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Mtx = typename 
TestFixture::Mtx; +// auto mtx = gko::initialize( +// {{1.0, 2.0, -2.0}, {3.0, -5.0, 0.0}, {0.0, 1.0, -1.5}}, this->exec); +// +// mtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR( +// mtx, l({{1.0, 2.0, 2.0}, {3.0, 5.0, 0.0}, {0.0, 1.0, 1.5}}), 0.0); +//} + + +TYPED_TEST(Fbcsr, OutplaceAbsolute) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Mtx = typename TestFixture::Mtx; +// auto mtx = gko::initialize( +// {{1.0, 2.0, -2.0}, {3.0, -5.0, 0.0}, {0.0, 1.0, -1.5}}, this->exec); +// +// auto abs_mtx = mtx->compute_absolute(); +// +// GKO_ASSERT_MTX_NEAR( +// abs_mtx, l({{1.0, 2.0, 2.0}, {3.0, 5.0, 0.0}, {0.0, 1.0, 1.5}}), 0.0); +// ASSERT_EQ(mtx->get_strategy()->get_name(), +// abs_mtx->get_strategy()->get_name()); +//} + + +template +class FbcsrComplex : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; +}; + +TYPED_TEST_CASE(FbcsrComplex, gko::test::ComplexValueIndexTypes); + + +TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Fbcsr = typename TestFixture::Mtx; +// using T = typename TestFixture::value_type; +// +// auto exec = gko::ReferenceExecutor::create(); +// // clang-format off +// auto mtx2 = gko::initialize( +// {{T{1.0, 2.0}, T{3.0, 0.0}, T{2.0, 0.0}}, +// {T{0.0, 0.0}, T{5.0, - 3.5}, T{0.0,0.0}}, +// {T{0.0, 0.0}, T{0.0, 1.5}, T{2.0,0.0}}}, exec); +// // clang-format on +// +// auto trans = mtx2->conj_transpose(); +// auto trans_as_fbcsr = static_cast(trans.get()); +// +// // clang-format off +// GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, +// l({{T{1.0, - 2.0}, T{0.0, 0.0}, T{0.0, 0.0}}, +// {T{3.0, 0.0}, T{5.0, 3.5}, T{0.0, - 1.5}}, +// {T{2.0, 0.0}, T{0.0, 0.0}, T{2.0 + 0.0}}}), 0.0); +// // clang-format on +//} + + +TYPED_TEST(FbcsrComplex, InplaceAbsolute) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Mtx = typename TestFixture::Mtx; +// using T = typename TestFixture::value_type; +// using index_type = typename TestFixture::index_type; +// auto exec = gko::ReferenceExecutor::create(); +// // clang-format off +// auto mtx = gko::initialize( +// {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, +// {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, +// {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); +// // clang-format on +// +// mtx->compute_absolute_inplace(); +// +// GKO_ASSERT_MTX_NEAR( +// mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), 0.0); +//} + + +TYPED_TEST(FbcsrComplex, OutplaceAbsolute) +GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// using Mtx = typename TestFixture::Mtx; +// using T = typename TestFixture::value_type; +// using index_type = typename TestFixture::index_type; +// auto exec = gko::ReferenceExecutor::create(); +// // clang-format off +// auto mtx = gko::initialize( +// {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, +// {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, +// {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); +// // clang-format on +// +// auto abs_mtx = mtx->compute_absolute(); +// +// GKO_ASSERT_MTX_NEAR( +// abs_mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), 0.0); +// ASSERT_EQ(mtx->get_strategy()->get_name(), +// 
abs_mtx->get_strategy()->get_name()); +//} + + +} // namespace From cfb8e9a2dde8f91e061f006b0ae124ac42e697ff Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Fri, 6 Nov 2020 16:19:38 +0100 Subject: [PATCH 02/58] [tests fail] fixed cuda build issue and enabled DPCPP kernels (unimplemented) for fbcsr --- core/matrix/fbcsr_kernels.hpp | 9 +++++++++ cuda/test/matrix/fbcsr_kernels.cpp | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index 715bb1192ce..a57b2af3c82 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -265,6 +265,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/cuda/test/matrix/fbcsr_kernels.cpp b/cuda/test/matrix/fbcsr_kernels.cpp index 043412c6402..72bc9c149f4 100644 --- a/cuda/test/matrix/fbcsr_kernels.cpp +++ b/cuda/test/matrix/fbcsr_kernels.cpp @@ -48,7 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include +#include #include "core/matrix/fbcsr_kernels.hpp" From 5b295d401e60ba519c8411392239a97148e291ff Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 10 Nov 2020 17:31:56 +0100 Subject: [PATCH 03/58] moved fbcsr test matrix generation to separate class --- core/components/fixed_block.hpp | 16 +- core/matrix/fbcsr.cpp | 21 ++ core/test/matrix/CMakeLists.txt | 9 +- core/test/matrix/fbcsr.cpp | 230 +++++++++------------ core/test/matrix/fbcsr_sample.cpp | 286 +++++++++++++++++++++++++++ core/test/matrix/fbcsr_sample.hpp | 83 ++++++++ include/ginkgo/core/matrix/fbcsr.hpp | 8 + 7 files changed, 500 insertions(+), 153 deletions(-) create mode 100644 core/test/matrix/fbcsr_sample.cpp create mode 100644 core/test/matrix/fbcsr_sample.hpp diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 526cdb25909..f2ff770cafb 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -66,7 +66,7 @@ class BlockReadError : public Error { }; template -int getNumFixedBlocks(const int block_size, const IndexType size) +IndexType getNumFixedBlocks(const int block_size, const IndexType size) { if (size % block_size != 0) throw BlockSizeError(__FILE__, __LINE__, block_size, size); @@ -113,14 +113,6 @@ class FixedBlock final { }; -/// Two-dimensional square block -// template -// using FixedBlock = FixedRectangularBlock; - -/// Fixed-size column vector -// template -// using FixedSegment = FixedRectangularBlock; - /// A lightweight dynamic block type for the host space template class DenseBlock final { @@ -180,7 +172,11 @@ class DenseBlock final { value_type *vals_; }; -/// A view into a an array of dense of dense blocks of some runtime-defined size +/// A view into a an array of dense blocks of some runtime-defined size +/** Note that accessing BSR values using this type of view abstracts away the + * storage layout within the individual blocks, as long as all blocks use the + * same layout. For now, row-major blocks are assumed. 
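+ *
+ * Rough indexing sketch (illustrative only; `value_array` and `bs` are
+ * placeholder names, and contiguous row-major bs-by-bs blocks are assumed):
+ *
+ *   DenseBlocksView<const double, int> vals(value_array, bs, bs);
+ *   // vals(ibnz, i, j) reads entry (i, j) of block ibnz, i.e.
+ *   // value_array[ibnz * bs * bs + i * bs + j] under these assumptions.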
+ */ template class DenseBlocksView final { public: diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index e362b3b466f..1ef12627602 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -244,6 +244,27 @@ void Fbcsr::move_to(Dense *result) //} +template +void Fbcsr::convert_to( + Csr *result) const GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// auto exec = this->get_executor(); +// auto tmp = Dense::create(exec, this->get_size()); +// exec->run(fbcsr::make_convert_to_csr(this, tmp.get())); +// tmp->move_to(result); +//} + + +template +void Fbcsr::move_to(Csr *result) + GKO_NOT_IMPLEMENTED; +//{ +// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// this->convert_to(result); +//} + + // template // void Fbcsr::convert_to( // Hybrid *result) const diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 64b3b3ed593..22a8d0828c6 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -5,7 +5,14 @@ ginkgo_create_test(csr_builder) ginkgo_create_test(dense) ginkgo_create_test(diagonal) ginkgo_create_test(ell) -ginkgo_create_test(fbcsr) +add_library(test_fbcsr_sample fbcsr_sample.cpp) +target_include_directories(test_fbcsr_sample + PRIVATE + "$" + "$" + "$" + ) +ginkgo_create_test(fbcsr test_fbcsr_sample) ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) ginkgo_create_test(identity) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index dd456ef7316..893e12b0df6 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/fixed_block.hpp" +#include "core/test/matrix/fbcsr_sample.hpp" #include "core/test/utils.hpp" @@ -60,63 +61,26 @@ class Fbcsr : public ::testing::Test { Fbcsr() : exec(gko::ReferenceExecutor::create()), - mtx(Mtx::create(exec, gko::dim<2>{6, 12}, 36, 3, - std::make_shared>())) + fbsample( + std::static_pointer_cast(exec)), + mtx(fbsample.generate_fbcsr()) { - const int bs = 3; - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); - r[0] = 0; - r[1] = 2; - r[2] = 4; - c[0] = 1; - c[1] = 3; - c[2] = 0; - c[3] = 2; - - gko::blockutils::DenseBlocksView vals(v, bs, - bs); - - if (mtx->get_size()[0] % bs != 0) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", - mtx->get_size()[0], mtx->get_size()[1], - "block size does not divide the size!"); - - for (index_type ibrow = 0; ibrow < mtx->get_size()[0] / bs; ibrow++) { - const index_type *const browptr = mtx->get_row_ptrs(); - for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; - inz++) { - const index_type bcolind = mtx->get_col_idxs()[inz]; - const value_type base = (ibrow + 1) * (bcolind + 1); - for (int ival = 0; ival < bs; ival++) - for (int jval = 0; jval < bs; jval++) - vals(inz, ival, jval) = - base + static_cast>( - ival * bs + jval); - } - } - - // Some of the entries are set to zero - vals(0, 2, 0) = gko::zero(); - vals(0, 2, 2) = gko::zero(); - vals(3, 0, 0) = gko::zero(); - - for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) - s[is] = 0; - // backup for move tests + const value_type *const v = mtx->get_values(); + const index_type *const c = mtx->get_col_idxs(); + const index_type *const r = mtx->get_row_ptrs(); orig_size = mtx->get_size(); - 
orig_rowptrs.resize(3); - orig_colinds.resize(4); - orig_vals.resize(36); - for (index_type i = 0; i < 3; i++) orig_rowptrs[i] = r[i]; - for (index_type i = 0; i < 4; i++) orig_colinds[i] = c[i]; - for (index_type i = 0; i < 36; i++) orig_vals[i] = v[i]; + orig_rowptrs.resize(fbsample.nbrows + 1); + orig_colinds.resize(fbsample.nbnz); + orig_vals.resize(fbsample.nnz); + for (index_type i = 0; i < fbsample.nbrows + 1; i++) + orig_rowptrs[i] = r[i]; + for (index_type i = 0; i < fbsample.nbnz; i++) orig_colinds[i] = c[i]; + for (index_type i = 0; i < fbsample.nnz; i++) orig_vals[i] = v[i]; } std::shared_ptr exec; + const gko::testing::FbcsrSample fbsample; std::unique_ptr mtx; gko::dim<2> orig_size; @@ -156,8 +120,6 @@ class Fbcsr : public ::testing::Test { } ASSERT_EQ(m->get_num_srow_elements(), 0); - // for(index_type is = 0; is < mtx->get_num_srow_elements(); is++) - // ASSERT_EQ(s[is], 0); } void assert_empty(const Mtx *m) @@ -175,6 +137,60 @@ class Fbcsr : public ::testing::Test { TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); +TYPED_TEST(Fbcsr, SampleGeneratorIsCorrect) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Csr = gko::matrix::Csr; + using Dense = gko::matrix::Dense; + + std::unique_ptr fbmtx = this->fbsample.generate_fbcsr(); + std::unique_ptr csmtx = this->fbsample.generate_csr(); + std::unique_ptr dmtx = this->fbsample.generate_dense(); + const int bs = this->fbsample.bs; + ASSERT_EQ(bs, fbmtx->get_block_size()); + + gko::blockutils::DenseBlocksView fbvals( + fbmtx->get_const_values(), bs, bs); + + for (index_type ibrow = 0; ibrow < this->fbsample.nbrows; ibrow++) { + const index_type *const browptr = fbmtx->get_row_ptrs(); + const index_type numblocksbrow = browptr[ibrow + 1] - browptr[ibrow]; + for (index_type irow = ibrow * bs; irow < ibrow * bs + bs; irow++) { + const index_type rowstart = + browptr[ibrow] * bs * bs + + (irow - ibrow * bs) * numblocksbrow * bs; + ASSERT_EQ(csmtx->get_const_row_ptrs()[irow], rowstart); + } + + const index_type *const bcolinds = fbmtx->get_col_idxs(); + + for (index_type ibnz = browptr[ibrow]; ibnz < browptr[ibrow + 1]; + ibnz++) { + const index_type bcol = bcolinds[ibnz]; + const index_type blkoffset_frombrowstart = ibnz - browptr[ibrow]; + + for (int ib = 0; ib < bs; ib++) { + const index_type row = ibrow * bs + ib; + const index_type inz_rowstart = + csmtx->get_const_row_ptrs()[row] + + blkoffset_frombrowstart * bs; + + for (int jb = 0; jb < bs; jb++) { + const index_type col = bcol * bs + jb; + const index_type inz = inz_rowstart + jb; + ASSERT_EQ(col, csmtx->get_const_col_idxs()[inz]); + ASSERT_EQ(fbvals(ibnz, ib, jb), + csmtx->get_const_values()[inz]); + ASSERT_EQ(fbvals(ibnz, ib, jb), dmtx->at(row, col)); + } + } + } + } +} + + TYPED_TEST(Fbcsr, KnowsItsSize) { ASSERT_EQ(this->mtx->get_size(), gko::dim<2>(6, 12)); @@ -203,35 +219,16 @@ TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; + using size_type = gko::size_type; - constexpr int bs = 3; - constexpr index_type nbrows = 2; - constexpr index_type nbcols = 4; - constexpr index_type bnnz = 4; - value_type values[bnnz * bs * bs]; - index_type col_idxs[] = {1, 3, 0, 2}; - index_type row_ptrs[] = {0, 2, 4}; - - gko::blockutils::DenseBlocksView vals(values, bs, - bs); - - for (index_type ibrow = 0; ibrow < nbrows; ibrow++) { - 
for (index_type inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; - inz++) { - const index_type bcolind = col_idxs[inz]; - const value_type base = (ibrow + 1) * (bcolind + 1); - for (int ival = 0; ival < bs; ival++) - for (int jval = 0; jval < bs; jval++) - vals(inz, ival, jval) = - base + static_cast>( - ival * bs + jval); - } - } - - // Some of the entries are set to zero - vals(0, 2, 0) = gko::zero(); - vals(0, 2, 2) = gko::zero(); - vals(3, 0, 0) = gko::zero(); + const int bs = this->fbsample.bs; + const size_type nbrows = this->fbsample.nbrows; + const size_type nbcols = this->fbsample.nbcols; + const size_type bnnz = this->fbsample.nbnz; + std::unique_ptr refmat = this->fbsample.generate_fbcsr(); + value_type *const values = refmat->get_values(); + index_type *const col_idxs = refmat->get_col_idxs(); + index_type *const row_ptrs = refmat->get_row_ptrs(); auto mtx = gko::matrix::Fbcsr::create( this->exec, gko::dim<2>{nbrows * bs, nbcols * bs}, bs, @@ -244,7 +241,6 @@ TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) ASSERT_EQ(mtx->get_const_values(), values); ASSERT_EQ(mtx->get_const_col_idxs(), col_idxs); ASSERT_EQ(mtx->get_const_row_ptrs(), row_ptrs); - // ASSERT_EQ(mtx->get_const_srow()[0], 0); } @@ -293,27 +289,12 @@ TYPED_TEST(Fbcsr, CanBeCleared) TYPED_TEST(Fbcsr, CanBeReadFromMatrixData) { - // TODO (script:fbcsr): change the code imported from matrix/csr if needed using Mtx = typename TestFixture::Mtx; auto m = Mtx::create(this->exec, std::make_shared>()); - m->set_block_size(3); - - // Assuming row-major blocks - m->read( - {{6, 12}, {{0, 3, 2.0}, {0, 4, 3.0}, {0, 5, 4.0}, {1, 3, 5.0}, - {1, 4, 6.0}, {1, 5, 7.0}, {2, 4, 9.0}, + m->set_block_size(this->fbsample.bs); - {0, 9, 4.0}, {0, 10, 5.0}, {0, 11, 6.0}, {1, 9, 7.0}, - {1, 10, 8.0}, {1, 11, 9.0}, {2, 9, 10.0}, {2, 10, 11.0}, - {2, 11, 12.0}, - - {3, 0, 2.0}, {3, 1, 3.0}, {3, 2, 4.0}, {4, 0, 5.0}, - {4, 1, 6.0}, {4, 2, 7.0}, {5, 0, 8.0}, {5, 1, 9.0}, - {5, 2, 10.0}, - - {3, 7, 7.0}, {3, 8, 8.0}, {4, 6, 9.0}, {4, 7, 10.0}, - {4, 8, 11.0}, {5, 6, 12.0}, {5, 7, 13.0}, {5, 8, 14.0}}}); + m->read(this->fbsample.generate_matrix_data()); this->assert_equal_to_original_mtx(m.get()); } @@ -329,49 +310,14 @@ TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) this->mtx->write(data); data.ensure_row_major_order(); - ASSERT_EQ(data.size, gko::dim<2>(6, 12)); - ASSERT_EQ(data.nonzeros.size(), 36); - EXPECT_EQ(data.nonzeros[0], tpl(0, 3, value_type{2.0})); - EXPECT_EQ(data.nonzeros[1], tpl(0, 4, value_type{3.0})); - EXPECT_EQ(data.nonzeros[2], tpl(0, 5, value_type{4.0})); - EXPECT_EQ(data.nonzeros[3], tpl(0, 9, value_type{4.0})); - EXPECT_EQ(data.nonzeros[4], tpl(0, 10, value_type{5.0})); - EXPECT_EQ(data.nonzeros[5], tpl(0, 11, value_type{6.0})); - - EXPECT_EQ(data.nonzeros[6], tpl(1, 3, value_type{5.0})); - EXPECT_EQ(data.nonzeros[7], tpl(1, 4, value_type{6.0})); - EXPECT_EQ(data.nonzeros[8], tpl(1, 5, value_type{7.0})); - EXPECT_EQ(data.nonzeros[9], tpl(1, 9, value_type{7.0})); - EXPECT_EQ(data.nonzeros[10], tpl(1, 10, value_type{8.0})); - EXPECT_EQ(data.nonzeros[11], tpl(1, 11, value_type{9.0})); - - EXPECT_EQ(data.nonzeros[12], tpl(2, 3, value_type{0.0})); - EXPECT_EQ(data.nonzeros[13], tpl(2, 4, value_type{9.0})); - EXPECT_EQ(data.nonzeros[14], tpl(2, 5, value_type{0.0})); - EXPECT_EQ(data.nonzeros[15], tpl(2, 9, value_type{10.0})); - EXPECT_EQ(data.nonzeros[16], tpl(2, 10, value_type{11.0})); - EXPECT_EQ(data.nonzeros[17], tpl(2, 11, value_type{12.0})); - - EXPECT_EQ(data.nonzeros[18], tpl(3, 0, value_type{2.0})); - 
EXPECT_EQ(data.nonzeros[19], tpl(3, 1, value_type{3.0})); - EXPECT_EQ(data.nonzeros[20], tpl(3, 2, value_type{4.0})); - EXPECT_EQ(data.nonzeros[21], tpl(3, 6, value_type{0.0})); - EXPECT_EQ(data.nonzeros[22], tpl(3, 7, value_type{7.0})); - EXPECT_EQ(data.nonzeros[23], tpl(3, 8, value_type{8.0})); - - EXPECT_EQ(data.nonzeros[24], tpl(4, 0, value_type{5.0})); - EXPECT_EQ(data.nonzeros[25], tpl(4, 1, value_type{6.0})); - EXPECT_EQ(data.nonzeros[26], tpl(4, 2, value_type{7.0})); - EXPECT_EQ(data.nonzeros[27], tpl(4, 6, value_type{9.0})); - EXPECT_EQ(data.nonzeros[28], tpl(4, 7, value_type{10.0})); - EXPECT_EQ(data.nonzeros[29], tpl(4, 8, value_type{11.0})); - - EXPECT_EQ(data.nonzeros[30], tpl(5, 0, value_type{8.0})); - EXPECT_EQ(data.nonzeros[31], tpl(5, 1, value_type{9.0})); - EXPECT_EQ(data.nonzeros[32], tpl(5, 2, value_type{10.0})); - EXPECT_EQ(data.nonzeros[33], tpl(5, 6, value_type{12.0})); - EXPECT_EQ(data.nonzeros[34], tpl(5, 7, value_type{13.0})); - EXPECT_EQ(data.nonzeros[35], tpl(5, 8, value_type{14.0})); + gko::matrix_data refdata = + this->fbsample.generate_matrix_data_with_explicit_zeros(); + refdata.ensure_row_major_order(); + + ASSERT_EQ(data.size, refdata.size); + ASSERT_EQ(data.nonzeros.size(), refdata.nonzeros.size()); + for (size_t i = 0; i < data.nonzeros.size(); i++) + ASSERT_EQ(data.nonzeros[i], refdata.nonzeros[i]); } diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp new file mode 100644 index 00000000000..27944dd6873 --- /dev/null +++ b/core/test/matrix/fbcsr_sample.cpp @@ -0,0 +1,286 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +#include + +#include "core/components/fixed_block.hpp" +#include "fbcsr_sample.hpp" + +namespace gko { +namespace testing { + + +namespace matstr = gko::matrix::matrix_strategy; + +template +FbcsrSample::FbcsrSample( + const std::shared_ptr rexec) + : nrows{6}, + ncols{12}, + nnz{36}, + nbrows{2}, + nbcols{4}, + nbnz{4}, + bs{3}, + exec(rexec) +{} + +template +std::unique_ptr> +FbcsrSample::generate_fbcsr() const +{ + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs, std::make_shared>()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 2; + r[2] = 4; + c[0] = 1; + c[1] = 3; + c[2] = 0; + c[3] = 2; + + gko::blockutils::DenseBlocksView vals(v, bs, bs); + + if (mtx->get_size()[0] % bs != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", + mtx->get_size()[0], mtx->get_size()[1], + "block size does not divide the size!"); + + for (index_type ibrow = 0; ibrow < mtx->get_size()[0] / bs; ibrow++) { + const index_type *const browptr = mtx->get_row_ptrs(); + for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; inz++) { + const index_type bcolind = mtx->get_col_idxs()[inz]; + const value_type base = (ibrow + 1) * (bcolind + 1); + for (int ival = 0; ival < bs; ival++) + for (int jval = 0; jval < bs; jval++) + vals(inz, ival, jval) = + base + static_cast>( + ival * bs + jval); + } + } + + // Some of the entries are set to zero + vals(0, 2, 0) = gko::zero(); + vals(0, 2, 2) = gko::zero(); + vals(3, 0, 0) = gko::zero(); + + for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; + + return mtx; +} + +template +std::unique_ptr> +FbcsrSample::generate_csr() const +{ + std::unique_ptr csrm = + Csr::create(exec, gko::dim<2>{nrows, ncols}, nnz, + std::make_shared()); + index_type *const csrrow = csrm->get_row_ptrs(); + index_type *const csrcols = csrm->get_col_idxs(); + value_type *const csrvals = csrm->get_values(); + + csrrow[0] = 0; + csrrow[1] = 6; + csrrow[2] = 12; + csrrow[3] = 18; + csrrow[4] = 24; + csrrow[5] = 30; + csrrow[6] = 36; + + csrcols[0] = 3; + csrcols[1] = 4; + csrcols[2] = 5; + csrcols[6] = 3; + csrcols[7] = 4; + csrcols[8] = 5; + csrcols[12] = 3; + csrcols[13] = 4; + csrcols[14] = 5; + + csrcols[3] = 9; + csrcols[4] = 10; + csrcols[5] = 11; + csrcols[9] = 9; + csrcols[10] = 10; + csrcols[11] = 11; + csrcols[15] = 9; + csrcols[16] = 10; + csrcols[17] = 11; + + csrcols[18] = 0; + csrcols[19] = 1; + csrcols[20] = 2; + csrcols[24] = 0; + csrcols[25] = 1; + csrcols[26] = 2; + csrcols[30] = 0; + csrcols[31] = 1; + csrcols[32] = 2; + + csrcols[21] = 6; + csrcols[22] = 7; + csrcols[23] = 8; + csrcols[27] = 6; + csrcols[28] = 7; + csrcols[29] = 8; + csrcols[33] = 6; + csrcols[34] = 7; + csrcols[35] = 8; + + // values + csrvals[0] = 2; + csrvals[1] = 3; + csrvals[2] = 4; + csrvals[6] = 5; + csrvals[7] = 6; + csrvals[8] = 7; + csrvals[12] = 0; + csrvals[13] = 9; + csrvals[14] = 0; + + csrvals[3] = 4; + csrvals[4] = 5; + csrvals[5] = 6; + csrvals[9] = 7; + csrvals[10] = 8; + csrvals[11] = 9; + csrvals[15] = 10; + csrvals[16] = 11; + csrvals[17] = 12; + + csrvals[18] = 2; + csrvals[19] = 3; + csrvals[20] = 4; + csrvals[24] = 5; + csrvals[25] = 6; + csrvals[26] = 7; + csrvals[30] = 8; + csrvals[31] = 9; + csrvals[32] = 10; + + csrvals[21] = 0; + csrvals[22] = 7; + 
csrvals[23] = 8; + csrvals[27] = 9; + csrvals[28] = 10; + csrvals[29] = 11; + csrvals[33] = 12; + csrvals[34] = 13; + csrvals[35] = 14; + + return csrm; +} + +template +std::unique_ptr> +FbcsrSample::generate_dense() const +{ + std::unique_ptr densem = + Dense::create(exec, gko::dim<2>(nrows, ncols)); + + for (size_type irow = 0; irow < densem->get_size()[0]; irow++) + for (size_type jcol = 0; jcol < densem->get_size()[1]; jcol++) { + densem->at(irow, jcol) = 0; + if (irow < 3 && jcol >= 3 && jcol < 6) + densem->at(irow, jcol) = 2.0 + irow * bs + jcol - 3; + if (irow < 3 && jcol >= 9) + densem->at(irow, jcol) = 4.0 + irow * bs + jcol - 9; + if (irow >= 3 && jcol < 3) + densem->at(irow, jcol) = 2.0 + (irow - 3) * bs + jcol; + if (irow >= 3 && jcol >= 6 && jcol < 9) + densem->at(irow, jcol) = 6.0 + (irow - 3) * bs + jcol - 6; + } + + densem->at(2, 3) = densem->at(2, 5) = densem->at(3, 6) = 0.0; + + return densem; +} + +// Assuming row-major blocks +template +gko::matrix_data +FbcsrSample::generate_matrix_data() const +{ + return MatData( + {{6, 12}, {{0, 3, 2.0}, {0, 4, 3.0}, {0, 5, 4.0}, {1, 3, 5.0}, + {1, 4, 6.0}, {1, 5, 7.0}, {2, 4, 9.0}, + + {0, 9, 4.0}, {0, 10, 5.0}, {0, 11, 6.0}, {1, 9, 7.0}, + {1, 10, 8.0}, {1, 11, 9.0}, {2, 9, 10.0}, {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, {3, 1, 3.0}, {3, 2, 4.0}, {4, 0, 5.0}, + {4, 1, 6.0}, {4, 2, 7.0}, {5, 0, 8.0}, {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 7, 7.0}, {3, 8, 8.0}, {4, 6, 9.0}, {4, 7, 10.0}, + {4, 8, 11.0}, {5, 6, 12.0}, {5, 7, 13.0}, {5, 8, 14.0}}}); +} + +// Assuming row-major blocks +template +gko::matrix_data FbcsrSample< + ValueType, IndexType>::generate_matrix_data_with_explicit_zeros() const +{ + return MatData({{6, 12}, {{0, 3, 2.0}, {0, 4, 3.0}, {0, 5, 4.0}, + {1, 3, 5.0}, {1, 4, 6.0}, {1, 5, 7.0}, + {2, 3, 0.0}, {2, 4, 9.0}, {2, 5, 0.0}, + + {0, 9, 4.0}, {0, 10, 5.0}, {0, 11, 6.0}, + {1, 9, 7.0}, {1, 10, 8.0}, {1, 11, 9.0}, + {2, 9, 10.0}, {2, 10, 11.0}, {2, 11, 12.0}, + + {3, 0, 2.0}, {3, 1, 3.0}, {3, 2, 4.0}, + {4, 0, 5.0}, {4, 1, 6.0}, {4, 2, 7.0}, + {5, 0, 8.0}, {5, 1, 9.0}, {5, 2, 10.0}, + + {3, 6, 0.0}, {3, 7, 7.0}, {3, 8, 8.0}, + {4, 6, 9.0}, {4, 7, 10.0}, {4, 8, 11.0}, + {5, 6, 12.0}, {5, 7, 13.0}, {5, 8, 14.0}}}); +} + +#define GKO_DECLARE_FBCSR_TEST_SAMPLE(ValueType, IndexType) \ + class FbcsrSample +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE); + +} // namespace testing +} // namespace gko diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp new file mode 100644 index 00000000000..d19037cddcd --- /dev/null +++ b/core/test/matrix/fbcsr_sample.hpp @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#ifndef GKO_CORE_MATRIX_TEST_FBCSR_SAMPLE_HPP +#define GKO_CORE_MATRIX_TEST_FBCSR_SAMPLE_HPP + +#include +#include +#include +#include + +namespace gko { +namespace testing { + + +/// Generates the same sample block CSR matrix in different formats +template +class FbcsrSample { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using Dense = gko::matrix::Dense; + using MatData = gko::matrix_data; + + FbcsrSample(std::shared_ptr exec); + + std::unique_ptr generate_fbcsr() const; + + /// Generates CSR matrix equal to the BSR matrix. Keeps explicit zeros. + std::unique_ptr generate_csr() const; + + std::unique_ptr generate_dense() const; + + MatData generate_matrix_data() const; + + MatData generate_matrix_data_with_explicit_zeros() const; + + const size_type nrows; + const size_type ncols; + const size_type nnz; + const size_type nbrows; + const size_type nbcols; + const size_type nbnz; + const int bs; + const std::shared_ptr exec; +}; + +} // namespace testing +} // namespace gko + +#endif diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index ac70964dfbc..bfbab84eabe 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -48,6 +48,9 @@ namespace matrix { template class Dense; +template +class Csr; + template class Coo; @@ -122,6 +125,7 @@ class Fbcsr : public EnableLinOp>, public EnableCreateMethod>, public ConvertibleTo, IndexType>>, public ConvertibleTo>, + public ConvertibleTo>, public ConvertibleTo>, // public ConvertibleTo>, // public ConvertibleTo>, @@ -193,6 +197,10 @@ class Fbcsr : public EnableLinOp>, void move_to(Dense *other) override; + void convert_to(Csr *result) const override; + + void move_to(Csr *result) override; + void convert_to(Coo *result) const override; void move_to(Coo *result) override; From e669230fea1c6c9aba264c231d47cd5078c29c2f Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 11 Nov 2020 16:03:00 +0100 Subject: [PATCH 04/58] [tests fail] added conversion to dense and csr on reference --- core/device_hooks/common_kernels.inc.cpp | 6 + core/matrix/fbcsr.cpp | 48 +- core/matrix/fbcsr_kernels.hpp | 8 + core/test/matrix/fbcsr_sample.cpp | 36 ++ core/test/matrix/fbcsr_sample.hpp | 9 + cuda/matrix/fbcsr_kernels.cu | 11 + hip/matrix/fbcsr_kernels.hip.cpp | 11 + omp/matrix/fbcsr_kernels.cpp | 11 + reference/matrix/fbcsr_kernels.cpp | 135 ++++-- reference/test/matrix/CMakeLists.txt | 2 +- reference/test/matrix/fbcsr_kernels.cpp | 532 ++++++++--------------- 11 files changed, 389 insertions(+), 420 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp 
b/core/device_hooks/common_kernels.inc.cpp index 43f4d90d8c0..f66c749fd73 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -841,6 +841,12 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); +template +GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + template GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 1ef12627602..0d00e70bcc5 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -67,6 +67,7 @@ GKO_REGISTER_OPERATION(spgemm, fbcsr::spgemm); // GKO_REGISTER_OPERATION(advanced_spgemm, fbcsr::advanced_spgemm); GKO_REGISTER_OPERATION(spgeam, fbcsr::spgeam); GKO_REGISTER_OPERATION(convert_to_coo, fbcsr::convert_to_coo); +GKO_REGISTER_OPERATION(convert_to_csr, fbcsr::convert_to_csr); GKO_REGISTER_OPERATION(convert_to_dense, fbcsr::convert_to_dense); // GKO_REGISTER_OPERATION(convert_to_sellp, fbcsr::convert_to_sellp); GKO_REGISTER_OPERATION(calculate_total_cols, fbcsr::calculate_total_cols); @@ -225,44 +226,39 @@ void Fbcsr::move_to(Coo *result) template void Fbcsr::convert_to(Dense *result) const - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// auto tmp = Dense::create(exec, this->get_size()); -// exec->run(fbcsr::make_convert_to_dense(this, tmp.get())); -// tmp->move_to(result); -//} +{ + auto exec = this->get_executor(); + auto tmp = Dense::create(exec, this->get_size()); + exec->run(fbcsr::make_convert_to_dense(this, tmp.get())); + tmp->move_to(result); +} template void Fbcsr::move_to(Dense *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// this->convert_to(result); -//} +{ + this->convert_to(result); +} template void Fbcsr::convert_to( - Csr *result) const GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// auto tmp = Dense::create(exec, this->get_size()); -// exec->run(fbcsr::make_convert_to_csr(this, tmp.get())); -// tmp->move_to(result); -//} + Csr *result) const +{ + auto exec = this->get_executor(); + auto tmp = Csr::create( + exec, this->get_size(), this->get_num_stored_elements(), + result->get_strategy()); + exec->run(fbcsr::make_convert_to_csr(this, tmp.get())); + tmp->move_to(result); +} template void Fbcsr::move_to(Csr *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// this->convert_to(result); -//} +{ + this->convert_to(result); +} // template diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index a57b2af3c82..6fa130b11c9 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include #include #include @@ -93,6 +94,11 @@ namespace kernels { const matrix::Fbcsr *source, \ matrix::Dense *result) +#define GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ + void convert_to_csr(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Csr *result) + #define GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType) \ void convert_to_coo(std::shared_ptr exec, \ const matrix::Fbcsr *source, \ @@ -196,6 +202,8 @@ namespace kernels { template \ GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \ template \ + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType); \ + template \ GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType); \ diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 27944dd6873..a221cd3381e 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + #include "core/components/fixed_block.hpp" #include "fbcsr_sample.hpp" @@ -278,6 +280,40 @@ gko::matrix_data FbcsrSample< {5, 6, 12.0}, {5, 7, 13.0}, {5, 8, 14.0}}}); } +template +std::unique_ptr> +FbcsrSample::generate_coo() const +{ + gko::matrix_data mdata = + generate_matrix_data_with_explicit_zeros(); + + using nztype = + typename gko::matrix_data::nonzero_type; + std::sort(mdata.nonzeros.begin(), mdata.nonzeros.end(), + [](const nztype &a, const nztype &b) { + if (a.row < b.row) + return true; + else if (a.row > b.row) + return false; + else if (a.column < b.column) + return true; + else + return false; + }); + + gko::Array rowidx(exec, nnz); + gko::Array colidx(exec, nnz); + gko::Array values(exec, nnz); + for (size_t i = 0; i < mdata.nonzeros.size(); i++) { + rowidx.get_data()[i] = mdata.nonzeros[i].row; + colidx.get_data()[i] = mdata.nonzeros[i].column; + values.get_data()[i] = mdata.nonzeros[i].value; + } + auto mat = + Coo::create(exec, gko::dim<2>{nrows, ncols}, values, colidx, rowidx); + return mat; +} + #define GKO_DECLARE_FBCSR_TEST_SAMPLE(ValueType, IndexType) \ class FbcsrSample GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE); diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index d19037cddcd..7fe2fd3cb32 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_MATRIX_TEST_FBCSR_SAMPLE_HPP #include +#include #include #include #include @@ -44,6 +45,8 @@ namespace testing { /// Generates the same sample block CSR matrix in different formats +/** This currently a 6 x 12 matrix with 3x3 blocks. + */ template class FbcsrSample { public: @@ -51,6 +54,7 @@ class FbcsrSample { using index_type = IndexType; using Fbcsr = gko::matrix::Fbcsr; using Csr = gko::matrix::Csr; + using Coo = gko::matrix::Coo; using Dense = gko::matrix::Dense; using MatData = gko::matrix_data; @@ -63,6 +67,11 @@ class FbcsrSample { std::unique_ptr generate_dense() const; + /// Returns the matrix in COO format keeping explicit nonzeros + /** The nonzeros are sorted by row and column. 
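+ * A possible use in a test (a sketch; `ref_exec` is an illustrative name
+ * for a reference executor created elsewhere):
+ *
+ *   FbcsrSample<double, gko::int32> sample(ref_exec);
+ *   auto coo = sample.generate_coo();  // same entries as generate_fbcsr(),
+ *                                      // including the stored zeros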
+ */ + std::unique_ptr generate_coo() const; + MatData generate_matrix_data() const; MatData generate_matrix_data_with_explicit_zeros() const; diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index 5d67bcc90d3..968399ff8ed 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -928,6 +929,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + template void convert_to_sellp(std::shared_ptr exec, const matrix::Fbcsr *source, diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index 5263e535e7a..da425238ab0 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -777,6 +778,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + template void convert_to_sellp(std::shared_ptr exec, const matrix::Fbcsr *source, diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index 3ad97e0e6bc..d30d8cd3b16 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -430,6 +431,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + template void convert_to_sellp(std::shared_ptr exec, const matrix::Fbcsr *source, diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index af0f19a8947..fe6bac8efd3 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/allocator.hpp" #include "core/base/iterator_factory.hpp" +#include "core/components/fixed_block.hpp" #include "core/components/prefix_sum.hpp" #include "core/matrix/fbcsr_builder.hpp" #include "reference/components/fbcsr_spgeam.hpp" @@ -367,14 +368,13 @@ void spgeam(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); -template -void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// template +// void convert_row_ptrs_to_idxs(std::shared_ptr exec, +// const IndexType *ptrs, size_type num_rows, +// IndexType *idxs) +// { // convert_ptrs_to_idxs(ptrs, num_rows, idxs); -//} +// } template @@ -382,46 +382,107 @@ void convert_to_coo(std::shared_ptr exec, const matrix::Fbcsr *source, matrix::Coo *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = result->get_size()[0]; -// -// auto row_idxs = result->get_row_idxs(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// -// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); template -void convert_to_dense(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = source->get_size()[0]; -// auto num_cols = source->get_size()[1]; -// auto row_ptrs = source->get_const_row_ptrs(); -// auto col_idxs = source->get_const_col_idxs(); -// auto vals = source->get_const_values(); -// -// for (size_type row = 0; row < num_rows; ++row) { -// for (size_type col = 0; col < num_cols; ++col) { -// result->at(row, col) = zero(); -// } -// for (size_type i = row_ptrs[row]; -// i < static_cast(row_ptrs[row + 1]); ++i) { -// result->at(row, col_idxs[i]) = vals[i]; -// } -// } -//} +void convert_to_dense(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Dense *const result) +{ + const int bs = source->get_block_size(); + const size_type nbrows = + gko::blockutils::getNumFixedBlocks(bs, source->get_size()[0]); + const size_type nbcols = + gko::blockutils::getNumFixedBlocks(bs, source->get_size()[1]); + const IndexType *const row_ptrs = source->get_const_row_ptrs(); + const IndexType *const col_idxs = source->get_const_col_idxs(); + const ValueType *const vals = source->get_const_values(); + + const gko::blockutils::DenseBlocksView values( + vals, bs, bs); + + for (size_type brow = 0; brow < nbrows; ++brow) { + for (size_type bcol = 0; bcol < nbcols; ++bcol) { + for (int ib = 0; ib < bs; ib++) + for (int jb = 0; jb < bs; jb++) + result->at(brow * bs + ib, bcol * bs + jb) = + zero(); + } + for (IndexType ibnz = row_ptrs[brow]; ibnz < row_ptrs[brow + 1]; + ++ibnz) { + for (int ib = 0; ib < bs; ib++) { + const IndexType row = brow * bs + ib; + for (int jb = 0; jb < bs; jb++) + result->at(row, col_idxs[ibnz] * bs + jb) = + values(ibnz, ib, jb); + } + } + } +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) +{ + const int bs = source->get_block_size(); + const size_type nbrows = + 
gko::blockutils::getNumFixedBlocks(bs, source->get_size()[0]); + const size_type nbcols = + gko::blockutils::getNumFixedBlocks(bs, source->get_size()[1]); + const IndexType *const browptrs = source->get_const_row_ptrs(); + const IndexType *const bcolinds = source->get_const_col_idxs(); + const ValueType *const bvals = source->get_const_values(); + + assert(nbrows * bs == result->get_size()[0]); + assert(nbcols * bs == result->get_size()[1]); + assert(source->get_num_stored_elements() == + result->get_num_stored_elements()); + + IndexType *const row_ptrs = result->get_row_ptrs(); + IndexType *const col_idxs = result->get_col_idxs(); + ValueType *const vals = result->get_values(); + + const gko::blockutils::DenseBlocksView bvalues( + bvals, bs, bs); + + for (size_type brow = 0; brow < nbrows; ++brow) { + const IndexType nz_browstart = browptrs[brow] * bs * bs; + const IndexType numblocks_brow = browptrs[brow + 1] - browptrs[brow]; + for (int ib = 0; ib < bs; ib++) + row_ptrs[brow * bs + ib] = nz_browstart + numblocks_brow * bs * ib; + + for (IndexType ibnz = browptrs[brow]; ibnz < browptrs[brow + 1]; + ++ibnz) { + const IndexType bcol = bcolinds[ibnz]; + + for (int ib = 0; ib < bs; ib++) { + const IndexType row = brow * bs + ib; + const IndexType inz_blockstart = + row_ptrs[row] + (ibnz - browptrs[brow]) * bs; + + for (int jb = 0; jb < bs; jb++) { + const IndexType inz = inz_blockstart + jb; + vals[inz] = bvalues(ibnz, ib, jb); + col_idxs[inz] = bcol * bs + jb; + } + } + } + } + + row_ptrs[source->get_size()[0]] = source->get_num_stored_elements(); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + template void convert_to_sellp(std::shared_ptr exec, const matrix::Fbcsr *source, diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index c826a9eaca7..cb4d2b0217a 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -3,7 +3,7 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) -ginkgo_create_test(fbcsr_kernels) +ginkgo_create_test(fbcsr_kernels test_fbcsr_sample) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(identity) ginkgo_create_test(permutation) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index b5fd2162da5..8fb53f4e3b2 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -55,6 +55,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/fbcsr_kernels.hpp" +#include "core/test/matrix/fbcsr_sample.hpp" #include "core/test/utils.hpp" @@ -74,8 +75,10 @@ class Fbcsr : public ::testing::Test { typename std::tuple_element<0, decltype(ValueIndexType())>::type; using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; - using Coo = gko::matrix::Coo; using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using Coo = gko::matrix::Coo; + using Dense = gko::matrix::Dense; using Sellp = gko::matrix::Sellp; using SparsityCsr = gko::matrix::SparsityCsr; using Ell = gko::matrix::Ell; @@ -84,174 +87,63 @@ class Fbcsr : public ::testing::Test { Fbcsr() : exec(gko::ReferenceExecutor::create()), - mtx(Mtx::create(exec, gko::dim<2>{2, 3}, 4, mat_bs, - std::make_shared>(2))), - mtx2(Mtx::create(exec, gko::dim<2>{2, 3}, 5, mat_bs, - std::make_shared>())), - mtx3_sorted(Mtx::create(exec, gko::dim<2>(3, 3), 7, mat_bs, - std::make_shared>())), - mtx3_unsorted(Mtx::create(exec, gko::dim<2>(3, 3), 7, mat_bs, - std::make_shared>())) - { - this->create_mtx(mtx.get()); - this->create_mtx2(mtx2.get()); - this->create_mtx3(mtx3_sorted.get(), mtx3_unsorted.get()); - } - - void create_mtx(Mtx *m) + fbsample(exec), + mtx(fbsample.generate_fbcsr()), + refmtx(fbsample.generate_fbcsr()), + refcsrmtx(fbsample.generate_csr()), + refdenmtx(fbsample.generate_dense()), + refcoomtx(fbsample.generate_coo()) + {} + + // void create_mtx3(Mtx *sorted, Mtx *unsorted) + // { + // /* For both versions (sorted and unsorted), this matrix is stored: + // * 0 2 1 + // * 3 1 8 + // * 2 0 3 + // * The unsorted matrix will have the (value, column) pair per row not + // * sorted, which we still consider a valid FBCSR format. + // */ + // } + + void assert_equal_to_mtx(const Csr *const m) { - value_type *v = m->get_values(); - index_type *c = m->get_col_idxs(); - index_type *r = m->get_row_ptrs(); - auto *s = m->get_srow(); - /* - * 1 3 2 - * 0 5 0 - */ - r[0] = 0; - r[1] = 3; - r[2] = 4; - c[0] = 0; - c[1] = 1; - c[2] = 2; - c[3] = 1; - v[0] = 1.0; - v[1] = 3.0; - v[2] = 2.0; - v[3] = 5.0; - s[0] = 0; + ASSERT_EQ(m->get_size(), refcsrmtx->get_size()); + ASSERT_EQ(m->get_num_stored_elements(), + refcsrmtx->get_num_stored_elements()); + for (index_type i = 0; i < m->get_size()[0] + 1; i++) + ASSERT_EQ(m->get_const_row_ptrs()[i], + refcsrmtx->get_const_row_ptrs()[i]); + for (index_type i = 0; i < m->get_num_stored_elements(); i++) { + ASSERT_EQ(m->get_const_col_idxs()[i], + refcsrmtx->get_const_col_idxs()[i]); + ASSERT_EQ(m->get_const_values()[i], + refcsrmtx->get_const_values()[i]); + } } - void create_mtx2(Mtx *m) - { - value_type *v = m->get_values(); - index_type *c = m->get_col_idxs(); - index_type *r = m->get_row_ptrs(); - // It keeps an explict zero - /* - * 1 3 2 - * {0} 5 0 - */ - r[0] = 0; - r[1] = 3; - r[2] = 5; - c[0] = 0; - c[1] = 1; - c[2] = 2; - c[3] = 0; - c[4] = 1; - v[0] = 1.0; - v[1] = 3.0; - v[2] = 2.0; - v[3] = 0.0; - v[4] = 5.0; - } - - void create_mtx3(Mtx *sorted, Mtx *unsorted) - { - auto vals_s = sorted->get_values(); - auto cols_s = sorted->get_col_idxs(); - auto rows_s = sorted->get_row_ptrs(); - auto vals_u = unsorted->get_values(); - auto cols_u = unsorted->get_col_idxs(); - auto rows_u = unsorted->get_row_ptrs(); - /* For both versions (sorted and unsorted), this matrix is stored: - * 0 2 1 - * 3 1 8 - * 2 0 3 - * The unsorted matrix will have the (value, column) pair per row not - * sorted, which we still consider a valid FBCSR format. 
- */ - rows_s[0] = 0; - rows_s[1] = 2; - rows_s[2] = 5; - rows_s[3] = 7; - rows_u[0] = 0; - rows_u[1] = 2; - rows_u[2] = 5; - rows_u[3] = 7; - - vals_s[0] = 2.; - vals_s[1] = 1.; - vals_s[2] = 3.; - vals_s[3] = 1.; - vals_s[4] = 8.; - vals_s[5] = 2.; - vals_s[6] = 3.; - // Each row is stored rotated once to the left - vals_u[0] = 1.; - vals_u[1] = 2.; - vals_u[2] = 1.; - vals_u[3] = 8.; - vals_u[4] = 3.; - vals_u[5] = 3.; - vals_u[6] = 2.; - - cols_s[0] = 1; - cols_s[1] = 2; - cols_s[2] = 0; - cols_s[3] = 1; - cols_s[4] = 2; - cols_s[5] = 0; - cols_s[6] = 2; - // The same applies for the columns - cols_u[0] = 2; - cols_u[1] = 1; - cols_u[2] = 1; - cols_u[3] = 2; - cols_u[4] = 0; - cols_u[5] = 2; - cols_u[6] = 0; - } + // void assert_equal_to_mtx(const Dense *const m) + // { + // ASSERT_EQ(m->get_size(), refdenmtx->get_size()); + // ASSERT_EQ(m->get_num_stored_elements(), + // refdenmtx->get_num_stored_elements()); for(index_type i = 0; i < + // m->get_size()[0]; i++) + // for(index_type j = 0; j < m->get_size()[1]; j++) + // ASSERT_EQ(m->at(i,j), refdenmtx->at(i,j)); + // } void assert_equal_to_mtx(const Coo *m) { - auto v = m->get_const_values(); - auto c = m->get_const_col_idxs(); - auto r = m->get_const_row_idxs(); - - ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], value_type{1.0}); - EXPECT_EQ(v[1], value_type{3.0}); - EXPECT_EQ(v[2], value_type{2.0}); - EXPECT_EQ(v[3], value_type{5.0}); - } - - void assert_equal_to_mtx(const Sellp *m) - { - auto v = m->get_const_values(); - auto c = m->get_const_col_idxs(); - auto slice_sets = m->get_const_slice_sets(); - auto slice_lengths = m->get_const_slice_lengths(); - auto stride_factor = m->get_stride_factor(); - auto slice_size = m->get_slice_size(); - - ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(stride_factor, 1); - ASSERT_EQ(slice_size, 64); - EXPECT_EQ(slice_sets[0], 0); - EXPECT_EQ(slice_lengths[0], 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[64], 1); - EXPECT_EQ(c[65], 0); - EXPECT_EQ(c[128], 2); - EXPECT_EQ(c[129], 0); - EXPECT_EQ(v[0], value_type{1.0}); - EXPECT_EQ(v[1], value_type{5.0}); - EXPECT_EQ(v[64], value_type{3.0}); - EXPECT_EQ(v[65], value_type{0.0}); - EXPECT_EQ(v[128], value_type{2.0}); - EXPECT_EQ(v[129], value_type{0.0}); + ASSERT_EQ(m->get_size(), refcoomtx->get_size()); + ASSERT_EQ(m->get_num_stored_elements(), + refcoomtx->get_num_stored_elements()); + for (index_type i = 0; i < m->get_num_stored_elements(); i++) { + ASSERT_EQ(m->get_const_row_idxs()[i], + refcoomtx->get_const_row_idxs[i]); + ASSERT_EQ(m->get_const_col_idxs()[i], + refcoomtx->get_const_col_idxs[i]); + ASSERT_EQ(m->get_const_values()[i], refcoomtx->get_const_values[i]); + } } void assert_equal_to_mtx(const SparsityCsr *m) @@ -270,89 +162,13 @@ class Fbcsr : public ::testing::Test { EXPECT_EQ(c[3], 1); } - void assert_equal_to_mtx(const Ell *m) - { - auto v = m->get_const_values(); - auto c = m->get_const_col_idxs(); - - ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_num_stored_elements(), 6); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], 0); - EXPECT_EQ(c[4], 2); - EXPECT_EQ(c[5], 0); - EXPECT_EQ(v[0], value_type{1.0}); - EXPECT_EQ(v[1], value_type{5.0}); - EXPECT_EQ(v[2], value_type{3.0}); - EXPECT_EQ(v[3], value_type{0.0}); - 
EXPECT_EQ(v[4], value_type{2.0}); - EXPECT_EQ(v[5], value_type{0.0}); - } - - void assert_equal_to_mtx(const Hybrid *m) - { - auto v = m->get_const_coo_values(); - auto c = m->get_const_coo_col_idxs(); - auto r = m->get_const_coo_row_idxs(); - auto n = m->get_ell_num_stored_elements_per_row(); - auto p = m->get_ell_stride(); - - ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_ell_num_stored_elements(), 0); - ASSERT_EQ(m->get_coo_num_stored_elements(), 4); - EXPECT_EQ(n, 0); - EXPECT_EQ(p, 2); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], value_type{1.0}); - EXPECT_EQ(v[1], value_type{3.0}); - EXPECT_EQ(v[2], value_type{2.0}); - EXPECT_EQ(v[3], value_type{5.0}); - } - - void assert_equal_to_mtx2(const Hybrid *m) - { - auto v = m->get_const_coo_values(); - auto c = m->get_const_coo_col_idxs(); - auto r = m->get_const_coo_row_idxs(); - auto n = m->get_ell_num_stored_elements_per_row(); - auto p = m->get_ell_stride(); - auto ell_v = m->get_const_ell_values(); - auto ell_c = m->get_const_ell_col_idxs(); - - ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - // Test Coo values - ASSERT_EQ(m->get_coo_num_stored_elements(), 1); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(c[0], 2); - EXPECT_EQ(v[0], value_type{2.0}); - // Test Ell values - ASSERT_EQ(m->get_ell_num_stored_elements(), 4); - EXPECT_EQ(n, 2); - EXPECT_EQ(p, 2); - EXPECT_EQ(ell_v[0], value_type{1}); - EXPECT_EQ(ell_v[1], value_type{0}); - EXPECT_EQ(ell_v[2], value_type{3}); - EXPECT_EQ(ell_v[3], value_type{5}); - EXPECT_EQ(ell_c[0], 0); - EXPECT_EQ(ell_c[1], 0); - EXPECT_EQ(ell_c[2], 1); - EXPECT_EQ(ell_c[3], 1); - } - std::shared_ptr exec; + const gko::testing::FbcsrSample fbsample; std::unique_ptr mtx; - std::unique_ptr mtx2; - std::unique_ptr mtx3_sorted; - std::unique_ptr mtx3_unsorted; + const std::unique_ptr refmtx; + const std::unique_ptr refcsrmtx; + const std::unique_ptr refdenmtx; + const std::unique_ptr refcoomtx; }; TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); @@ -627,63 +443,75 @@ GKO_NOT_IMPLEMENTED; TYPED_TEST(Fbcsr, ConvertsToDense) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Dense = typename TestFixture::Vec; -// auto dense_mtx = Dense::create(this->mtx->get_executor()); -// auto dense_other = gko::initialize( -// 4, {{1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}, this->exec); -// -// this->mtx->convert_to(dense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(dense_mtx, dense_other, 0.0); -//} +{ + using Dense = typename TestFixture::Dense; + auto dense_mtx = Dense::create(this->mtx->get_executor()); + + this->mtx->convert_to(dense_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mtx, this->refdenmtx, 0.0); +} TYPED_TEST(Fbcsr, MovesToDense) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Dense = typename TestFixture::Vec; -// auto dense_mtx = Dense::create(this->mtx->get_executor()); -// auto dense_other = gko::initialize( -// 4, {{1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}, this->exec); -// -// this->mtx->move_to(dense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(dense_mtx, dense_other, 0.0); -//} +{ + using Dense = typename TestFixture::Dense; + auto dense_mtx = Dense::create(this->mtx->get_executor()); + this->mtx->move_to(dense_mtx.get()); -TYPED_TEST(Fbcsr, ConvertsToCoo) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if 
needed + GKO_ASSERT_MTX_NEAR(dense_mtx, this->refdenmtx, 0.0); +} + + +TYPED_TEST(Fbcsr, ConvertsToCsr) +{ + using Csr = typename TestFixture::Csr; + auto csr_mtx = Csr::create(this->mtx->get_executor(), + std::make_shared()); + + this->mtx->convert_to(csr_mtx.get()); + + this->assert_equal_to_mtx(csr_mtx.get()); +} + + +TYPED_TEST(Fbcsr, MovesToCsr) +{ + using Csr = typename TestFixture::Csr; + auto csr_mtx = Csr::create(this->mtx->get_executor(), + std::make_shared()); + + this->mtx->move_to(csr_mtx.get()); + + this->assert_equal_to_mtx(csr_mtx.get()); +} + + +// TYPED_TEST(Fbcsr, ConvertsToCoo) +// { // using Coo = typename TestFixture::Coo; // auto coo_mtx = Coo::create(this->mtx->get_executor()); -// + // this->mtx->convert_to(coo_mtx.get()); -// + // this->assert_equal_to_mtx(coo_mtx.get()); -//} +// } -TYPED_TEST(Fbcsr, MovesToCoo) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// TYPED_TEST(Fbcsr, MovesToCoo) +// { // using Coo = typename TestFixture::Coo; // auto coo_mtx = Coo::create(this->mtx->get_executor()); -// + // this->mtx->move_to(coo_mtx.get()); -// + // this->assert_equal_to_mtx(coo_mtx.get()); -//} +// } -TYPED_TEST(Fbcsr, ConvertsToSellp) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, ConvertsToSellp) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Sellp = typename TestFixture::Sellp; @@ -695,8 +523,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesToSellp) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, MovesToSellp) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Sellp = typename TestFixture::Sellp; @@ -711,12 +539,12 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, ConvertsToSparsityFbcsr) +TYPED_TEST(Fbcsr, ConvertsToSparsityCsr) GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using SparsityFbcsr = typename TestFixture::SparsityFbcsr; -// auto sparsity_mtx = SparsityFbcsr::create(this->mtx->get_executor()); +// auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); // // this->mtx->convert_to(sparsity_mtx.get()); // @@ -724,13 +552,13 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesToSparsityFbcsr) +TYPED_TEST(Fbcsr, MovesToSparsityCsr) GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using SparsityFbcsr = typename TestFixture::SparsityFbcsr; // using Fbcsr = typename TestFixture::Mtx; -// auto sparsity_mtx = SparsityFbcsr::create(this->mtx->get_executor()); +// auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); // auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); // // fbcsr_ref->copy_from(this->mtx.get()); @@ -740,8 +568,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, ConvertsToHybridAutomatically) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, ConvertsToHybridAutomatically) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Hybrid = typename TestFixture::Hybrid; @@ -753,8 +581,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesToHybridAutomatically) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, MovesToHybridAutomatically) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Hybrid = typename TestFixture::Hybrid; @@ -769,8 +597,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, ConvertsToHybridByColumn2) 
-GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, ConvertsToHybridByColumn2) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Hybrid = typename TestFixture::Hybrid; @@ -784,8 +612,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesToHybridByColumn2) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, MovesToHybridByColumn2) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Hybrid = typename TestFixture::Hybrid; @@ -845,75 +673,67 @@ GKO_NOT_IMPLEMENTED; TYPED_TEST(Fbcsr, ConvertsEmptyToDense) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Dense = gko::matrix::Dense; -// auto empty = Fbcsr::create(this->exec); -// auto res = Dense::create(this->exec); -// -// empty->convert_to(res.get()); -// -// ASSERT_FALSE(res->get_size()); -//} +{ + using ValueType = typename TestFixture::value_type; + using Fbcsr = typename TestFixture::Mtx; + using Dense = typename TestFixture::Dense; + auto empty = Fbcsr::create(this->exec); + auto res = Dense::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_FALSE(res->get_size()); +} TYPED_TEST(Fbcsr, MovesEmptyToDense) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Dense = gko::matrix::Dense; -// auto empty = Fbcsr::create(this->exec); -// auto res = Dense::create(this->exec); -// -// empty->move_to(res.get()); -// -// ASSERT_FALSE(res->get_size()); -//} +{ + using ValueType = typename TestFixture::value_type; + using Fbcsr = typename TestFixture::Mtx; + using Dense = typename TestFixture::Dense; + auto empty = Fbcsr::create(this->exec); + auto res = Dense::create(this->exec); + empty->move_to(res.get()); -TYPED_TEST(Fbcsr, ConvertsEmptyToCoo) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed + ASSERT_FALSE(res->get_size()); +} + + +// TYPED_TEST(Fbcsr, ConvertsEmptyToCoo) +// { // using ValueType = typename TestFixture::value_type; // using IndexType = typename TestFixture::index_type; // using Fbcsr = typename TestFixture::Mtx; // using Coo = gko::matrix::Coo; // auto empty = Fbcsr::create(this->exec); // auto res = Coo::create(this->exec); -// + // empty->convert_to(res.get()); -// + // ASSERT_EQ(res->get_num_stored_elements(), 0); // ASSERT_FALSE(res->get_size()); -//} +// } -TYPED_TEST(Fbcsr, MovesEmptyToCoo) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed +// TYPED_TEST(Fbcsr, MovesEmptyToCoo) +// { // using ValueType = typename TestFixture::value_type; // using IndexType = typename TestFixture::index_type; // using Fbcsr = typename TestFixture::Mtx; // using Coo = gko::matrix::Coo; // auto empty = Fbcsr::create(this->exec); // auto res = Coo::create(this->exec); -// + // empty->move_to(res.get()); -// + // ASSERT_EQ(res->get_num_stored_elements(), 0); // ASSERT_FALSE(res->get_size()); -//} +// } -TYPED_TEST(Fbcsr, ConvertsEmptyToEll) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, ConvertsEmptyToEll) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using ValueType = typename TestFixture::value_type; @@ -930,8 +750,8 @@ GKO_NOT_IMPLEMENTED; 
//} -TYPED_TEST(Fbcsr, MovesEmptyToEll) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, MovesEmptyToEll) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using ValueType = typename TestFixture::value_type; @@ -948,8 +768,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, ConvertsEmptyToSellp) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, ConvertsEmptyToSellp) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using ValueType = typename TestFixture::value_type; @@ -967,8 +787,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesEmptyToSellp) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, MovesEmptyToSellp) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using ValueType = typename TestFixture::value_type; @@ -986,7 +806,7 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, ConvertsEmptyToSparsityFbcsr) +TYPED_TEST(Fbcsr, ConvertsEmptyToSparsityCsr) GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed @@ -1005,7 +825,7 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesEmptyToSparsityFbcsr) +TYPED_TEST(Fbcsr, MovesEmptyToSparsityCsr) GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed @@ -1024,8 +844,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, ConvertsEmptyToHybrid) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, ConvertsEmptyToHybrid) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using ValueType = typename TestFixture::value_type; @@ -1042,8 +862,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesEmptyToHybrid) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, MovesEmptyToHybrid) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using ValueType = typename TestFixture::value_type; @@ -1090,8 +910,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, ConvertsToEll) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, ConvertsToEll) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Ell = typename TestFixture::Ell; @@ -1106,8 +926,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, MovesToEll) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, MovesToEll) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Ell = typename TestFixture::Ell; From 9533184a88c66091be8346679b2df343aba9f932 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 12 Nov 2020 10:47:14 +0100 Subject: [PATCH 05/58] [tests fail] conversion of FBCSR to sparsity CSR now works --- core/matrix/fbcsr.cpp | 110 +++++++++++++++++++- core/test/matrix/fbcsr_sample.cpp | 14 +++ core/test/matrix/fbcsr_sample.hpp | 4 + include/ginkgo/core/matrix/fbcsr.hpp | 111 ++------------------ reference/test/matrix/fbcsr_kernels.cpp | 131 +++++++++++------------- 5 files changed, 192 insertions(+), 178 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 0d00e70bcc5..e4d88aa9717 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -179,6 +179,38 @@ void Fbcsr::apply_impl(const LinOp *alpha, const LinOp *b, //} +template +void Fbcsr::convert_to( + Fbcsr *result) const +{ + bool same_executor = this->get_executor() == result->get_executor(); + // NOTE: as soon as strategies are improved, this can be reverted + result->values_ = this->values_; + 
result->col_idxs_ = this->col_idxs_; + result->row_ptrs_ = this->row_ptrs_; + result->startrow_ = this->startrow_; + result->set_size(this->get_size()); + result->bs_ = this->bs_; + if (!same_executor) { + convert_strategy_helper(result); + } else { + result->set_strategy(std::move(this->get_strategy()->copy())); + } + // END NOTE +} + + +template +void Fbcsr::move_to(Fbcsr *result) +{ + bool same_executor = this->get_executor() == result->get_executor(); + EnableLinOp::move_to(result); + if (!same_executor) { + matrix_strategy::strategy_rebuild_helper(result); + } +} + + template void Fbcsr::convert_to( Fbcsr, IndexType> *const result) const @@ -332,16 +364,17 @@ template void Fbcsr::convert_to( SparsityCsr *result) const { + using gko::blockutils::getNumFixedBlocks; auto exec = this->get_executor(); auto tmp = SparsityCsr::create( - exec, this->get_size(), this->get_num_stored_elements()); + exec, + gko::dim<2>{getNumFixedBlocks(bs_, this->get_size()[0]), + getNumFixedBlocks(bs_, this->get_size()[1])}, + getNumFixedBlocks(bs_ * bs_, this->get_num_stored_elements())); + tmp->col_idxs_ = this->col_idxs_; tmp->row_ptrs_ = this->row_ptrs_; - // if (result->value_.get_data()) { - // tmp->value_ = result->value_; - // } else { tmp->value_ = gko::Array(exec, {one()}); - // } tmp->move_to(result); } @@ -706,6 +739,73 @@ Fbcsr::compute_absolute() const GKO_NOT_IMPLEMENTED; //} +// TODO clean this up as soon as we improve strategy_type +template +template +void Fbcsr::convert_strategy_helper( + FbcsrType *result) const +{ + auto strat = this->get_strategy().get(); + std::shared_ptr> + new_strat; + using classical = matrix_strategy::classical; + using load_balance = matrix_strategy::load_balance; + using automatic = matrix_strategy::automatic; + + if (dynamic_cast(strat)) { + new_strat = std::make_shared(); + } else { + auto rexec = result->get_executor(); + auto cuda_exec = std::dynamic_pointer_cast(rexec); + auto hip_exec = std::dynamic_pointer_cast(rexec); + auto lb = dynamic_cast(strat); + if (cuda_exec) { + if (lb) { + new_strat = std::make_shared(cuda_exec); + } else { + new_strat = std::make_shared(cuda_exec); + } + } else if (hip_exec) { + if (lb) { + new_strat = std::make_shared(hip_exec); + } else { + new_strat = std::make_shared(hip_exec); + } + } else { + // Try to preserve this executor's configuration + auto this_cuda_exec = std::dynamic_pointer_cast( + this->get_executor()); + auto this_hip_exec = std::dynamic_pointer_cast( + this->get_executor()); + if (this_cuda_exec) { + if (lb) { + new_strat = std::make_shared(this_cuda_exec); + } else { + new_strat = std::make_shared(this_cuda_exec); + } + } else if (this_hip_exec) { + if (lb) { + new_strat = std::make_shared(this_hip_exec); + } else { + new_strat = std::make_shared(this_hip_exec); + } + } else { + // We had a load balance or automatic strategy from a non + // HIP or Cuda executor and are moving to a non HIP or Cuda + // executor. 
+ // FIXME this creates a long delay + if (lb) { + new_strat = std::make_shared(); + } else { + new_strat = std::make_shared(); + } + } + } + } + result->set_strategy(new_strat); +} + + #define GKO_DECLARE_FBCSR_MATRIX(ValueType, IndexType) \ class Fbcsr GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_MATRIX); diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index a221cd3381e..035cdcefca8 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -314,6 +314,20 @@ FbcsrSample::generate_coo() const return mat; } +template +std::unique_ptr> +FbcsrSample::generate_sparsity_csr() const +{ + gko::Array colids(exec, nbnz); + gko::Array rowptrs(exec, nbrows + 1); + const std::unique_ptr fbmat = generate_fbcsr(); + for (index_type i = 0; i < nbrows + 1; i++) + rowptrs.get_data()[i] = fbmat->get_row_ptrs()[i]; + for (index_type i = 0; i < nbnz; i++) + colids.get_data()[i] = fbmat->get_col_idxs()[i]; + return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, rowptrs); +} + #define GKO_DECLARE_FBCSR_TEST_SAMPLE(ValueType, IndexType) \ class FbcsrSample GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE); diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 7fe2fd3cb32..5df96e6d64f 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { namespace testing { @@ -57,6 +58,7 @@ class FbcsrSample { using Coo = gko::matrix::Coo; using Dense = gko::matrix::Dense; using MatData = gko::matrix_data; + using SparCsr = gko::matrix::SparsityCsr; FbcsrSample(std::shared_ptr exec); @@ -72,6 +74,8 @@ class FbcsrSample { */ std::unique_ptr generate_coo() const; + std::unique_ptr generate_sparsity_csr() const; + MatData generate_matrix_data() const; MatData generate_matrix_data_with_explicit_zeros() const; diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index bfbab84eabe..836169ba0c3 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -160,32 +160,10 @@ class Fbcsr : public EnableLinOp>, matrix_strategy::strategy_type>; - void convert_to(Fbcsr *result) const override - { - bool same_executor = this->get_executor() == result->get_executor(); - // NOTE: as soon as strategies are improved, this can be reverted - result->values_ = this->values_; - result->col_idxs_ = this->col_idxs_; - result->row_ptrs_ = this->row_ptrs_; - result->startrow_ = this->startrow_; - result->set_size(this->get_size()); - result->bs_ = this->bs_; - if (!same_executor) { - convert_strategy_helper(result); - } else { - result->set_strategy(std::move(this->get_strategy()->copy())); - } - // END NOTE - } + void convert_to(Fbcsr *result) const override; + + void move_to(Fbcsr *result) override; - void move_to(Fbcsr *result) override - { - bool same_executor = this->get_executor() == result->get_executor(); - EnableLinOp::move_to(result); - if (!same_executor) { - matrix_strategy::strategy_rebuild_helper(result); - } - } friend class Fbcsr, IndexType>; void convert_to( @@ -205,18 +183,10 @@ class Fbcsr : public EnableLinOp>, void move_to(Coo *result) override; - // void convert_to(Ell *result) const override; - - // void move_to(Ell *result) override; - - // void convert_to(Hybrid *result) const override; - - // void move_to(Hybrid *result) override; - 
- // void convert_to(Sellp *result) const override; - - // void move_to(Sellp *result) override; - + /// Get the block sparsity pattern in CSR-like format + /** Note that the actual non-zero values are never copied; + * the result always has a value array of size 1 with the value 1. + */ void convert_to(SparsityCsr *result) const override; void move_to(SparsityCsr *result) override; @@ -483,72 +453,7 @@ class Fbcsr : public EnableLinOp>, // TODO clean this up as soon as we improve strategy_type template - void convert_strategy_helper(FbcsrType *result) const - { - auto strat = this->get_strategy().get(); - std::shared_ptr> - new_strat; - using classical = matrix_strategy::classical; - using load_balance = matrix_strategy::load_balance; - using automatic = matrix_strategy::automatic; - - if (dynamic_cast(strat)) { - new_strat = std::make_shared(); - } else { - auto rexec = result->get_executor(); - auto cuda_exec = - std::dynamic_pointer_cast(rexec); - auto hip_exec = std::dynamic_pointer_cast(rexec); - auto lb = dynamic_cast(strat); - if (cuda_exec) { - if (lb) { - new_strat = std::make_shared(cuda_exec); - } else { - new_strat = std::make_shared(cuda_exec); - } - } else if (hip_exec) { - if (lb) { - new_strat = std::make_shared(hip_exec); - } else { - new_strat = std::make_shared(hip_exec); - } - } else { - // Try to preserve this executor's configuration - auto this_cuda_exec = - std::dynamic_pointer_cast( - this->get_executor()); - auto this_hip_exec = - std::dynamic_pointer_cast( - this->get_executor()); - if (this_cuda_exec) { - if (lb) { - new_strat = - std::make_shared(this_cuda_exec); - } else { - new_strat = std::make_shared(this_cuda_exec); - } - } else if (this_hip_exec) { - if (lb) { - new_strat = - std::make_shared(this_hip_exec); - } else { - new_strat = std::make_shared(this_hip_exec); - } - } else { - // We had a load balance or automatic strategy from a non - // HIP or Cuda executor and are moving to a non HIP or Cuda - // executor. - // FIXME this creates a long delay - if (lb) { - new_strat = std::make_shared(); - } else { - new_strat = std::make_shared(); - } - } - } - } - result->set_strategy(new_strat); - } + void convert_strategy_helper(FbcsrType *result) const; /** * Computes srow. It should be run after changing any row_ptrs_ value. 
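The block-sparsity conversion declared above copies only the block row pointers and block column indices; the stored values are collapsed into a single shared entry equal to one, so the resulting SparsityCsr has one row per block row and one column per block column. A minimal usage sketch, assuming a ReferenceExecutor and the 6 x 12, 3x3-block sample matrix from the FbcsrSample helper (the double/gko::int32 instantiation is only illustrative):

    // Illustrative sketch only (assumes the FbcsrSample test helper above):
    // extract the block sparsity pattern of the 6 x 12 sample FBCSR matrix
    // with 3x3 blocks; the pattern itself is a 2 x 4 SparsityCsr matrix.
    auto exec = gko::ReferenceExecutor::create();
    gko::testing::FbcsrSample<double, gko::int32> sample(exec);
    auto fbmtx = sample.generate_fbcsr();
    auto pattern = gko::matrix::SparsityCsr<double, gko::int32>::create(exec);
    fbmtx->convert_to(pattern.get());
    // pattern->get_size() == gko::dim<2>(2, 4); one stored entry per block
    // of fbmtx, each with the value 1.
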
diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 8fb53f4e3b2..f20fe456b29 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -80,7 +80,7 @@ class Fbcsr : public ::testing::Test { using Coo = gko::matrix::Coo; using Dense = gko::matrix::Dense; using Sellp = gko::matrix::Sellp; - using SparsityCsr = gko::matrix::SparsityCsr; + using SparCsr = gko::matrix::SparsityCsr; using Ell = gko::matrix::Ell; using Hybrid = gko::matrix::Hybrid; using Vec = gko::matrix::Dense; @@ -92,7 +92,8 @@ class Fbcsr : public ::testing::Test { refmtx(fbsample.generate_fbcsr()), refcsrmtx(fbsample.generate_csr()), refdenmtx(fbsample.generate_dense()), - refcoomtx(fbsample.generate_coo()) + refcoomtx(fbsample.generate_coo()), + refspcmtx(fbsample.generate_sparsity_csr()) {} // void create_mtx3(Mtx *sorted, Mtx *unsorted) @@ -146,20 +147,17 @@ class Fbcsr : public ::testing::Test { } } - void assert_equal_to_mtx(const SparsityCsr *m) + void assert_equal_to_mtx(const SparCsr *m) { - auto *c = m->get_const_col_idxs(); - auto *r = m->get_const_row_ptrs(); - - ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_num_nonzeros(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); + ASSERT_EQ(m->get_size(), refspcmtx->get_size()); + ASSERT_EQ(m->get_num_nonzeros(), refspcmtx->get_num_nonzeros()); + for (index_type i = 0; i < m->get_size()[0] + 1; i++) + ASSERT_EQ(m->get_const_row_ptrs()[i], + refspcmtx->get_const_row_ptrs()[i]); + for (index_type i = 0; i < m->get_num_nonzeros(); i++) { + ASSERT_EQ(m->get_const_col_idxs()[i], + refspcmtx->get_const_col_idxs()[i]); + } } std::shared_ptr exec; @@ -169,6 +167,7 @@ class Fbcsr : public ::testing::Test { const std::unique_ptr refcsrmtx; const std::unique_ptr refdenmtx; const std::unique_ptr refcoomtx; + const std::unique_ptr refspcmtx; }; TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); @@ -540,32 +539,28 @@ TYPED_TEST(Fbcsr, MovesToCsr) TYPED_TEST(Fbcsr, ConvertsToSparsityCsr) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using SparsityFbcsr = typename TestFixture::SparsityFbcsr; -// auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); -// -// this->mtx->convert_to(sparsity_mtx.get()); -// -// this->assert_equal_to_mtx(sparsity_mtx.get()); -//} +{ + using SparsityCsr = typename TestFixture::SparCsr; + auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); + + this->mtx->convert_to(sparsity_mtx.get()); + + this->assert_equal_to_mtx(sparsity_mtx.get()); +} TYPED_TEST(Fbcsr, MovesToSparsityCsr) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using SparsityFbcsr = typename TestFixture::SparsityFbcsr; -// using Fbcsr = typename TestFixture::Mtx; -// auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); -// auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); -// -// fbcsr_ref->copy_from(this->mtx.get()); -// fbcsr_ref->move_to(sparsity_mtx.get()); -// -// this->assert_equal_to_mtx(sparsity_mtx.get()); -//} +{ + using SparsityCsr = typename TestFixture::SparCsr; + using Fbcsr = typename TestFixture::Mtx; + auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); + // auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); + + // fbcsr_ref->copy_from(this->mtx.get()); + 
this->mtx->move_to(sparsity_mtx.get()); + + this->assert_equal_to_mtx(sparsity_mtx.get()); +} // TYPED_TEST(Fbcsr, ConvertsToHybridAutomatically) @@ -807,41 +802,37 @@ TYPED_TEST(Fbcsr, MovesEmptyToDense) TYPED_TEST(Fbcsr, ConvertsEmptyToSparsityCsr) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using SparsityFbcsr = gko::matrix::SparsityFbcsr; -// auto empty = Fbcsr::create(this->exec); -// empty->get_row_ptrs()[0] = 0; -// auto res = SparsityFbcsr::create(this->exec); -// -// empty->convert_to(res.get()); -// -// ASSERT_EQ(res->get_num_nonzeros(), 0); -// ASSERT_EQ(*res->get_const_row_ptrs(), 0); -//} +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using Fbcsr = typename TestFixture::Mtx; + using SparCsr = typename TestFixture::SparCsr; + auto empty = Fbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = SparCsr::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_EQ(res->get_num_nonzeros(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); +} TYPED_TEST(Fbcsr, MovesEmptyToSparsityCsr) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using SparsityFbcsr = gko::matrix::SparsityFbcsr; -// auto empty = Fbcsr::create(this->exec); -// empty->get_row_ptrs()[0] = 0; -// auto res = SparsityFbcsr::create(this->exec); -// -// empty->move_to(res.get()); -// -// ASSERT_EQ(res->get_num_nonzeros(), 0); -// ASSERT_EQ(*res->get_const_row_ptrs(), 0); -//} +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using Fbcsr = typename TestFixture::Mtx; + using SparCsr = typename TestFixture::SparCsr; + auto empty = Fbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = SparCsr::create(this->exec); + + empty->move_to(res.get()); + + ASSERT_EQ(res->get_num_nonzeros(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); +} // TYPED_TEST(Fbcsr, ConvertsEmptyToHybrid) From c75cf076e9270b493050240614c0707b3435137e Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 12 Nov 2020 11:34:20 +0100 Subject: [PATCH 06/58] [tests fail] enabled tests for fbcsr precision conversion --- reference/test/matrix/fbcsr_kernels.cpp | 154 +++++++++++------------- 1 file changed, 72 insertions(+), 82 deletions(-) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index f20fe456b29..dbba82dc3e4 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -390,55 +390,49 @@ GKO_NOT_IMPLEMENTED; TYPED_TEST(Fbcsr, ConvertsToPrecision) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using OtherType = typename gko::next_precision; -// using Fbcsr = typename TestFixture::Mtx; -// using OtherFbcsr = gko::matrix::Fbcsr; -// auto tmp = OtherFbcsr::create(this->exec); -// auto res = Fbcsr::create(this->exec); -// // If OtherType is more precise: 0, otherwise r -// auto residual = r::value < r::value -// 
? gko::remove_complex{0} -// : gko::remove_complex{r::value}; -// -// // use mtx2 as mtx's strategy would involve creating a CudaExecutor -// this->mtx2->convert_to(tmp.get()); -// tmp->convert_to(res.get()); -// -// GKO_ASSERT_MTX_NEAR(this->mtx2, res, residual); -// ASSERT_EQ(typeid(*this->mtx2->get_strategy()), -// typeid(*res->get_strategy())); -//} +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto tmp = OtherFbcsr::create(this->exec); + auto res = Fbcsr::create(this->exec); + // If OtherType is more precise: 0, otherwise r + auto residual = r::value < r::value + ? gko::remove_complex{0} + : gko::remove_complex{r::value}; + + this->mtx->convert_to(tmp.get()); + tmp->convert_to(res.get()); + + GKO_ASSERT_MTX_NEAR(this->mtx, res, residual); + // ASSERT_EQ(typeid(*this->mtx->get_strategy()), + // typeid(*res->get_strategy())); +} TYPED_TEST(Fbcsr, MovesToPrecision) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using OtherType = typename gko::next_precision; -// using Fbcsr = typename TestFixture::Mtx; -// using OtherFbcsr = gko::matrix::Fbcsr; -// auto tmp = OtherFbcsr::create(this->exec); -// auto res = Fbcsr::create(this->exec); -// // If OtherType is more precise: 0, otherwise r -// auto residual = r::value < r::value -// ? gko::remove_complex{0} -// : gko::remove_complex{r::value}; -// -// // use mtx2 as mtx's strategy would involve creating a CudaExecutor -// this->mtx2->move_to(tmp.get()); -// tmp->move_to(res.get()); -// -// GKO_ASSERT_MTX_NEAR(this->mtx2, res, residual); -// ASSERT_EQ(typeid(*this->mtx2->get_strategy()), -// typeid(*res->get_strategy())); -//} +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto tmp = OtherFbcsr::create(this->exec); + auto res = Fbcsr::create(this->exec); + // If OtherType is more precise: 0, otherwise r + auto residual = r::value < r::value + ? 
gko::remove_complex{0} + : gko::remove_complex{r::value}; + + this->mtx->move_to(tmp.get()); + tmp->move_to(res.get()); + + GKO_ASSERT_MTX_NEAR(this->mtx, res, residual); + // ASSERT_EQ(typeid(*this->mtx->get_strategy()), + // typeid(*res->get_strategy())); +} TYPED_TEST(Fbcsr, ConvertsToDense) @@ -626,45 +620,41 @@ TYPED_TEST(Fbcsr, MovesToSparsityCsr) TYPED_TEST(Fbcsr, ConvertsEmptyToPrecision) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using OtherType = typename gko::next_precision; -// using Fbcsr = typename TestFixture::Mtx; -// using OtherFbcsr = gko::matrix::Fbcsr; -// auto empty = OtherFbcsr::create(this->exec); -// empty->get_row_ptrs()[0] = 0; -// auto res = Fbcsr::create(this->exec); -// -// empty->convert_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_EQ(*res->get_const_row_ptrs(), 0); -// ASSERT_FALSE(res->get_size()); -//} +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto empty = OtherFbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = Fbcsr::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); +} TYPED_TEST(Fbcsr, MovesEmptyToPrecision) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using OtherType = typename gko::next_precision; -// using Fbcsr = typename TestFixture::Mtx; -// using OtherFbcsr = gko::matrix::Fbcsr; -// auto empty = OtherFbcsr::create(this->exec); -// empty->get_row_ptrs()[0] = 0; -// auto res = Fbcsr::create(this->exec); -// -// empty->move_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_EQ(*res->get_const_row_ptrs(), 0); -// ASSERT_FALSE(res->get_size()); -//} +{ + using ValueType = typename TestFixture::value_type; + using IndexType = typename TestFixture::index_type; + using OtherType = typename gko::next_precision; + using Fbcsr = typename TestFixture::Mtx; + using OtherFbcsr = gko::matrix::Fbcsr; + auto empty = OtherFbcsr::create(this->exec); + empty->get_row_ptrs()[0] = 0; + auto res = Fbcsr::create(this->exec); + + empty->move_to(res.get()); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); +} TYPED_TEST(Fbcsr, ConvertsEmptyToDense) From 06dcd2122fde62ec4114c5fcd79ecbfac6f6a89c Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Fri, 13 Nov 2020 17:16:47 +0100 Subject: [PATCH 07/58] [tests fail] ref spmv and generalized spmv work and are tested --- core/matrix/fbcsr.cpp | 77 +++---- core/test/matrix/fbcsr_sample.cpp | 234 ++++++++++++++++--- core/test/matrix/fbcsr_sample.hpp | 61 +++++ reference/matrix/fbcsr_kernels.cpp | 234 +++++++++++-------- reference/test/matrix/fbcsr_kernels.cpp | 294 +++++++++++++++--------- 5 files changed, 628 insertions(+), 272 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index e4d88aa9717..68defa46f4b 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -62,9 
+62,9 @@ namespace fbcsr { GKO_REGISTER_OPERATION(spmv, fbcsr::spmv); -// GKO_REGISTER_OPERATION(advanced_spmv, fbcsr::advanced_spmv); +GKO_REGISTER_OPERATION(advanced_spmv, fbcsr::advanced_spmv); GKO_REGISTER_OPERATION(spgemm, fbcsr::spgemm); -// GKO_REGISTER_OPERATION(advanced_spgemm, fbcsr::advanced_spgemm); +GKO_REGISTER_OPERATION(advanced_spgemm, fbcsr::advanced_spgemm); GKO_REGISTER_OPERATION(spgeam, fbcsr::spgeam); GKO_REGISTER_OPERATION(convert_to_coo, fbcsr::convert_to_coo); GKO_REGISTER_OPERATION(convert_to_csr, fbcsr::convert_to_csr); @@ -131,7 +131,6 @@ template void Fbcsr::apply_impl(const LinOp *const b, LinOp *const x) const { - // TODO (script:fbcsr): change the code imported from matrix/csr if needed using Dense = Dense; using TFbcsr = Fbcsr; if (auto b_fbcsr = dynamic_cast(b)) { @@ -151,32 +150,29 @@ void Fbcsr::apply_impl(const LinOp *const b, template void Fbcsr::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Dense = Dense; -// using TFbcsr = Fbcsr; -// if (auto b_fbcsr = dynamic_cast(b)) { -// // if b is a FBCSR matrix, we compute a SpGeMM -// auto x_fbcsr = as(x); -// auto x_copy = x_fbcsr->clone(); -// this->get_executor()->run( -// fbcsr::make_advanced_spgemm(as(alpha), this, b_fbcsr, -// as(beta), x_copy.get(), -// x_fbcsr)); -// } else if (dynamic_cast *>(b)) { -// // if b is an identity matrix, we compute an SpGEAM -// auto x_fbcsr = as(x); -// auto x_copy = x_fbcsr->clone(); -// this->get_executor()->run(fbcsr::make_spgeam( -// as(alpha), this, as(beta), lend(x_copy), x_fbcsr)); -// } else { -// // otherwise we assume that b is dense and compute a SpMV/SpMM -// this->get_executor()->run( -// fbcsr::make_advanced_spmv(as(alpha), this, as(b), -// as(beta), as(x))); -// } -//} +{ + using Dense = Dense; + using TFbcsr = Fbcsr; + if (auto b_fbcsr = dynamic_cast(b)) { + // if b is a FBCSR matrix, we compute a SpGeMM + auto x_fbcsr = as(x); + auto x_copy = x_fbcsr->clone(); + this->get_executor()->run(fbcsr::make_advanced_spgemm( + as(alpha), this, b_fbcsr, as(beta), x_copy.get(), + x_fbcsr)); + } else if (dynamic_cast *>(b)) { + // if b is an identity matrix, we compute an SpGEAM + auto x_fbcsr = as(x); + auto x_copy = x_fbcsr->clone(); + this->get_executor()->run(fbcsr::make_spgeam( + as(alpha), this, as(beta), lend(x_copy), x_fbcsr)); + } else { + // otherwise we assume that b is dense and compute a SpMV/SpMM + this->get_executor()->run( + fbcsr::make_advanced_spmv(as(alpha), this, as(b), + as(beta), as(x))); + } +} template @@ -692,18 +688,17 @@ bool Fbcsr::is_sorted_by_column_index() const template std::unique_ptr> -Fbcsr::extract_diagonal() const GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// -// const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); -// auto diag = Diagonal::create(exec, diag_size); -// exec->run(fbcsr::make_fill_array(diag->get_values(), diag->get_size()[0], -// zero())); -// exec->run(fbcsr::make_extract_diagonal(this, lend(diag))); -// return diag; -//} +Fbcsr::extract_diagonal() const +{ + auto exec = this->get_executor(); + + const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); + auto diag = Diagonal::create(exec, diag_size); + exec->run(fbcsr::make_fill_array(diag->get_values(), diag->get_size()[0], + zero())); + exec->run(fbcsr::make_extract_diagonal(this, 
lend(diag))); + return diag; +} template diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 035cdcefca8..f8d8136e23f 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -38,6 +38,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/fixed_block.hpp" #include "fbcsr_sample.hpp" +#define FBCSR_TEST_OFFSET 0.000011118888 + namespace gko { namespace testing { @@ -104,6 +106,9 @@ FbcsrSample::generate_fbcsr() const vals(0, 2, 2) = gko::zero(); vals(3, 0, 0) = gko::zero(); + v[34] += FBCSR_TEST_OFFSET; + v[35] += FBCSR_TEST_OFFSET; + for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; return mtx; @@ -209,6 +214,9 @@ FbcsrSample::generate_csr() const csrvals[34] = 13; csrvals[35] = 14; + csrvals[34] += FBCSR_TEST_OFFSET; + csrvals[35] += FBCSR_TEST_OFFSET; + return csrm; } @@ -233,6 +241,8 @@ FbcsrSample::generate_dense() const } densem->at(2, 3) = densem->at(2, 5) = densem->at(3, 6) = 0.0; + densem->at(5, 7) += FBCSR_TEST_OFFSET; + densem->at(5, 8) += FBCSR_TEST_OFFSET; return densem; } @@ -242,20 +252,43 @@ template gko::matrix_data FbcsrSample::generate_matrix_data() const { - return MatData( - {{6, 12}, {{0, 3, 2.0}, {0, 4, 3.0}, {0, 5, 4.0}, {1, 3, 5.0}, - {1, 4, 6.0}, {1, 5, 7.0}, {2, 4, 9.0}, - - {0, 9, 4.0}, {0, 10, 5.0}, {0, 11, 6.0}, {1, 9, 7.0}, - {1, 10, 8.0}, {1, 11, 9.0}, {2, 9, 10.0}, {2, 10, 11.0}, - {2, 11, 12.0}, - - {3, 0, 2.0}, {3, 1, 3.0}, {3, 2, 4.0}, {4, 0, 5.0}, - {4, 1, 6.0}, {4, 2, 7.0}, {5, 0, 8.0}, {5, 1, 9.0}, - {5, 2, 10.0}, - - {3, 7, 7.0}, {3, 8, 8.0}, {4, 6, 9.0}, {4, 7, 10.0}, - {4, 8, 11.0}, {5, 6, 12.0}, {5, 7, 13.0}, {5, 8, 14.0}}}); + return MatData({{6, 12}, + {{0, 3, 2.0}, + {0, 4, 3.0}, + {0, 5, 4.0}, + {1, 3, 5.0}, + {1, 4, 6.0}, + {1, 5, 7.0}, + {2, 4, 9.0}, + + {0, 9, 4.0}, + {0, 10, 5.0}, + {0, 11, 6.0}, + {1, 9, 7.0}, + {1, 10, 8.0}, + {1, 11, 9.0}, + {2, 9, 10.0}, + {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, + {3, 1, 3.0}, + {3, 2, 4.0}, + {4, 0, 5.0}, + {4, 1, 6.0}, + {4, 2, 7.0}, + {5, 0, 8.0}, + {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 7, 7.0}, + {3, 8, 8.0}, + {4, 6, 9.0}, + {4, 7, 10.0}, + {4, 8, 11.0}, + {5, 6, 12.0}, + {5, 7, 13.0 + FBCSR_TEST_OFFSET}, + {5, 8, 14.0 + FBCSR_TEST_OFFSET}}}); } // Assuming row-major blocks @@ -263,21 +296,46 @@ template gko::matrix_data FbcsrSample< ValueType, IndexType>::generate_matrix_data_with_explicit_zeros() const { - return MatData({{6, 12}, {{0, 3, 2.0}, {0, 4, 3.0}, {0, 5, 4.0}, - {1, 3, 5.0}, {1, 4, 6.0}, {1, 5, 7.0}, - {2, 3, 0.0}, {2, 4, 9.0}, {2, 5, 0.0}, - - {0, 9, 4.0}, {0, 10, 5.0}, {0, 11, 6.0}, - {1, 9, 7.0}, {1, 10, 8.0}, {1, 11, 9.0}, - {2, 9, 10.0}, {2, 10, 11.0}, {2, 11, 12.0}, - - {3, 0, 2.0}, {3, 1, 3.0}, {3, 2, 4.0}, - {4, 0, 5.0}, {4, 1, 6.0}, {4, 2, 7.0}, - {5, 0, 8.0}, {5, 1, 9.0}, {5, 2, 10.0}, - - {3, 6, 0.0}, {3, 7, 7.0}, {3, 8, 8.0}, - {4, 6, 9.0}, {4, 7, 10.0}, {4, 8, 11.0}, - {5, 6, 12.0}, {5, 7, 13.0}, {5, 8, 14.0}}}); + return MatData({{6, 12}, + {{0, 3, 2.0}, + {0, 4, 3.0}, + {0, 5, 4.0}, + {1, 3, 5.0}, + {1, 4, 6.0}, + {1, 5, 7.0}, + {2, 3, 0.0}, + {2, 4, 9.0}, + {2, 5, 0.0}, + + {0, 9, 4.0}, + {0, 10, 5.0}, + {0, 11, 6.0}, + {1, 9, 7.0}, + {1, 10, 8.0}, + {1, 11, 9.0}, + {2, 9, 10.0}, + {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, + {3, 1, 3.0}, + {3, 2, 4.0}, + {4, 0, 5.0}, + {4, 1, 6.0}, + {4, 2, 7.0}, + {5, 0, 8.0}, + {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 6, 0.0}, + {3, 7, 7.0}, + {3, 8, 8.0}, + {4, 6, 9.0}, + {4, 7, 
10.0}, + {4, 8, 11.0}, + {5, 6, 12.0}, + {5, 7, 13.0 + FBCSR_TEST_OFFSET}, + {5, 8, 14.0 + FBCSR_TEST_OFFSET}}}); } template @@ -328,9 +386,127 @@ FbcsrSample::generate_sparsity_csr() const return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, rowptrs); } +template +gko::Array FbcsrSample::getNonzerosPerRow() + const +{ + return gko::Array(exec, {6, 6, 6, 6, 6, 6}); +} + #define GKO_DECLARE_FBCSR_TEST_SAMPLE(ValueType, IndexType) \ class FbcsrSample GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE); + +template +FbcsrSample2::FbcsrSample2( + const std::shared_ptr rexec) + : nrows{6}, + ncols{8}, + nnz{16}, + nbrows{3}, + nbcols{4}, + nbnz{4}, + bs{2}, + exec(rexec) +{} + +template +std::unique_ptr> +FbcsrSample2::generate_fbcsr() const +{ + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs, std::make_shared>()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 1; + r[2] = 3; + r[3] = 4; + c[0] = 0; + c[1] = 0; + c[2] = 3; + c[3] = 2; + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + v[0] = 1; + v[1] = 2; + v[2] = 3; + v[3] = 0; + v[10] = 0; + v[11] = 0; + v[12] = -12; + v[13] = -1; + v[14] = -2; + v[15] = -11; + + for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; + + return mtx; +} + +template +std::unique_ptr> +FbcsrSample2::extract_diagonal() const +{ + gko::Array dvals(exec, nrows); + ValueType *const dv = dvals.get_data(); + dv[0] = 1; + dv[1] = 0; + dv[2] = 0; + dv[3] = 0; + dv[4] = -12; + dv[5] = -11; + return Diagonal::create(exec, nrows, dvals); +} + +template +gko::Array FbcsrSample2::getNonzerosPerRow() + const +{ + return gko::Array(exec, {2, 2, 4, 4, 2, 2}); +} + +template +void FbcsrSample2::apply( + const gko::matrix::Dense *const x, + gko::matrix::Dense *const y) const +{ + if (x->get_size()[0] != ncols) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "spmv", nrows, + ncols, ""); + if (y->get_size()[0] != nrows) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "spmv", nrows, + ncols, ""); + if (x->get_size()[1] != y->get_size()[1]) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "spmv", nrows, + ncols, ""); + + const ValueType defv = sct(0.15 + FBCSR_TEST_OFFSET); + + // ValueType *const yvals = y->get_data(); + // const ValueType *const xvals = x->get_const_data(); + for (index_type k = 0; k < x->get_size()[1]; k++) { + y->at(0, k) = sct(1.0) * x->at(0, k) + sct(2.0) * x->at(1, k); + y->at(1, k) = sct(3.0) * x->at(0, k); + y->at(2, k) = + defv * (x->at(0, k) + x->at(1, k) + x->at(6, k) + x->at(7, k)); + y->at(3, k) = defv * (x->at(0, k) + x->at(1, k)); + y->at(4, k) = sct(-12.0) * x->at(4, k) - x->at(5, k); + y->at(5, k) = sct(-2.0) * x->at(4, k) + sct(-11.0) * x->at(5, k); + } +} + +#define GKO_DECLARE_FBCSR_TEST_SAMPLE_2(ValueType, IndexType) \ + class FbcsrSample2 +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE_2); + } // namespace testing } // namespace gko diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 5df96e6d64f..05b1a2f76ae 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include #include @@ -47,6 +48,7 @@ namespace testing { /// Generates the same sample block CSR matrix in different formats /** This currently a 6 x 12 matrix with 3x3 blocks. + * Assumes that the layout within each block is row-major. */ template class FbcsrSample { @@ -80,6 +82,65 @@ class FbcsrSample { MatData generate_matrix_data_with_explicit_zeros() const; + /// Returns an array containing the number of stored values in each row + /// (not block-row) + gko::Array getNonzerosPerRow() const; + + const size_type nrows; + const size_type ncols; + const size_type nnz; + const size_type nbrows; + const size_type nbcols; + const size_type nbnz; + const int bs; + const std::shared_ptr exec; +}; + +/// Generates a sample block CSR matrix in different formats +/** This is currently a 6 x 8 matrix with 2x2 blocks. + */ +template +class FbcsrSample2 { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using Coo = gko::matrix::Coo; + using Dense = gko::matrix::Dense; + using MatData = gko::matrix_data; + using SparCsr = gko::matrix::SparsityCsr; + using Diagonal = gko::matrix::Diagonal; + + FbcsrSample2(std::shared_ptr exec); + + std::unique_ptr generate_fbcsr() const; + + std::unique_ptr extract_diagonal() const; + + void apply(const Dense *x, Dense *y) const; + + // std::unique_ptr generate_csr() const; + + // std::unique_ptr generate_dense() const; + + // std::unique_ptr generate_coo() const; + + // std::unique_ptr generate_sparsity_csr() const; + + // MatData generate_matrix_data() const; + + // MatData generate_matrix_data_with_explicit_zeros() const; + + gko::Array getNonzerosPerRow() const; + + template + inline constexpr ValueType sct(U u) const + { + return static_cast(u); + } + + const size_type nrows; const size_type ncols; const size_type nnz; diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index fe6bac8efd3..938f146e4a0 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -70,63 +70,87 @@ namespace fbcsr { template -void spmv(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// auto vals = a->get_const_values(); -// -// for (size_type row = 0; row < a->get_size()[0]; ++row) { -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) = zero(); -// } -// for (size_type k = row_ptrs[row]; -// k < static_cast(row_ptrs[row + 1]); ++k) { -// auto val = vals[k]; -// auto col = col_idxs[k]; -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) += val * b->at(col, j); -// } -// } -// } -//} +void spmv(const std::shared_ptr exec, + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + matrix::Dense *const c) +{ + const int bs = a->get_block_size(); + const IndexType nvecs = static_cast(b->get_size()[1]); + const IndexType nbrows = + gko::blockutils::getNumFixedBlocks(bs, a->get_size()[0]); + auto row_ptrs = a->get_const_row_ptrs(); + auto col_idxs = a->get_const_col_idxs(); + auto vals = a->get_const_values(); + const gko::blockutils::DenseBlocksView avalues( + vals, bs, bs); + + ValueType *const cvals = c->get_values(); + + for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { + const IndexType crowblkend = (ibrow + 1) 
* bs * nvecs; + for (IndexType i = ibrow * bs * nvecs; i < crowblkend; i++) + cvals[i] = zero(); + + for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; + ++inz) { + for (int ib = 0; ib < bs; ib++) { + const IndexType row = ibrow * bs + ib; + for (int jb = 0; jb < bs; jb++) { + const auto val = avalues(inz, ib, jb); + const auto col = col_idxs[inz] * bs + jb; + for (size_type j = 0; j < nvecs; ++j) + c->at(row, j) += val * b->at(col, j); + } + } + } + } +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template -void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// auto vals = a->get_const_values(); -// auto valpha = alpha->at(0, 0); -// auto vbeta = beta->at(0, 0); -// -// for (size_type row = 0; row < a->get_size()[0]; ++row) { -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) *= vbeta; -// } -// for (size_type k = row_ptrs[row]; -// k < static_cast(row_ptrs[row + 1]); ++k) { -// auto val = vals[k]; -// auto col = col_idxs[k]; -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) += valpha * val * b->at(col, j); -// } -// } -// } -//} +void advanced_spmv(const std::shared_ptr exec, + const matrix::Dense *const alpha, + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + const matrix::Dense *const beta, + matrix::Dense *const c) +{ + const int bs = a->get_block_size(); + const IndexType nvecs = static_cast(b->get_size()[1]); + const IndexType nbrows = + gko::blockutils::getNumFixedBlocks(bs, a->get_size()[0]); + auto row_ptrs = a->get_const_row_ptrs(); + auto col_idxs = a->get_const_col_idxs(); + auto vals = a->get_const_values(); + auto valpha = alpha->at(0, 0); + auto vbeta = beta->at(0, 0); + const gko::blockutils::DenseBlocksView avalues( + vals, bs, bs); + + ValueType *const cvals = c->get_values(); + + for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { + const IndexType crowblkend = (ibrow + 1) * bs * nvecs; + for (IndexType i = ibrow * bs * nvecs; i < crowblkend; i++) + cvals[i] *= vbeta; + + for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; + ++inz) { + for (int ib = 0; ib < bs; ib++) { + const IndexType row = ibrow * bs + ib; + for (int jb = 0; jb < bs; jb++) { + const auto val = avalues(inz, ib, jb); + const auto col = col_idxs[inz] * bs + jb; + for (size_type j = 0; j < nvecs; ++j) + c->at(row, j) += valpha * val * b->at(col, j); + } + } + } + } +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -933,16 +957,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - Array *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto row_ptrs = source->get_const_row_ptrs(); -// auto row_nnz_val = result->get_data(); -// for (size_type i = 0; i < result->get_num_elems(); i++) { -// row_nnz_val[i] = row_ptrs[i + 1] - row_ptrs[i]; -// } -//} + const matrix::Fbcsr *source, Array *result) +{ + const auto row_ptrs = source->get_const_row_ptrs(); + auto row_nnz_val = result->get_data(); + const int bs = source->get_block_size(); + assert(result->get_num_elems() == 
source->get_size()[0]); + + for (size_type i = 0; i < result->get_num_elems(); i++) { + const size_type ibrow = i / bs; + row_nnz_val[i] = (row_ptrs[ibrow + 1] - row_ptrs[ibrow]) * bs; + } +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -974,24 +1000,26 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto row_ptrs = to_check->get_const_row_ptrs(); -// const auto col_idxs = to_check->get_const_col_idxs(); -// const auto size = to_check->get_size(); -// for (size_type i = 0; i < size[0]; ++i) { -// for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { -// if (col_idxs[idx - 1] > col_idxs[idx]) { -// *is_sorted = false; -// return; -// } -// } -// } -// *is_sorted = true; -// return; -//} + const matrix::Fbcsr *const to_check, + bool *const is_sorted) +{ + const auto row_ptrs = to_check->get_const_row_ptrs(); + const auto col_idxs = to_check->get_const_col_idxs(); + const auto size = to_check->get_size(); + const int bs = to_check->get_block_size(); + const size_type nbrows = gko::blockutils::getNumFixedBlocks(bs, size[0]); + + for (size_type i = 0; i < nbrows; ++i) { + for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { + if (col_idxs[idx - 1] > col_idxs[idx]) { + *is_sorted = false; + return; + } + } + } + *is_sorted = true; + return; +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); @@ -999,25 +1027,33 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto row_ptrs = orig->get_const_row_ptrs(); -// const auto col_idxs = orig->get_const_col_idxs(); -// const auto values = orig->get_const_values(); -// const auto diag_size = diag->get_size()[0]; -// auto diag_values = diag->get_values(); -// -// for (size_type row = 0; row < diag_size; ++row) { -// for (size_type idx = row_ptrs[row]; idx < row_ptrs[row + 1]; ++idx) { -// if (col_idxs[idx] == row) { -// diag_values[row] = values[idx]; -// break; -// } -// } -// } -//} + const matrix::Fbcsr *const orig, + matrix::Diagonal *const diag) +{ + const auto row_ptrs = orig->get_const_row_ptrs(); + const auto col_idxs = orig->get_const_col_idxs(); + const auto values = orig->get_const_values(); + const int bs = orig->get_block_size(); + const size_type diag_size = diag->get_size()[0]; + const size_type nbrows = + gko::blockutils::getNumFixedBlocks(bs, orig->get_size()[0]); + auto diag_values = diag->get_values(); + assert(diag_size == orig->get_size()[0]); + + const gko::blockutils::DenseBlocksView vblocks( + values, bs, bs); + + for (size_type ibrow = 0; ibrow < nbrows; ++ibrow) { + for (size_type idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 1]; + ++idx) { + if (col_idxs[idx] == ibrow) { + for (int ib = 0; ib < bs; ib++) + diag_values[ibrow * bs + ib] = vblocks(idx, ib, ib); + break; + } + } + } +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index dbba82dc3e4..4ed6bad3b8c 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ 
b/reference/test/matrix/fbcsr_kernels.cpp @@ -83,17 +83,21 @@ class Fbcsr : public ::testing::Test { using SparCsr = gko::matrix::SparsityCsr; using Ell = gko::matrix::Ell; using Hybrid = gko::matrix::Hybrid; + using Diag = gko::matrix::Diagonal; using Vec = gko::matrix::Dense; Fbcsr() : exec(gko::ReferenceExecutor::create()), fbsample(exec), + fbsample2(exec), mtx(fbsample.generate_fbcsr()), refmtx(fbsample.generate_fbcsr()), refcsrmtx(fbsample.generate_csr()), refdenmtx(fbsample.generate_dense()), refcoomtx(fbsample.generate_coo()), - refspcmtx(fbsample.generate_sparsity_csr()) + refspcmtx(fbsample.generate_sparsity_csr()), + mtx2(fbsample2.generate_fbcsr()), + m2diag(fbsample2.extract_diagonal()) {} // void create_mtx3(Mtx *sorted, Mtx *unsorted) @@ -162,90 +166,174 @@ class Fbcsr : public ::testing::Test { std::shared_ptr exec; const gko::testing::FbcsrSample fbsample; + const gko::testing::FbcsrSample2 fbsample2; std::unique_ptr mtx; const std::unique_ptr refmtx; const std::unique_ptr refcsrmtx; const std::unique_ptr refdenmtx; const std::unique_ptr refcoomtx; const std::unique_ptr refspcmtx; + const std::unique_ptr mtx2; + const std::unique_ptr m2diag; }; TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); +template +constexpr T get_some_number() +{ + return static_cast(1.2); +} + +template +constexpr typename std::enable_if_t> get_some_number() +{ + using RT = gko::remove_complex; + return {static_cast(1.2), static_cast(3.4)}; +} + + TYPED_TEST(Fbcsr, AppliesToDenseVector) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// using T = typename TestFixture::value_type; -// auto x = gko::initialize({2.0, 1.0, 4.0}, this->exec); -// auto y = Vec::create(this->exec, gko::dim<2>{2, 1}); -// -// this->mtx->apply(x.get(), y.get()); -// -// EXPECT_EQ(y->at(0), T{13.0}); -// EXPECT_EQ(y->at(1), T{5.0}); -//} +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + + const index_type nrows = this->mtx2->get_size()[0]; + const index_type ncols = this->mtx2->get_size()[1]; + auto x = Vec::create(this->exec, gko::dim<2>{(gko::size_type)ncols, 1}); + T *const xvals = x->get_values(); + for (index_type i = 0; i < ncols; i++) + // xvals[i] = std::log(static_cast(static_cast((i+1)^2))); + xvals[i] = (i + 1.0) * (i + 1.0); + auto y = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); + auto yref = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); + + this->mtx2->apply(x.get(), y.get()); + + this->fbsample2.apply(x.get(), yref.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} TYPED_TEST(Fbcsr, AppliesToDenseMatrix) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// using T = typename TestFixture::value_type; -// auto x = gko::initialize( -// {I{2.0, 3.0}, I{1.0, -1.5}, I{4.0, 2.5}}, this->exec); -// auto y = Vec::create(this->exec, gko::dim<2>{2}); -// -// this->mtx->apply(x.get(), y.get()); -// -// EXPECT_EQ(y->at(0, 0), T{13.0}); -// EXPECT_EQ(y->at(1, 0), T{5.0}); -// EXPECT_EQ(y->at(0, 1), T{3.5}); -// EXPECT_EQ(y->at(1, 1), T{-7.5}); -//} +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + + const gko::size_type nrows = 
this->mtx2->get_size()[0]; + const gko::size_type ncols = this->mtx2->get_size()[1]; + const gko::size_type nvecs = 3; + auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); + + for (index_type i = 0; i < ncols; i++) + for (index_type j = 0; j < nvecs; j++) + x->at(i, j) = (static_cast(3.0 * i) + get_some_number()) / + static_cast(j + 1.0); + auto y = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + auto yref = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + + this->mtx2->apply(x.get(), y.get()); + + this->fbsample2.apply(x.get(), yref.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// using T = typename TestFixture::value_type; -// auto alpha = gko::initialize({-1.0}, this->exec); -// auto beta = gko::initialize({2.0}, this->exec); -// auto x = gko::initialize({2.0, 1.0, 4.0}, this->exec); -// auto y = gko::initialize({1.0, 2.0}, this->exec); -// -// this->mtx->apply(alpha.get(), x.get(), beta.get(), y.get()); -// -// EXPECT_EQ(y->at(0), T{-11.0}); -// EXPECT_EQ(y->at(1), T{-1.0}); -//} +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + + constexpr T alphav = -1.0; + constexpr T betav = 2.0; + auto alpha = gko::initialize({alphav}, this->exec); + auto beta = gko::initialize({betav}, this->exec); + + const gko::size_type nrows = this->mtx2->get_size()[0]; + const gko::size_type ncols = this->mtx2->get_size()[1]; + auto x = Vec::create(this->exec, gko::dim<2>{ncols, 1}); + auto y = Vec::create(this->exec, gko::dim<2>{nrows, 1}); + + for (index_type i = 0; i < ncols; i++) { + // xvals[i] = std::log(static_cast(static_cast((i+1)^2))); + x->at(i, 0) = (i + 1.0) * (i + 1.0); + } + for (index_type i = 0; i < nrows; i++) { + y->at(i, 0) = static_cast(std::sin(2 * 3.14 * (i + 0.1) / nrows)) + + get_some_number(); + } + + auto yref = Vec::create(this->exec, gko::dim<2>{nrows, 1}); + yref = y->clone(); + + this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); + + auto prod = Vec::create(this->exec, gko::dim<2>{nrows, 1}); + this->fbsample2.apply(x.get(), prod.get()); + + yref->scale(beta.get()); + yref->add_scaled(alpha.get(), prod.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// using T = typename TestFixture::value_type; -// auto alpha = gko::initialize({-1.0}, this->exec); -// auto beta = gko::initialize({2.0}, this->exec); -// auto x = gko::initialize( -// {I{2.0, 3.0}, I{1.0, -1.5}, I{4.0, 2.5}}, this->exec); -// auto y = -// gko::initialize({I{1.0, 0.5}, I{2.0, -1.5}}, this->exec); -// -// this->mtx->apply(alpha.get(), x.get(), beta.get(), y.get()); -// -// EXPECT_EQ(y->at(0, 0), T{-11.0}); -// EXPECT_EQ(y->at(1, 0), T{-1.0}); -// EXPECT_EQ(y->at(0, 1), T{-2.5}); -// EXPECT_EQ(y->at(1, 1), T{4.5}); -//} +{ + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + + constexpr T alphav = -1.0; + constexpr T betav = 2.0; + auto alpha = 
gko::initialize({alphav}, this->exec); + auto beta = gko::initialize({betav}, this->exec); + + const gko::size_type nrows = this->mtx2->get_size()[0]; + const gko::size_type ncols = this->mtx2->get_size()[1]; + const gko::size_type nvecs = 3; + auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); + auto y = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + + for (index_type i = 0; i < ncols; i++) + for (index_type j = 0; j < nvecs; j++) { + // xvals[i] = std::log(static_cast(static_cast((i+1)^2))); + x->at(i, j) = (i + 1.0) / (j + 1.0); + } + for (index_type i = 0; i < nrows; i++) + for (index_type j = 0; j < nvecs; j++) { + y->at(i, j) = + static_cast(std::sin(2 * 3.14 * (i + j + 0.1) / nrows)) + + get_some_number(); + } + + auto yref = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + yref = y->clone(); + + this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); + + auto prod = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); + this->fbsample2.apply(x.get(), prod.get()); + + yref->scale(beta.get()); + yref->add_scaled(alpha.get(), prod.get()); + + const double tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(y, yref, tolerance); +} TYPED_TEST(Fbcsr, AppliesToFbcsrMatrix) @@ -862,22 +950,24 @@ TYPED_TEST(Fbcsr, MovesEmptyToSparsityCsr) TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// gko::Array row_nnz(this->exec, this->mtx->get_size()[0]); -// -// gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( -// this->exec, this->mtx.get(), &row_nnz); -// -// auto row_nnz_val = row_nnz.get_data(); -// ASSERT_EQ(row_nnz_val[0], 3); -// ASSERT_EQ(row_nnz_val[1], 1); -//} +{ + using IndexType = typename TestFixture::index_type; + gko::Array row_nnz(this->exec, this->mtx2->get_size()[0]); -TYPED_TEST(Fbcsr, CalculatesTotalCols) -GKO_NOT_IMPLEMENTED; + gko::kernels::reference::fbcsr ::calculate_nonzeros_per_row( + this->exec, this->mtx2.get(), &row_nnz); + + auto row_nnz_val = row_nnz.get_data(); + const gko::Array refrnnz = this->fbsample2.getNonzerosPerRow(); + ASSERT_EQ(row_nnz.get_num_elems(), refrnnz.get_num_elems()); + for (gko::size_type i = 0; i < this->mtx2->get_size()[0]; i++) + ASSERT_EQ(row_nnz_val[i], refrnnz.get_const_data()[i]); +} + + +// TYPED_TEST(Fbcsr, CalculatesTotalCols) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // gko::size_type total_cols; @@ -1173,21 +1263,22 @@ GKO_NOT_IMPLEMENTED; TYPED_TEST(Fbcsr, RecognizeSortedMatrix) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// ASSERT_TRUE(this->mtx->is_sorted_by_column_index()); -// ASSERT_TRUE(this->mtx2->is_sorted_by_column_index()); -// ASSERT_TRUE(this->mtx3_sorted->is_sorted_by_column_index()); -//} +{ + ASSERT_TRUE(this->mtx->is_sorted_by_column_index()); +} TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// ASSERT_FALSE(this->mtx3_unsorted->is_sorted_by_column_index()); -//} +{ + using Fbcsr = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + + // auto cpmat = Fbcsr::create(this->exec, this->mtx->get_strategy()); + auto cpmat = this->mtx->clone(); + index_type *const colinds = cpmat->get_col_idxs(); + std::swap(colinds[0], colinds[1]); + ASSERT_FALSE(cpmat->is_sorted_by_column_index()); +} TYPED_TEST(Fbcsr, SortSortedMatrix) 
@@ -1215,19 +1306,16 @@ GKO_NOT_IMPLEMENTED; TYPED_TEST(Fbcsr, ExtractsDiagonal) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using T = typename TestFixture::value_type; -// auto matrix = this->mtx3_unsorted->clone(); -// auto diag = matrix->extract_diagonal(); -// -// ASSERT_EQ(diag->get_size()[0], 3); -// ASSERT_EQ(diag->get_size()[1], 3); -// ASSERT_EQ(diag->get_values()[0], T{0.}); -// ASSERT_EQ(diag->get_values()[1], T{1.}); -// ASSERT_EQ(diag->get_values()[2], T{3.}); -//} +{ + using T = typename TestFixture::value_type; + auto matrix = this->mtx2->clone(); + auto diag = matrix->extract_diagonal(); + + ASSERT_EQ(this->m2diag->get_size(), diag->get_size()); + for (gko::size_type i = 0; i < this->m2diag->get_size()[0]; i++) + ASSERT_EQ(this->m2diag->get_const_values()[i], + diag->get_const_values()[i]); +} TYPED_TEST(Fbcsr, InplaceAbsolute) From 34cf398460305c72b377a4fbf7cbfa5edfe2af91 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Sun, 15 Nov 2020 23:24:10 +0100 Subject: [PATCH 08/58] [tests fail] added fbcsr transpose and test for square matrix --- core/matrix/fbcsr.cpp | 22 +++-- core/test/matrix/fbcsr_sample.cpp | 77 +++++++++++++++++ core/test/matrix/fbcsr_sample.hpp | 38 ++++++--- reference/matrix/fbcsr_kernels.cpp | 108 +++++++++++++----------- reference/test/matrix/fbcsr_kernels.cpp | 97 +++++++++------------ 5 files changed, 215 insertions(+), 127 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 68defa46f4b..8b239617991 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -556,18 +556,16 @@ void Fbcsr::write(mat_data &data) const template std::unique_ptr Fbcsr::transpose() const - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// auto trans_cpy = -// Fbcsr::create(exec, gko::transpose(this->get_size()), -// this->get_num_stored_elements(), this->get_strategy()); -// -// exec->run(fbcsr::make_transpose(this, trans_cpy.get())); -// trans_cpy->make_srow(); -// return std::move(trans_cpy); -//} +{ + auto exec = this->get_executor(); + auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), + this->get_num_stored_elements(), bs_, + this->get_strategy()); + + exec->run(fbcsr::make_transpose(this, trans_cpy.get())); + trans_cpy->make_srow(); + return std::move(trans_cpy); +} template diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index f8d8136e23f..aa8299d33c8 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -508,5 +508,82 @@ void FbcsrSample2::apply( class FbcsrSample2 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE_2); + +template +FbcsrSampleSquare::FbcsrSampleSquare( + const std::shared_ptr rexec) + : nrows{4}, + ncols{4}, + nnz{8}, + nbrows{2}, + nbcols{2}, + nbnz{2}, + bs{2}, + exec(rexec) +{} + +template +std::unique_ptr> +FbcsrSampleSquare::generate_fbcsr() const +{ + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs, std::make_shared>()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 1; + r[2] = 2; + c[0] = 1; + c[1] = 1; + + for (IndexType i = 0; i < nnz; i++) v[i] = i; + + return mtx; +} + +template +std::unique_ptr> 
+FbcsrSampleSquare::generate_transpose_fbcsr() const +{ + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs, std::make_shared>()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 0; + r[2] = 2; + c[0] = 0; + c[1] = 1; + + gko::blockutils::DenseBlocksView vals(v, bs, bs); + vals(0, 0, 0) = 0; + vals(0, 0, 1) = 2; + vals(0, 1, 0) = 1; + vals(0, 1, 1) = 3; + vals(1, 0, 0) = 4; + vals(1, 0, 1) = 6; + vals(1, 1, 0) = 5; + vals(1, 1, 1) = 7; + + return mtx; +} + +#define GKO_DECLARE_FBCSR_TEST_SAMPLE_SQUARE(ValueType, IndexType) \ + class FbcsrSampleSquare +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TEST_SAMPLE_SQUARE); + } // namespace testing } // namespace gko diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 05b1a2f76ae..a0990887477 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -120,18 +120,6 @@ class FbcsrSample2 { void apply(const Dense *x, Dense *y) const; - // std::unique_ptr generate_csr() const; - - // std::unique_ptr generate_dense() const; - - // std::unique_ptr generate_coo() const; - - // std::unique_ptr generate_sparsity_csr() const; - - // MatData generate_matrix_data() const; - - // MatData generate_matrix_data_with_explicit_zeros() const; - gko::Array getNonzerosPerRow() const; template @@ -151,6 +139,32 @@ class FbcsrSample2 { const std::shared_ptr exec; }; +/// Generates a sample block CSR square matrix in different formats +/** This is currently a 4 x 4 matrix with 2x2 blocks. + */ +template +class FbcsrSampleSquare { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + + FbcsrSampleSquare(std::shared_ptr exec); + + std::unique_ptr generate_fbcsr() const; + + std::unique_ptr generate_transpose_fbcsr() const; + + const size_type nrows; + const size_type ncols; + const size_type nnz; + const size_type nbrows; + const size_type nbcols; + const size_type nbnz; + const int bs; + const std::shared_ptr exec; +}; + } // namespace testing } // namespace gko diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 938f146e4a0..fe1ec87061e 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -652,62 +652,74 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); -template -inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, - const IndexType *col_idxs, - const ValueType *fbcsr_vals, - IndexType *row_idxs, IndexType *col_ptrs, - ValueType *csc_vals, - UnaryOperator op) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// for (size_type row = 0; row < num_rows; ++row) { -// for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; ++i) { -// const auto dest_idx = col_ptrs[col_idxs[i]]++; -// row_idxs[dest_idx] = row; -// csc_vals[dest_idx] = op(fbcsr_vals[i]); -// } -// } -//} +template +void convert_fbcsr_to_fbcsc(const size_type num_blk_rows, const int blksz, + const IndexType *const row_ptrs, + const IndexType *const col_idxs, + const ValueType *const fbcsr_vals, + IndexType *const row_idxs, + IndexType *const col_ptrs, + ValueType *const csc_vals, UnaryOperator op) +{ + const gko::blockutils::DenseBlocksView rvalues( + fbcsr_vals, 
blksz, blksz); + gko::blockutils::DenseBlocksView cvalues( + csc_vals, blksz, blksz); + for (size_type brow = 0; brow < num_blk_rows; ++brow) { + for (auto i = row_ptrs[brow]; i < row_ptrs[brow + 1]; ++i) { + const auto dest_idx = col_ptrs[col_idxs[i]]++; + row_idxs[dest_idx] = brow; + for (int ib = 0; ib < blksz; ib++) + for (int jb = 0; jb < blksz; jb++) + // csc_vals[dest_idx] = op(fbcsr_vals[i]); + cvalues(dest_idx, ib, jb) = + op(transpose_blocks ? rvalues(i, jb, ib) + : rvalues(i, ib, jb)); + } + } +} template -void transpose_and_transform(std::shared_ptr exec, - matrix::Fbcsr *trans, - const matrix::Fbcsr *orig, - UnaryOperator op) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto trans_row_ptrs = trans->get_row_ptrs(); -// auto orig_row_ptrs = orig->get_const_row_ptrs(); -// auto trans_col_idxs = trans->get_col_idxs(); -// auto orig_col_idxs = orig->get_const_col_idxs(); -// auto trans_vals = trans->get_values(); -// auto orig_vals = orig->get_const_values(); -// -// auto orig_num_cols = orig->get_size()[1]; -// auto orig_num_rows = orig->get_size()[0]; -// auto orig_nnz = orig_row_ptrs[orig_num_rows]; -// -// trans_row_ptrs[0] = 0; -// convert_idxs_to_ptrs(orig_col_idxs, orig_nnz, trans_row_ptrs + 1, -// orig_num_cols); -// -// convert_fbcsr_to_csc(orig_num_rows, orig_row_ptrs, orig_col_idxs, -// orig_vals, -// trans_col_idxs, trans_row_ptrs + 1, trans_vals, op); -//} +void transpose_and_transform( + const std::shared_ptr exec, + matrix::Fbcsr *const trans, + const matrix::Fbcsr *const orig, UnaryOperator op) +{ + const int bs = orig->get_block_size(); + auto trans_row_ptrs = trans->get_row_ptrs(); + auto orig_row_ptrs = orig->get_const_row_ptrs(); + auto trans_col_idxs = trans->get_col_idxs(); + auto orig_col_idxs = orig->get_const_col_idxs(); + auto trans_vals = trans->get_values(); + auto orig_vals = orig->get_const_values(); + + auto orig_num_cols = orig->get_size()[1]; + const size_type nbcols = + gko::blockutils::getNumFixedBlocks(bs, orig_num_cols); + auto orig_num_rows = orig->get_size()[0]; + const size_type nbrows = + gko::blockutils::getNumFixedBlocks(bs, orig_num_rows); + auto orig_nbnz = orig_row_ptrs[nbrows]; + + trans_row_ptrs[0] = 0; + convert_idxs_to_ptrs(orig_col_idxs, orig_nbnz, trans_row_ptrs + 1, nbcols); + + convert_fbcsr_to_fbcsc( + nbrows, bs, orig_row_ptrs, orig_col_idxs, orig_vals, trans_col_idxs, + trans_row_ptrs + 1, trans_vals, op); +} template void transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// transpose_and_transform(exec, trans, orig, -// [](const ValueType x) { return x; }); -//} + matrix::Fbcsr *trans) +{ + transpose_and_transform(exec, trans, orig, + [](const ValueType x) { return x; }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 4ed6bad3b8c..50407efb317 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -90,6 +90,7 @@ class Fbcsr : public ::testing::Test { : exec(gko::ReferenceExecutor::create()), fbsample(exec), fbsample2(exec), + fbsamplesquare(exec), mtx(fbsample.generate_fbcsr()), refmtx(fbsample.generate_fbcsr()), refcsrmtx(fbsample.generate_csr()), @@ -97,7 +98,8 @@ class Fbcsr : public ::testing::Test { refcoomtx(fbsample.generate_coo()), 
refspcmtx(fbsample.generate_sparsity_csr()), mtx2(fbsample2.generate_fbcsr()), - m2diag(fbsample2.extract_diagonal()) + m2diag(fbsample2.extract_diagonal()), + mtxsq(fbsamplesquare.generate_fbcsr()) {} // void create_mtx3(Mtx *sorted, Mtx *unsorted) @@ -167,6 +169,8 @@ class Fbcsr : public ::testing::Test { std::shared_ptr exec; const gko::testing::FbcsrSample fbsample; const gko::testing::FbcsrSample2 fbsample2; + const gko::testing::FbcsrSampleSquare + fbsamplesquare; std::unique_ptr mtx; const std::unique_ptr refmtx; const std::unique_ptr refcsrmtx; @@ -175,6 +179,7 @@ class Fbcsr : public ::testing::Test { const std::unique_ptr refspcmtx; const std::unique_ptr mtx2; const std::unique_ptr m2diag; + const std::unique_ptr mtxsq; }; TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); @@ -336,8 +341,8 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) } -TYPED_TEST(Fbcsr, AppliesToFbcsrMatrix) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, AppliesToFbcsrMatrix) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using T = typename TestFixture::value_type; @@ -369,8 +374,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, AppliesLinearCombinationToFbcsrMatrix) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, AppliesLinearCombinationToFbcsrMatrix) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Vec = typename TestFixture::Vec; @@ -407,8 +412,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, AppliesLinearCombinationToIdentityMatrix) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, AppliesLinearCombinationToIdentityMatrix) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using T = typename TestFixture::value_type; @@ -442,39 +447,33 @@ GKO_NOT_IMPLEMENTED; TYPED_TEST(Fbcsr, ApplyFailsOnWrongInnerDimension) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// auto x = Vec::create(this->exec, gko::dim<2>{2}); -// auto y = Vec::create(this->exec, gko::dim<2>{2}); -// -// ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); -//} +{ + using Vec = typename TestFixture::Vec; + auto x = Vec::create(this->exec, gko::dim<2>{2}); + auto y = Vec::create(this->exec, gko::dim<2>{this->fbsample.nrows}); + + ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +} TYPED_TEST(Fbcsr, ApplyFailsOnWrongNumberOfRows) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// auto x = Vec::create(this->exec, gko::dim<2>{3, 2}); -// auto y = Vec::create(this->exec, gko::dim<2>{3, 2}); -// -// ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); -//} +{ + using Vec = typename TestFixture::Vec; + auto x = Vec::create(this->exec, gko::dim<2>{this->fbsample.ncols, 2}); + auto y = Vec::create(this->exec, gko::dim<2>{3, 2}); + + ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +} TYPED_TEST(Fbcsr, ApplyFailsOnWrongNumberOfCols) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// auto x = Vec::create(this->exec, gko::dim<2>{3}); -// auto y = Vec::create(this->exec, gko::dim<2>{2}); -// -// ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); -//} +{ + using Vec = 
typename TestFixture::Vec; + auto x = Vec::create(this->exec, gko::dim<2>{this->fbsample.ncols, 3}); + auto y = Vec::create(this->exec, gko::dim<2>{this->fbsample.nrows, 2}); + + ASSERT_THROW(this->mtx->apply(x.get(), y.get()), gko::DimensionMismatch); +} TYPED_TEST(Fbcsr, ConvertsToPrecision) @@ -1014,27 +1013,15 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) TYPED_TEST(Fbcsr, SquareMtxIsTransposable) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// // clang-format off -// auto mtx2 = gko::initialize( -// {{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}, -// {0.0, 1.5, 2.0}}, this->exec); -// // clang-format on -// -// auto trans = mtx2->transpose(); -// auto trans_as_fbcsr = static_cast(trans.get()); -// -// // clang-format off -// GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, -// l({{1.0, 0.0, 0.0}, -// {3.0, 5.0, 1.5}, -// {2.0, 0.0, 2.0}}), 0.0); -// // clang-format on -//} +{ + using Fbcsr = typename TestFixture::Mtx; + auto reftmtx = this->fbsamplesquare.generate_transpose_fbcsr(); + + auto trans = this->mtxsq->transpose(); + auto trans_as_fbcsr = static_cast(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx, 0.0); +} TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) From 1c1f9912ba4da24af4530fa884f08ca4bfaa1b22 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 16 Nov 2020 15:25:53 +0100 Subject: [PATCH 09/58] [tests fail] enabled fbcsr non-square transpose test --- core/test/matrix/fbcsr_sample.cpp | 42 +++++++++++++++++++++++++ core/test/matrix/fbcsr_sample.hpp | 2 ++ reference/test/matrix/fbcsr_kernels.cpp | 23 ++++++-------- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index aa8299d33c8..d0cbe649efe 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -452,6 +452,48 @@ FbcsrSample2::generate_fbcsr() const return mtx; } +template +std::unique_ptr> +FbcsrSample2::generate_transpose_fbcsr() const +{ + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(ncols), + static_cast(nrows)}, + nnz, bs, std::make_shared>()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 2; + r[2] = 2; + r[3] = 3; + r[4] = 4; + c[0] = 0; + c[1] = 1; + c[2] = 2; + c[3] = 1; + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + v[0] = 1; + v[1] = 3; + v[2] = 2; + v[3] = 0; + v[8] = -12; + v[9] = -2; + v[10] = -1; + v[11] = -11; + v[13] = 0; + v[15] = 0; + + for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; + + return mtx; +} + template std::unique_ptr> FbcsrSample2::extract_diagonal() const diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index a0990887477..48a68343d94 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -116,6 +116,8 @@ class FbcsrSample2 { std::unique_ptr generate_fbcsr() const; + std::unique_ptr generate_transpose_fbcsr() const; + std::unique_ptr extract_diagonal() const; void apply(const Dense *x, Dense *y) const; diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 50407efb317..229fcb20e26 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -1025,20 +1025,15 @@ TYPED_TEST(Fbcsr, 
SquareMtxIsTransposable) TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// auto trans = this->mtx->transpose(); -// auto trans_as_fbcsr = static_cast(trans.get()); -// -// // clang-format off -// GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, -// l({{1.0, 0.0}, -// {3.0, 5.0}, -// {2.0, 0.0}}), 0.0); -// // clang-format on -//} +{ + using Fbcsr = typename TestFixture::Mtx; + auto reftmtx = this->fbsample2.generate_transpose_fbcsr(); + + auto trans = this->mtx2->transpose(); + auto trans_as_fbcsr = static_cast(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx, 0.0); +} TYPED_TEST(Fbcsr, SquareMatrixIsRowPermutable) From 848bc8a11b74c28171d31b59cf4a40699cbbd83a Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 18 Nov 2020 18:29:41 +0100 Subject: [PATCH 10/58] [tests fail] fbcsr absolute value tests pass --- core/matrix/fbcsr.cpp | 62 ++++----- core/test/matrix/fbcsr_sample.cpp | 174 ++++++++++++++++++++++-- core/test/matrix/fbcsr_sample.hpp | 39 ++++++ reference/test/matrix/fbcsr_kernels.cpp | 136 +++++++++--------- 4 files changed, 298 insertions(+), 113 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 8b239617991..d94aa5d6e81 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -674,14 +674,12 @@ void Fbcsr::sort_by_column_index() GKO_NOT_IMPLEMENTED; template bool Fbcsr::is_sorted_by_column_index() const - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// bool is_sorted; -// exec->run(fbcsr::make_is_sorted_by_column_index(this, &is_sorted)); -// return is_sorted; -//} +{ + auto exec = this->get_executor(); + bool is_sorted; + exec->run(fbcsr::make_is_sorted_by_column_index(this, &is_sorted)); + return is_sorted; +} template @@ -701,35 +699,33 @@ Fbcsr::extract_diagonal() const template void Fbcsr::compute_absolute_inplace() - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// -// exec->run(fbcsr::make_inplace_absolute_array( -// this->get_values(), this->get_num_stored_elements())); -//} +{ + auto exec = this->get_executor(); + + exec->run(fbcsr::make_inplace_absolute_array( + this->get_values(), this->get_num_stored_elements())); +} template std::unique_ptr::absolute_type> -Fbcsr::compute_absolute() const GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// -// auto abs_fbcsr = absolute_type::create(exec, this->get_size(), -// this->get_num_stored_elements()); -// -// abs_fbcsr->col_idxs_ = col_idxs_; -// abs_fbcsr->row_ptrs_ = row_ptrs_; -// exec->run(fbcsr::make_outplace_absolute_array(this->get_const_values(), -// this->get_num_stored_elements(), -// abs_fbcsr->get_values())); -// -// convert_strategy_helper(abs_fbcsr.get()); -// return abs_fbcsr; -//} +Fbcsr::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_fbcsr = absolute_type::create(exec, this->get_size(), + this->get_num_stored_elements(), + this->get_block_size()); + + abs_fbcsr->col_idxs_ = col_idxs_; + abs_fbcsr->row_ptrs_ = row_ptrs_; + exec->run(fbcsr::make_outplace_absolute_array( + this->get_const_values(), this->get_num_stored_elements(), + abs_fbcsr->get_values())); + + convert_strategy_helper(abs_fbcsr.get()); + return 
abs_fbcsr; +} // TODO clean this up as soon as we improve strategy_type diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index d0cbe649efe..6e7815f96d2 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -40,12 +40,56 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define FBCSR_TEST_OFFSET 0.000011118888 +#define FBCSR_TEST_C_MAG 0.1 + FBCSR_TEST_OFFSET +#define FBCSR_TEST_IMAGINARY \ + sct(std::complex>(0, FBCSR_TEST_C_MAG)) + namespace gko { namespace testing { namespace matstr = gko::matrix::matrix_strategy; +/// Generates a copu of the given matrix with a different scalar type +/** \tparam AbsValueType The scalar type of the output matrix + */ +template +static std::unique_ptr< + gko::matrix::Fbcsr> +generate_acopy_impl(const FbcsrType *const mat) +{ + // using AbsValueType = typename gko::remove_complex; + using index_type = typename FbcsrType::index_type; + using value_type = typename FbcsrType::value_type; + using AbsFbcsr = gko::matrix::Fbcsr; + using classical = matstr::classical; + + std::shared_ptr exec = + std::dynamic_pointer_cast(mat->get_executor()); + + std::unique_ptr amat = + AbsFbcsr::create(exec, mat->get_size(), mat->get_num_stored_elements(), + mat->get_block_size(), std::make_shared()); + + const index_type *const colidxs = mat->get_col_idxs(); + const index_type *const rowptrs = mat->get_row_ptrs(); + index_type *const acolidxs = amat->get_col_idxs(); + index_type *const arowptrs = amat->get_row_ptrs(); + // blockutils + for (index_type i = 0; + i < mat->get_num_stored_elements() / + (mat->get_block_size() * mat->get_block_size()); + i++) + acolidxs[i] = colidxs[i]; + // blockutils + for (index_type i = 0; i < mat->get_size()[0] / mat->get_block_size() + 1; + i++) + arowptrs[i] = rowptrs[i]; + + return amat; +} + + template FbcsrSample::FbcsrSample( const std::shared_ptr rexec) @@ -88,6 +132,7 @@ FbcsrSample::generate_fbcsr() const mtx->get_size()[0], mtx->get_size()[1], "block size does not divide the size!"); + // blockutils for (index_type ibrow = 0; ibrow < mtx->get_size()[0] / bs; ibrow++) { const index_type *const browptr = mtx->get_row_ptrs(); for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; inz++) { @@ -106,14 +151,70 @@ FbcsrSample::generate_fbcsr() const vals(0, 2, 2) = gko::zero(); vals(3, 0, 0) = gko::zero(); - v[34] += FBCSR_TEST_OFFSET; - v[35] += FBCSR_TEST_OFFSET; + v[34] += FBCSR_TEST_IMAGINARY; + v[35] += FBCSR_TEST_IMAGINARY; for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; return mtx; } +template +template +void FbcsrSample::correct_abs_for_complex_values( + FbcsrType *const amat) const +{ + using out_value_type = typename FbcsrType::value_type; + using outreal_type = remove_complex; + out_value_type *const avals = amat->get_values(); + if (is_complex()) { + auto mo = static_cast(FBCSR_TEST_C_MAG); + avals[34] = sqrt(pow(static_cast(13.0), 2) + + pow(static_cast(mo), 2)); + avals[35] = sqrt(pow(static_cast(14.0), 2) + + pow(static_cast(mo), 2)); + } +} + +template +std::unique_ptr, IndexType>> +FbcsrSample::generate_abs_fbcsr_abstype() const +{ + using AbsValueType = typename gko::remove_complex; + using AbsFbcsr = gko::matrix::Fbcsr; + + const std::unique_ptr mat = generate_fbcsr(); + std::unique_ptr amat = + generate_acopy_impl(mat.get()); + + AbsValueType *const avals = amat->get_values(); + const ValueType *const vals = mat->get_values(); + for (IndexType i = 0; i < amat->get_num_stored_elements(); i++) + 
avals[i] = abs(vals[i]); + + correct_abs_for_complex_values(amat.get()); + + return amat; +} + +template +std::unique_ptr> +FbcsrSample::generate_abs_fbcsr() const +{ + const std::unique_ptr mat = generate_fbcsr(); + std::unique_ptr amat = + generate_acopy_impl(mat.get()); + + ValueType *const avals = amat->get_values(); + const ValueType *const vals = mat->get_values(); + for (IndexType i = 0; i < amat->get_num_stored_elements(); i++) + avals[i] = abs(vals[i]); + + correct_abs_for_complex_values(amat.get()); + + return amat; +} + template std::unique_ptr> FbcsrSample::generate_csr() const @@ -214,8 +315,8 @@ FbcsrSample::generate_csr() const csrvals[34] = 13; csrvals[35] = 14; - csrvals[34] += FBCSR_TEST_OFFSET; - csrvals[35] += FBCSR_TEST_OFFSET; + csrvals[34] += FBCSR_TEST_IMAGINARY; + csrvals[35] += FBCSR_TEST_IMAGINARY; return csrm; } @@ -241,8 +342,8 @@ FbcsrSample::generate_dense() const } densem->at(2, 3) = densem->at(2, 5) = densem->at(3, 6) = 0.0; - densem->at(5, 7) += FBCSR_TEST_OFFSET; - densem->at(5, 8) += FBCSR_TEST_OFFSET; + densem->at(5, 7) += FBCSR_TEST_IMAGINARY; + densem->at(5, 8) += FBCSR_TEST_IMAGINARY; return densem; } @@ -287,8 +388,8 @@ FbcsrSample::generate_matrix_data() const {4, 7, 10.0}, {4, 8, 11.0}, {5, 6, 12.0}, - {5, 7, 13.0 + FBCSR_TEST_OFFSET}, - {5, 8, 14.0 + FBCSR_TEST_OFFSET}}}); + {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, + {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); } // Assuming row-major blocks @@ -334,8 +435,8 @@ gko::matrix_data FbcsrSample< {4, 7, 10.0}, {4, 8, 11.0}, {5, 6, 12.0}, - {5, 7, 13.0 + FBCSR_TEST_OFFSET}, - {5, 8, 14.0 + FBCSR_TEST_OFFSET}}}); + {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, + {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); } template @@ -452,6 +553,59 @@ FbcsrSample2::generate_fbcsr() const return mtx; } +template +std::unique_ptr, IndexType>> +FbcsrSample2::generate_abs_fbcsr_abstype() const +{ + using AbsValueType = typename gko::remove_complex; + using AbsFbcsr = gko::matrix::Fbcsr; + + const std::unique_ptr mat = generate_fbcsr(); + std::unique_ptr amat = + generate_acopy_impl(mat.get()); + + AbsValueType *const v = amat->get_values(); + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + v[0] = 1; + v[1] = 2; + v[2] = 3; + v[3] = 0; + v[10] = 0; + v[11] = 0; + v[12] = 12; + v[13] = 1; + v[14] = 2; + v[15] = 11; + + return amat; +} + +template +std::unique_ptr> +FbcsrSample2::generate_abs_fbcsr() const +{ + const std::unique_ptr mat = generate_fbcsr(); + std::unique_ptr amat = + generate_acopy_impl(mat.get()); + + ValueType *const v = amat->get_values(); + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + v[0] = 1.0; + v[1] = 2.0; + v[2] = 3.0; + v[3] = 0.0; + v[10] = 0.0; + v[11] = 0.0; + v[12] = 12.0; + v[13] = 1.0; + v[14] = 2.0; + v[15] = 11.0; + + return amat; +} + template std::unique_ptr> FbcsrSample2::generate_transpose_fbcsr() const diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 48a68343d94..51208c3dfb7 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -55,6 +55,7 @@ class FbcsrSample { public: using value_type = ValueType; using index_type = IndexType; + using absvalue_type = remove_complex; using Fbcsr = gko::matrix::Fbcsr; using Csr = gko::matrix::Csr; using Coo = gko::matrix::Coo; @@ -86,6 +87,12 @@ class FbcsrSample { /// (not block-row) gko::Array getNonzerosPerRow() const; + std::unique_ptr generate_abs_fbcsr() const; + + std::unique_ptr, index_type>> + 
generate_abs_fbcsr_abstype() const; + + const size_type nrows; const size_type ncols; const size_type nnz; @@ -94,6 +101,32 @@ class FbcsrSample { const size_type nbnz; const int bs; const std::shared_ptr exec; + +private: + // template + // void + // correct_abs_for_complex(gko::matrix::Fbcsr *amat) + // const; + template + void correct_abs_for_complex_values(FbcsrType *const mat) const; + + /// Enables complex data to be used for complex instantiations... + template + constexpr std::enable_if_t() || is_complex(), + ValueType> + sct(U u) const + { + return static_cast(u); + } + + /// ... while ignoring imaginary parts for real instantiations + template + constexpr std::enable_if_t() && !is_complex(), + ValueType> + sct(U u) const + { + return static_cast(u.real()); + } }; /// Generates the a sample block CSR matrix in different formats @@ -124,6 +157,12 @@ class FbcsrSample2 { gko::Array getNonzerosPerRow() const; + std::unique_ptr generate_abs_fbcsr() const; + + std::unique_ptr, index_type>> + generate_abs_fbcsr_abstype() const; + + /// Enables use of literals to instantiate value data template inline constexpr ValueType sct(U u) const { diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 229fcb20e26..5185e03b5e1 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -65,9 +66,6 @@ namespace { namespace matstr = gko::matrix::matrix_strategy; -constexpr int mat_bs = 1; - - template class Fbcsr : public ::testing::Test { protected: @@ -192,7 +190,7 @@ constexpr T get_some_number() } template -constexpr typename std::enable_if_t> get_some_number() +constexpr typename std::enable_if_t, T> get_some_number() { using RT = gko::remove_complex; return {static_cast(1.2), static_cast(3.4)}; @@ -1301,35 +1299,35 @@ TYPED_TEST(Fbcsr, ExtractsDiagonal) TYPED_TEST(Fbcsr, InplaceAbsolute) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Mtx = typename TestFixture::Mtx; -// auto mtx = gko::initialize( -// {{1.0, 2.0, -2.0}, {3.0, -5.0, 0.0}, {0.0, 1.0, -1.5}}, this->exec); -// -// mtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR( -// mtx, l({{1.0, 2.0, 2.0}, {3.0, 5.0, 0.0}, {0.0, 1.0, 1.5}}), 0.0); -//} +{ + using Mtx = typename TestFixture::Mtx; + auto mtx = this->fbsample2.generate_fbcsr(); + const std::unique_ptr refabs = + this->fbsample2.generate_abs_fbcsr(); + + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + const value_type *const refvals = refabs->get_const_values(); + + mtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, refabs, 0.0); +} TYPED_TEST(Fbcsr, OutplaceAbsolute) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Mtx = typename TestFixture::Mtx; -// auto mtx = gko::initialize( -// {{1.0, 2.0, -2.0}, {3.0, -5.0, 0.0}, {0.0, 1.0, -1.5}}, this->exec); -// -// auto abs_mtx = mtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR( -// abs_mtx, l({{1.0, 2.0, 2.0}, {3.0, 5.0, 0.0}, {0.0, 1.0, 1.5}}), 0.0); -// ASSERT_EQ(mtx->get_strategy()->get_name(), -// abs_mtx->get_strategy()->get_name()); -//} +{ + using Mtx = typename TestFixture::Mtx; + using AbsMtx = typename gko::remove_complex; + + auto mtx = this->fbsample2.generate_fbcsr(); + const std::unique_ptr refabs = + 
this->fbsample2.generate_abs_fbcsr_abstype(); + + auto abs_mtx = mtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, 0.0); +} template @@ -1373,49 +1371,47 @@ GKO_NOT_IMPLEMENTED; TYPED_TEST(FbcsrComplex, InplaceAbsolute) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Mtx = typename TestFixture::Mtx; -// using T = typename TestFixture::value_type; -// using index_type = typename TestFixture::index_type; -// auto exec = gko::ReferenceExecutor::create(); -// // clang-format off -// auto mtx = gko::initialize( -// {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, -// {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, -// {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); -// // clang-format on -// -// mtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR( -// mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), 0.0); -//} +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + gko::testing::FbcsrSample fbsample( + gko::ReferenceExecutor::create()); + auto mtx = fbsample.generate_fbcsr(); + + std::cout << " Generated fbcsr: " << mtx->get_values()[34] << ", " + << mtx->get_values()[35] << std::endl; + + const std::unique_ptr refabs = fbsample.generate_abs_fbcsr(); + + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + const value_type *const refvals = refabs->get_const_values(); + + mtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, refabs, 0.0); +} TYPED_TEST(FbcsrComplex, OutplaceAbsolute) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Mtx = typename TestFixture::Mtx; -// using T = typename TestFixture::value_type; -// using index_type = typename TestFixture::index_type; -// auto exec = gko::ReferenceExecutor::create(); -// // clang-format off -// auto mtx = gko::initialize( -// {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, -// {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, -// {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); -// // clang-format on -// -// auto abs_mtx = mtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR( -// abs_mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), 0.0); -// ASSERT_EQ(mtx->get_strategy()->get_name(), -// abs_mtx->get_strategy()->get_name()); -//} +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using AbsMtx = typename gko::remove_complex; + + gko::testing::FbcsrSample fbsample( + gko::ReferenceExecutor::create()); + + auto mtx = fbsample.generate_fbcsr(); + const std::unique_ptr refabs = + fbsample.generate_abs_fbcsr_abstype(); + + auto abs_mtx = mtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, 0.0); +} } // namespace From 70f6041cdc06eb935b01a52befe8272485b6d15a Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 18 Nov 2020 19:46:56 +0100 Subject: [PATCH 11/58] [tests fail] moved block utils to public interface; Fbcsr can now return number of block-rows and -columns --- core/components/fixed_block.hpp | 30 ---------- core/matrix/fbcsr.cpp | 31 +++-------- include/ginkgo/core/base/blockutils.hpp | 73 +++++++++++++++++++++++++ include/ginkgo/core/matrix/fbcsr.hpp | 14 ++++- include/ginkgo/ginkgo.hpp | 1 + reference/matrix/fbcsr_kernels.cpp | 23 ++++---- 6 files changed, 106 insertions(+), 66 deletions(-) create mode 100644 
include/ginkgo/core/base/blockutils.hpp diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index f2ff770cafb..8b6c433dfff 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -43,36 +43,6 @@ namespace gko { namespace blockutils { -/// Error that denotes issues between block sizes and matrix dimensions -template -class BlockSizeError : public Error { -public: - BlockSizeError(const std::string &file, const int line, - const int block_size, const IndexType size) - : Error(file, line, - " block size = " + std::to_string(block_size) + - ", size = " + std::to_string(size)) - {} -}; - -/// Error that denotes issues between block sizes and matrix dimensions -template -class BlockReadError : public Error { -public: - BlockReadError(const std::string &file, const int line, - const std::string &msg) - : Error(file, line, msg) - {} -}; - -template -IndexType getNumFixedBlocks(const int block_size, const IndexType size) -{ - if (size % block_size != 0) - throw BlockSizeError(__FILE__, __LINE__, block_size, size); - return size / block_size; -} - /// A dense block of values with compile-time constant dimensions /** The blocks are stored row-major. However, in future, * a layout template parameter can be added if needed. diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index d94aa5d6e81..0153c3b77ae 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -104,27 +104,14 @@ Fbcsr::Fbcsr(std::shared_ptr exec, std::shared_ptr strategy) : EnableLinOp(exec, size), bs_{block_size}, + nbcols_{gko::blockutils::getNumBlocks(block_size, size[1])}, values_(exec, num_nonzeros), - col_idxs_(exec, gko::blockutils::getNumFixedBlocks( - block_size * block_size, num_nonzeros)), - row_ptrs_(exec, - gko::blockutils::getNumFixedBlocks(block_size, size[0]) + 1), + col_idxs_(exec, gko::blockutils::getNumBlocks(block_size * block_size, + num_nonzeros)), + row_ptrs_(exec, gko::blockutils::getNumBlocks(block_size, size[0]) + 1), startrow_(exec, strategy->calc_size(num_nonzeros)), strategy_(strategy->copy()) -{ - if (size[0] % bs_ != 0) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "construct", - size[0], size[1], - "block size does not divide the dim 0!"); - if (size[1] % bs_ != 0) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "construct", - size[0], size[1], - "block size does not divide the dim 1!"); - if (num_nonzeros % (bs_ * bs_) != 0) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "construct", - size[0], size[1], - "block size^2 does not divide NNZ!"); -} +{} template @@ -360,13 +347,13 @@ template void Fbcsr::convert_to( SparsityCsr *result) const { - using gko::blockutils::getNumFixedBlocks; + using gko::blockutils::getNumBlocks; auto exec = this->get_executor(); auto tmp = SparsityCsr::create( exec, - gko::dim<2>{getNumFixedBlocks(bs_, this->get_size()[0]), - getNumFixedBlocks(bs_, this->get_size()[1])}, - getNumFixedBlocks(bs_ * bs_, this->get_num_stored_elements())); + gko::dim<2>{getNumBlocks(bs_, this->get_size()[0]), + getNumBlocks(bs_, this->get_size()[1])}, + getNumBlocks(bs_ * bs_, this->get_num_stored_elements())); tmp->col_idxs_ = this->col_idxs_; tmp->row_ptrs_ = this->row_ptrs_; diff --git a/include/ginkgo/core/base/blockutils.hpp b/include/ginkgo/core/base/blockutils.hpp new file mode 100644 index 00000000000..4bd4d3d040d --- /dev/null +++ b/include/ginkgo/core/base/blockutils.hpp @@ -0,0 +1,73 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo 
authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#ifndef GINKGO_CORE_BASE_BLOCKUTILS_HPP_ +#define GINKGO_CORE_BASE_BLOCKUTILS_HPP_ + + +#include + + +namespace gko { +namespace blockutils { + + +/// Error that denotes issues between block sizes and matrix dimensions +template +class BlockSizeError : public Error { +public: + BlockSizeError(const std::string &file, const int line, + const int block_size, const IndexType size) + : Error(file, line, + " block size = " + std::to_string(block_size) + + ", size = " + std::to_string(size)) + {} +}; + + +/** Returns the quotient of the second arg divided by the first + * but throws when they don't divide + */ +template +IndexType getNumBlocks(const int block_size, const IndexType size) +{ + if (size % block_size != 0) + throw BlockSizeError(__FILE__, __LINE__, block_size, size); + return size / block_size; +} + + +} // namespace blockutils +} // namespace gko + +#endif diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 836169ba0c3..141c1a3c3eb 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
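A minimal usage sketch of the new gko::blockutils::getNumBlocks helper introduced above (the dimensions below are made up for illustration; only the helper and the BlockSizeError exception come from this patch):

#include <ginkgo/core/base/blockutils.hpp>

// 12 rows split into blocks of 4 gives 3 block-rows.
const gko::int32 num_block_rows =
    gko::blockutils::getNumBlocks(4, gko::int32{12});

// A dimension the block size does not divide throws
// gko::blockutils::BlockSizeError<gko::int32> instead of silently
// truncating:
// gko::blockutils::getNumBlocks(4, gko::int32{13});  // would throw
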
#include +#include #include #include @@ -376,6 +377,13 @@ class Fbcsr : public EnableLinOp>, void set_block_size(const int block_size) { bs_ = block_size; } + index_type get_num_block_rows() const + { + return row_ptrs_.get_num_elems() - 1; + } + + index_type get_num_block_cols() const { return nbcols_; } + protected: using classical = matrix_strategy::classical>; @@ -434,6 +442,7 @@ class Fbcsr : public EnableLinOp>, std::shared_ptr strategy = std::make_shared()) : EnableLinOp(exec, size), bs_{block_size}, + nbcols_{gko::blockutils::getNumBlocks(block_size, size[1])}, values_{exec, std::forward(values)}, col_idxs_{exec, std::forward(col_idxs)}, row_ptrs_{exec, std::forward(row_ptrs)}, @@ -461,12 +470,13 @@ class Fbcsr : public EnableLinOp>, void make_srow() { startrow_.resize_and_reset( - strategy_->calc_size(values_.get_num_elems() / bs_ / bs_)); + strategy_->calc_size(col_idxs_.get_num_elems())); strategy_->process(row_ptrs_, &startrow_); } private: - int bs_; ///< Block size + int bs_; ///< Block size + size_type nbcols_; ///< Number of block-columns Array values_; Array col_idxs_; Array row_ptrs_; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 30408b8970b..541980beaa3 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index fe1ec87061e..9601df09d1d 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -78,7 +79,7 @@ void spmv(const std::shared_ptr exec, const int bs = a->get_block_size(); const IndexType nvecs = static_cast(b->get_size()[1]); const IndexType nbrows = - gko::blockutils::getNumFixedBlocks(bs, a->get_size()[0]); + gko::blockutils::getNumBlocks(bs, a->get_size()[0]); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); auto vals = a->get_const_values(); @@ -121,7 +122,7 @@ void advanced_spmv(const std::shared_ptr exec, const int bs = a->get_block_size(); const IndexType nvecs = static_cast(b->get_size()[1]); const IndexType nbrows = - gko::blockutils::getNumFixedBlocks(bs, a->get_size()[0]); + gko::blockutils::getNumBlocks(bs, a->get_size()[0]); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); auto vals = a->get_const_values(); @@ -417,9 +418,9 @@ void convert_to_dense(const std::shared_ptr exec, { const int bs = source->get_block_size(); const size_type nbrows = - gko::blockutils::getNumFixedBlocks(bs, source->get_size()[0]); + gko::blockutils::getNumBlocks(bs, source->get_size()[0]); const size_type nbcols = - gko::blockutils::getNumFixedBlocks(bs, source->get_size()[1]); + gko::blockutils::getNumBlocks(bs, source->get_size()[1]); const IndexType *const row_ptrs = source->get_const_row_ptrs(); const IndexType *const col_idxs = source->get_const_col_idxs(); const ValueType *const vals = source->get_const_values(); @@ -457,9 +458,9 @@ void convert_to_csr(const std::shared_ptr exec, { const int bs = source->get_block_size(); const size_type nbrows = - gko::blockutils::getNumFixedBlocks(bs, source->get_size()[0]); + gko::blockutils::getNumBlocks(bs, source->get_size()[0]); const size_type nbcols = - gko::blockutils::getNumFixedBlocks(bs, source->get_size()[1]); + 
gko::blockutils::getNumBlocks(bs, source->get_size()[1]); const IndexType *const browptrs = source->get_const_row_ptrs(); const IndexType *const bcolinds = source->get_const_col_idxs(); const ValueType *const bvals = source->get_const_values(); @@ -696,11 +697,9 @@ void transpose_and_transform( auto orig_vals = orig->get_const_values(); auto orig_num_cols = orig->get_size()[1]; - const size_type nbcols = - gko::blockutils::getNumFixedBlocks(bs, orig_num_cols); + const size_type nbcols = gko::blockutils::getNumBlocks(bs, orig_num_cols); auto orig_num_rows = orig->get_size()[0]; - const size_type nbrows = - gko::blockutils::getNumFixedBlocks(bs, orig_num_rows); + const size_type nbrows = gko::blockutils::getNumBlocks(bs, orig_num_rows); auto orig_nbnz = orig_row_ptrs[nbrows]; trans_row_ptrs[0] = 0; @@ -1019,7 +1018,7 @@ void is_sorted_by_column_index( const auto col_idxs = to_check->get_const_col_idxs(); const auto size = to_check->get_size(); const int bs = to_check->get_block_size(); - const size_type nbrows = gko::blockutils::getNumFixedBlocks(bs, size[0]); + const size_type nbrows = gko::blockutils::getNumBlocks(bs, size[0]); for (size_type i = 0; i < nbrows; ++i) { for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { @@ -1048,7 +1047,7 @@ void extract_diagonal(std::shared_ptr exec, const int bs = orig->get_block_size(); const size_type diag_size = diag->get_size()[0]; const size_type nbrows = - gko::blockutils::getNumFixedBlocks(bs, orig->get_size()[0]); + gko::blockutils::getNumBlocks(bs, orig->get_size()[0]); auto diag_values = diag->get_values(); assert(diag_size == orig->get_size()[0]); From 6e94c92b35ad558f03a871d3ef9fa772c511cbbb Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 19 Nov 2020 17:28:04 +0100 Subject: [PATCH 12/58] [tests fail] conjugate transpose works for fbcsr, disabled other tests --- core/matrix/fbcsr.cpp | 22 +++-- core/test/matrix/fbcsr_sample.cpp | 104 ++++++++++++++++++++++ core/test/matrix/fbcsr_sample.hpp | 40 ++++++++- cuda/test/matrix/CMakeLists.txt | 2 +- hip/test/matrix/CMakeLists.txt | 2 +- omp/test/matrix/CMakeLists.txt | 2 +- reference/matrix/fbcsr_kernels.cpp | 66 ++++++-------- reference/test/matrix/fbcsr_kernels.cpp | 113 +++++++++--------------- 8 files changed, 223 insertions(+), 128 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 0153c3b77ae..7d2d94209aa 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -557,18 +557,16 @@ std::unique_ptr Fbcsr::transpose() const template std::unique_ptr Fbcsr::conj_transpose() const - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// auto trans_cpy = -// Fbcsr::create(exec, gko::transpose(this->get_size()), -// this->get_num_stored_elements(), this->get_strategy()); -// -// exec->run(fbcsr::make_conj_transpose(this, trans_cpy.get())); -// trans_cpy->make_srow(); -// return std::move(trans_cpy); -//} +{ + auto exec = this->get_executor(); + auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), + this->get_num_stored_elements(), bs_, + this->get_strategy()); + + exec->run(fbcsr::make_conj_transpose(this, trans_cpy.get())); + trans_cpy->make_srow(); + return std::move(trans_cpy); +} template diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 6e7815f96d2..b48562c2cf1 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -781,5 +781,109 @@ 
FbcsrSampleSquare::generate_transpose_fbcsr() const GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TEST_SAMPLE_SQUARE); + +template +FbcsrSampleComplex::FbcsrSampleComplex( + const std::shared_ptr rexec) + : nrows{6}, + ncols{8}, + nnz{16}, + nbrows{3}, + nbcols{4}, + nbnz{4}, + bs{2}, + exec(rexec) +{} + +template +std::unique_ptr> +FbcsrSampleComplex::generate_fbcsr() const +{ + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs, std::make_shared>()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 1; + r[2] = 3; + r[3] = 4; + c[0] = 0; + c[1] = 0; + c[2] = 3; + c[3] = 2; + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + using namespace std::complex_literals; + v[0] = 1.0 + 1.15i; + v[1] = 2.0 + 2.15i; + v[2] = 3.0 - 3.15i; + v[3] = 0.0 - 0.15i; + v[10] = 0.0; + v[11] = 0.0; + v[12] = -12.0 + 12.15i; + v[13] = -1.0 + 1.15i; + v[14] = -2.0 - 2.15i; + v[15] = -11.0 - 11.15i; + + for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; + + return mtx; +} + +template +std::unique_ptr> +FbcsrSampleComplex::generate_conjtranspose_fbcsr() const +{ + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(ncols), + static_cast(nrows)}, + nnz, bs, std::make_shared>()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + index_type *const s = mtx->get_srow(); + r[0] = 0; + r[1] = 2; + r[2] = 2; + r[3] = 3; + r[4] = 4; + c[0] = 0; + c[1] = 1; + c[2] = 2; + c[3] = 1; + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + using namespace std::complex_literals; + v[0] = 1.0 - 1.15i; + v[1] = 3.0 + 3.15i; + v[2] = 2.0 - 2.15i; + v[3] = 0.0 + 0.15i; + v[8] = -12.0 - 12.15i; + v[9] = -2.0 + 2.15i; + v[10] = -1.0 - 1.15i; + v[11] = -11.0 + 11.15i; + v[13] = 0; + v[15] = 0; + + for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; + + return mtx; +} + +template class FbcsrSampleComplex, int>; +template class FbcsrSampleComplex, int>; +template class FbcsrSampleComplex, long>; +template class FbcsrSampleComplex, long>; + } // namespace testing } // namespace gko diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 51208c3dfb7..8fe2b18497f 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -49,6 +49,7 @@ namespace testing { /// Generates the same sample block CSR matrix in different formats /** This currently a 6 x 12 matrix with 3x3 blocks. * Assumes that the layout within each block is row-major. + * Generates complex data when instantiated with a complex value type. */ template class FbcsrSample { @@ -103,10 +104,6 @@ class FbcsrSample { const std::shared_ptr exec; private: - // template - // void - // correct_abs_for_complex(gko::matrix::Fbcsr *amat) - // const; template void correct_abs_for_complex_values(FbcsrType *const mat) const; @@ -206,6 +203,41 @@ class FbcsrSampleSquare { const std::shared_ptr exec; }; +/// Generates the a sample block CSR matrix with complex values +/** This currently a 6 x 8 matrix with 2x2 blocks. 
+ */ +template +class FbcsrSampleComplex { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using Coo = gko::matrix::Coo; + using Dense = gko::matrix::Dense; + using MatData = gko::matrix_data; + using SparCsr = gko::matrix::SparsityCsr; + using Diagonal = gko::matrix::Diagonal; + + static_assert(is_complex(), "Only for complex types!"); + + FbcsrSampleComplex(std::shared_ptr exec); + + std::unique_ptr generate_fbcsr() const; + + std::unique_ptr generate_conjtranspose_fbcsr() const; + + + const size_type nrows; + const size_type ncols; + const size_type nnz; + const size_type nbrows; + const size_type nbcols; + const size_type nbnz; + const int bs; + const std::shared_ptr exec; +}; + } // namespace testing } // namespace gko diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index 5be841b3d00..9d40716bea0 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -3,6 +3,6 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) -ginkgo_create_test(fbcsr_kernels) +#ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(sellp_kernels) diff --git a/hip/test/matrix/CMakeLists.txt b/hip/test/matrix/CMakeLists.txt index 4a32b5272f7..95d058e8069 100644 --- a/hip/test/matrix/CMakeLists.txt +++ b/hip/test/matrix/CMakeLists.txt @@ -3,6 +3,6 @@ ginkgo_create_hip_test(csr_kernels) ginkgo_create_hip_test(dense_kernels) ginkgo_create_hip_test(diagonal_kernels) ginkgo_create_hip_test(ell_kernels) -ginkgo_create_hip_test(fbcsr_kernels) +#ginkgo_create_hip_test(fbcsr_kernels) ginkgo_create_hip_test(hybrid_kernels) ginkgo_create_hip_test(sellp_kernels) diff --git a/omp/test/matrix/CMakeLists.txt b/omp/test/matrix/CMakeLists.txt index ff8ed2d0118..3c61fd32243 100644 --- a/omp/test/matrix/CMakeLists.txt +++ b/omp/test/matrix/CMakeLists.txt @@ -3,7 +3,7 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) -ginkgo_create_test(fbcsr_kernels) +#ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(sellp_kernels) ginkgo_create_test(sparsity_csr_kernels) diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 9601df09d1d..151661a875b 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -78,8 +78,7 @@ void spmv(const std::shared_ptr exec, { const int bs = a->get_block_size(); const IndexType nvecs = static_cast(b->get_size()[1]); - const IndexType nbrows = - gko::blockutils::getNumBlocks(bs, a->get_size()[0]); + const IndexType nbrows = a->get_num_block_rows(); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); auto vals = a->get_const_values(); @@ -121,8 +120,7 @@ void advanced_spmv(const std::shared_ptr exec, { const int bs = a->get_block_size(); const IndexType nvecs = static_cast(b->get_size()[1]); - const IndexType nbrows = - gko::blockutils::getNumBlocks(bs, a->get_size()[0]); + const IndexType nbrows = a->get_num_block_rows(); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); auto vals = a->get_const_values(); @@ -417,10 +415,8 @@ void convert_to_dense(const std::shared_ptr exec, matrix::Dense *const result) { const int bs = source->get_block_size(); - const size_type nbrows = - 
gko::blockutils::getNumBlocks(bs, source->get_size()[0]); - const size_type nbcols = - gko::blockutils::getNumBlocks(bs, source->get_size()[1]); + const size_type nbrows = source->get_num_block_rows(); + const size_type nbcols = source->get_num_block_cols(); const IndexType *const row_ptrs = source->get_const_row_ptrs(); const IndexType *const col_idxs = source->get_const_col_idxs(); const ValueType *const vals = source->get_const_values(); @@ -457,10 +453,8 @@ void convert_to_csr(const std::shared_ptr exec, matrix::Csr *const result) { const int bs = source->get_block_size(); - const size_type nbrows = - gko::blockutils::getNumBlocks(bs, source->get_size()[0]); - const size_type nbcols = - gko::blockutils::getNumBlocks(bs, source->get_size()[1]); + const size_type nbrows = source->get_num_block_rows(); + const size_type nbcols = source->get_num_block_cols(); const IndexType *const browptrs = source->get_const_row_ptrs(); const IndexType *const bcolinds = source->get_const_col_idxs(); const ValueType *const bvals = source->get_const_values(); @@ -697,9 +691,9 @@ void transpose_and_transform( auto orig_vals = orig->get_const_values(); auto orig_num_cols = orig->get_size()[1]; - const size_type nbcols = gko::blockutils::getNumBlocks(bs, orig_num_cols); + const size_type nbcols = orig->get_num_block_cols(); auto orig_num_rows = orig->get_size()[0]; - const size_type nbrows = gko::blockutils::getNumBlocks(bs, orig_num_rows); + const size_type nbrows = orig->get_num_block_rows(); auto orig_nbnz = orig_row_ptrs[nbrows]; trans_row_ptrs[0] = 0; @@ -728,12 +722,10 @@ template void conj_transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Fbcsr *trans) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// transpose_and_transform(exec, trans, orig, -// [](const ValueType x) { return conj(x); }); -//} +{ + transpose_and_transform(exec, trans, orig, + [](const ValueType x) { return conj(x); }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); @@ -742,20 +734,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// const auto row_ptrs = source->get_const_row_ptrs(); -// IndexType max_nnz = 0; -// -// for (size_type i = 0; i < num_rows; i++) { -// max_nnz = std::max(row_ptrs[i + 1] - row_ptrs[i], max_nnz); -// } -// -// *result = max_nnz; -//} + const matrix::Fbcsr *source, size_type *const result) +{ + const auto num_rows = source->get_size()[0]; + const auto row_ptrs = source->get_const_row_ptrs(); + const int bs = source->get_block_size(); + IndexType max_nnz = 0; + + for (size_type i = 0; i < num_rows; i++) { + const size_type ibrow = i / bs; + max_nnz = + std::max((row_ptrs[ibrow + 1] - row_ptrs[ibrow]) * bs, max_nnz); + } + + *result = max_nnz; +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -1018,7 +1011,7 @@ void is_sorted_by_column_index( const auto col_idxs = to_check->get_const_col_idxs(); const auto size = to_check->get_size(); const int bs = to_check->get_block_size(); - const size_type nbrows = gko::blockutils::getNumBlocks(bs, size[0]); + const size_type nbrows = to_check->get_num_block_rows(); for (size_type i = 0; i < nbrows; ++i) { for (auto idx = row_ptrs[i] 
+ 1; idx < row_ptrs[i + 1]; ++idx) { @@ -1046,8 +1039,7 @@ void extract_diagonal(std::shared_ptr exec, const auto values = orig->get_const_values(); const int bs = orig->get_block_size(); const size_type diag_size = diag->get_size()[0]; - const size_type nbrows = - gko::blockutils::getNumBlocks(bs, orig->get_size()[0]); + const size_type nbrows = orig->get_num_block_rows(); auto diag_values = diag->get_values(); assert(diag_size == orig->get_size()[0]); diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 5185e03b5e1..4b75c240236 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -100,17 +100,6 @@ class Fbcsr : public ::testing::Test { mtxsq(fbsamplesquare.generate_fbcsr()) {} - // void create_mtx3(Mtx *sorted, Mtx *unsorted) - // { - // /* For both versions (sorted and unsorted), this matrix is stored: - // * 0 2 1 - // * 3 1 8 - // * 2 0 3 - // * The unsorted matrix will have the (value, column) pair per row not - // * sorted, which we still consider a valid FBCSR format. - // */ - // } - void assert_equal_to_mtx(const Csr *const m) { ASSERT_EQ(m->get_size(), refcsrmtx->get_size()); @@ -127,16 +116,6 @@ class Fbcsr : public ::testing::Test { } } - // void assert_equal_to_mtx(const Dense *const m) - // { - // ASSERT_EQ(m->get_size(), refdenmtx->get_size()); - // ASSERT_EQ(m->get_num_stored_elements(), - // refdenmtx->get_num_stored_elements()); for(index_type i = 0; i < - // m->get_size()[0]; i++) - // for(index_type j = 0; j < m->get_size()[1]; j++) - // ASSERT_EQ(m->at(i,j), refdenmtx->at(i,j)); - // } - void assert_equal_to_mtx(const Coo *m) { ASSERT_EQ(m->get_size(), refcoomtx->get_size()); @@ -1034,8 +1013,8 @@ TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) } -TYPED_TEST(Fbcsr, SquareMatrixIsRowPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, SquareMatrixIsRowPermutable) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1060,8 +1039,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, NonSquareMatrixIsRowPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, NonSquareMatrixIsRowPermutable) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1084,8 +1063,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, SquareMatrixIsColPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, SquareMatrixIsColPermutable) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1110,8 +1089,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, NonSquareMatrixIsColPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, NonSquareMatrixIsColPermutable) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1134,8 +1113,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, SquareMatrixIsInverseRowPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, SquareMatrixIsInverseRowPermutable) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1162,8 +1141,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseRowPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseRowPermutable) +// 
GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1188,8 +1167,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, SquareMatrixIsInverseColPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, SquareMatrixIsInverseColPermutable) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1216,8 +1195,8 @@ GKO_NOT_IMPLEMENTED; //} -TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseColPermutable) -GKO_NOT_IMPLEMENTED; +// TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseColPermutable) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // using Fbcsr = typename TestFixture::Mtx; @@ -1261,20 +1240,18 @@ TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) } -TYPED_TEST(Fbcsr, SortSortedMatrix) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto matrix = this->mtx3_sorted->clone(); -// +// TYPED_TEST(Fbcsr, SortSortedMatrix) +// { +// auto matrix = this->mtx->clone(); + // matrix->sort_by_column_index(); -// -// GKO_ASSERT_MTX_NEAR(matrix, this->mtx3_sorted, 0.0); -//} +// GKO_ASSERT_MTX_NEAR(matrix, this->mtx, 0.0); +// } -TYPED_TEST(Fbcsr, SortUnsortedMatrix) -GKO_NOT_IMPLEMENTED; + +// TYPED_TEST(Fbcsr, SortUnsortedMatrix) +// GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // auto matrix = this->mtx3_unsorted->clone(); @@ -1344,30 +1321,22 @@ TYPED_TEST_CASE(FbcsrComplex, gko::test::ComplexValueIndexTypes); TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using T = typename TestFixture::value_type; -// -// auto exec = gko::ReferenceExecutor::create(); -// // clang-format off -// auto mtx2 = gko::initialize( -// {{T{1.0, 2.0}, T{3.0, 0.0}, T{2.0, 0.0}}, -// {T{0.0, 0.0}, T{5.0, - 3.5}, T{0.0,0.0}}, -// {T{0.0, 0.0}, T{0.0, 1.5}, T{2.0,0.0}}}, exec); -// // clang-format on -// -// auto trans = mtx2->conj_transpose(); -// auto trans_as_fbcsr = static_cast(trans.get()); -// -// // clang-format off -// GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, -// l({{T{1.0, - 2.0}, T{0.0, 0.0}, T{0.0, 0.0}}, -// {T{3.0, 0.0}, T{5.0, 3.5}, T{0.0, - 1.5}}, -// {T{2.0, 0.0}, T{0.0, 0.0}, T{2.0 + 0.0}}}), 0.0); -// // clang-format on -//} +{ + using Fbcsr = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + + auto exec = gko::ReferenceExecutor::create(); + gko::testing::FbcsrSampleComplex csample(exec); + std::unique_ptr mtx = csample.generate_fbcsr(); + std::unique_ptr reftrans = + csample.generate_conjtranspose_fbcsr(); + + auto trans = mtx->conj_transpose(); + auto trans_as_fbcsr = static_cast(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftrans, 0.0); +} TYPED_TEST(FbcsrComplex, InplaceAbsolute) From 93f982cb3213edb49b3b384cf9fa937a8049b682 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Fri, 20 Nov 2020 17:32:36 +0100 Subject: [PATCH 13/58] [builds fail] changed fbcsr permutation interfaces to match csr --- core/matrix/fbcsr_kernels.hpp | 14 +- dpcpp/CMakeLists.txt | 1 + dpcpp/matrix/fbcsr_kernels.dp.cpp | 351 +++++++++++++++++++++++++++++ reference/matrix/fbcsr_kernels.cpp | 37 ++- 4 files changed, 373 insertions(+), 30 deletions(-) create mode 
100644 dpcpp/matrix/fbcsr_kernels.dp.cpp diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index 6fa130b11c9..11d76888859 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -137,27 +137,21 @@ namespace kernels { #define GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void row_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Fbcsr *orig, \ matrix::Fbcsr *row_permuted) -#define GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ - void column_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ - const matrix::Fbcsr *orig, \ - matrix::Fbcsr *column_permuted) - #define GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_row_permute( \ std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Fbcsr *orig, \ matrix::Fbcsr *row_permuted) #define GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_column_permute( \ std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType > *permutation_indices, \ const matrix::Fbcsr *orig, \ matrix::Fbcsr *column_permuted) @@ -220,8 +214,6 @@ namespace kernels { template \ GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 186da7f8b41..c52c77e109b 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -24,6 +24,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp + matrix/fbcsr_kernels.dp.cpp matrix/dense_kernels.dp.cpp matrix/diagonal_kernels.dp.cpp matrix/ell_kernels.dp.cpp diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp new file mode 100644 index 00000000000..6375aea4223 --- /dev/null +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -0,0 +1,351 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/base/allocator.hpp" +#include "core/base/iterator_factory.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/fbcsr_builder.hpp" +#include "dpcpp/components/format_conversion.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Compressed sparse row matrix format namespace. + * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void spgemm_insert_row(unordered_set &cols, + const matrix::Fbcsr *c, + size_type row) GKO_NOT_IMPLEMENTED; + + +template +void spgemm_insert_row2(unordered_set &cols, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + size_type row) GKO_NOT_IMPLEMENTED; + + +template +void spgemm_accumulate_row(map &cols, + const matrix::Fbcsr *c, + ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; + + +template +void spgemm_accumulate_row2(map &cols, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; + + +template +void spgemm(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); + + +template +void advanced_spgemm(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Fbcsr *b, + const matrix::Dense *beta, + const matrix::Fbcsr *d, + matrix::Fbcsr *c) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); + + +template +void spgeam(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *beta, + const matrix::Fbcsr *b, + matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; + + +template +void convert_to_coo(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Coo *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + 
matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +void convert_to_sellp(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Sellp *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); + + +template +void convert_to_ell(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Ell *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); + + +template +inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, + const IndexType *col_idxs, + const ValueType *fbcsr_vals, + IndexType *row_idxs, IndexType *col_ptrs, + ValueType *csc_vals, + UnaryOperator op) GKO_NOT_IMPLEMENTED; + + +template +void transpose_and_transform(std::shared_ptr exec, + matrix::Fbcsr *trans, + const matrix::Fbcsr *orig, + UnaryOperator op) GKO_NOT_IMPLEMENTED; + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result, size_type stride_factor, + size_type slice_size) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void convert_to_hybrid(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Hybrid *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); + + +// template +// void invert_permutation(std::shared_ptr exec, +// size_type size, const IndexType *permutation_indices, +// IndexType *inv_permutation) GKO_NOT_IMPLEMENTED; + +// GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); + + +template +void row_permute( + std::shared_ptr exec, const Array *perm, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); + + +template +void inverse_row_permute( + std::shared_ptr exec, const IndexType *perm, + const matrix::Fbcsr *orig, + matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); + + +template +void inverse_column_permute( + std::shared_ptr exec, const IndexType *perm, + const matrix::Fbcsr *orig, + matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); + + +template 
+void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void sort_by_column_index(std::shared_ptr exec, + matrix::Fbcsr *to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 151661a875b..042357d0429 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -707,8 +707,8 @@ void transpose_and_transform( template void transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) { transpose_and_transform(exec, trans, orig, [](const ValueType x) { return x; }); @@ -720,8 +720,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) { transpose_and_transform(exec, trans, orig, [](const ValueType x) { return conj(x); }); @@ -734,7 +734,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, size_type *const result) + const matrix::Fbcsr *const source, + size_type *const result) { const auto num_rows = source->get_size()[0]; const auto row_ptrs = source->get_const_row_ptrs(); @@ -814,7 +815,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_permute_impl(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *const permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -857,7 +858,7 @@ void row_permute_impl(std::shared_ptr exec, template void row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *const permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -872,7 +873,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *const permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -893,10 +894,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void column_permute_impl(const Array *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) - GKO_NOT_IMPLEMENTED; +void column_permute_impl(const IndexType *const permutation_indices, + const matrix::Fbcsr *const orig, + matrix::Fbcsr + *const column_permuted) GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // auto perm = permutation_indices->get_const_data(); @@ -924,7 +925,7 @@ void 
column_permute_impl(const Array *permutation_indices, template void column_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; @@ -939,16 +940,14 @@ void column_permute(std::shared_ptr exec, // column_permute_impl(&inv_perm, orig, column_permuted); //} -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); - template void inverse_column_permute( std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; + const IndexType *const permutation_indices, + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const column_permuted) + GKO_NOT_IMPLEMENTED; //{ // TODO (script:fbcsr): change the code imported from matrix/csr if needed // column_permute_impl(permutation_indices, orig, column_permuted); From 7f15feffb05304f59308e693f95e82fbfcf13aa9 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 23 Nov 2020 11:10:23 +0100 Subject: [PATCH 14/58] fixed build on dpcpp --- core/device_hooks/common_kernels.inc.cpp | 7 ------- core/matrix/fbcsr.cpp | 1 - core/matrix/fbcsr_kernels.hpp | 2 +- dpcpp/matrix/fbcsr_kernels.dp.cpp | 2 +- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index f66c749fd73..3cb06d78763 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -804,7 +804,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); } // namespace csr -// TODO (script:fbcsr): adapt this block as needed namespace fbcsr { @@ -895,12 +894,6 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); -template -GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); - template GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 7d2d94209aa..4f5ba607873 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -76,7 +76,6 @@ GKO_REGISTER_OPERATION(calculate_total_cols, fbcsr::calculate_total_cols); GKO_REGISTER_OPERATION(transpose, fbcsr::transpose); GKO_REGISTER_OPERATION(conj_transpose, fbcsr::conj_transpose); GKO_REGISTER_OPERATION(row_permute, fbcsr::row_permute); -GKO_REGISTER_OPERATION(column_permute, fbcsr::column_permute); GKO_REGISTER_OPERATION(inverse_row_permute, fbcsr::inverse_row_permute); GKO_REGISTER_OPERATION(inverse_column_permute, fbcsr::inverse_column_permute); GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index 11d76888859..4968acd4713 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -151,7 +151,7 @@ namespace kernels { #define GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_column_permute( \ std::shared_ptr exec, \ - const IndexType > *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Fbcsr *orig, \ matrix::Fbcsr *column_permuted) diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index 6375aea4223..749ef7c3d2d 100644 --- 
a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -279,7 +279,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_permute( - std::shared_ptr exec, const Array *perm, + std::shared_ptr exec, const IndexType *perm, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; From 74e0c21e48dcecd80717c50de837f2f63625011c Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 23 Nov 2020 15:07:14 +0100 Subject: [PATCH 15/58] fixed hip and omp builds for the permutation kernels' interface change --- hip/matrix/fbcsr_kernels.hip.cpp | 17 +++-------------- omp/matrix/fbcsr_kernels.cpp | 28 +++------------------------- 2 files changed, 6 insertions(+), 39 deletions(-) diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index da425238ab0..e6dc61946cb 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -1015,7 +1015,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -1024,20 +1024,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); - - template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -1049,7 +1038,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_column_permute( std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index d30d8cd3b16..609ab990cdb 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -684,7 +684,7 @@ void row_permute_impl(std::shared_ptr exec, template void row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -699,7 +699,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -751,32 +751,10 @@ void column_permute_impl(const Array *permutation_indices, //} -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// Array inv_perm(*permutation_indices); -// auto iperm = inv_perm.get_data(); -//#pragma omp parallel for -// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { -// iperm[perm[ind]] = ind; -// } -// column_permute_impl(&inv_perm, orig, column_permuted); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - 
GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); - - template void inverse_column_permute( std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; //{ From 07d6f603590852e0dd7ae7475bcbfc8ea8b529b1 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 23 Nov 2020 16:11:24 +0100 Subject: [PATCH 16/58] fixed cuda build for changes to permutation interface Also cleaned up some commented code etc. --- core/CMakeLists.txt | 1 - core/components/fixed_block.hpp | 1 - core/matrix/fbcsr.cpp | 109 +----------------- core/test/matrix/fbcsr_sample.cpp | 8 +- core/test/matrix/fbcsr_sample.hpp | 2 +- cuda/matrix/fbcsr_kernels.cu | 17 +-- include/ginkgo/core/matrix/fbcsr.hpp | 16 +-- .../ginkgo/core/matrix/matrix_strategies.hpp | 2 +- 8 files changed, 20 insertions(+), 136 deletions(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 4e1a7086148..7c67744d69a 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -22,7 +22,6 @@ target_sources(ginkgo log/stream.cpp matrix/coo.cpp matrix/csr.cpp - matrix/fbcsr.cpp matrix/dense.cpp matrix/diagonal.cpp matrix/ell.cpp diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 8b6c433dfff..fbd8a259f69 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -132,7 +132,6 @@ class DenseBlock final { void zero() { for (int i = 0; i < nrows_ * ncols_; i++) - // vals_[i] = gko::zero(); vals_[i] = static_cast(0); } diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 4f5ba607873..b8ff4558408 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -69,10 +69,7 @@ GKO_REGISTER_OPERATION(spgeam, fbcsr::spgeam); GKO_REGISTER_OPERATION(convert_to_coo, fbcsr::convert_to_coo); GKO_REGISTER_OPERATION(convert_to_csr, fbcsr::convert_to_csr); GKO_REGISTER_OPERATION(convert_to_dense, fbcsr::convert_to_dense); -// GKO_REGISTER_OPERATION(convert_to_sellp, fbcsr::convert_to_sellp); GKO_REGISTER_OPERATION(calculate_total_cols, fbcsr::calculate_total_cols); -// GKO_REGISTER_OPERATION(convert_to_ell, fbcsr::convert_to_ell); -// GKO_REGISTER_OPERATION(convert_to_hybrid, fbcsr::convert_to_hybrid); GKO_REGISTER_OPERATION(transpose, fbcsr::transpose); GKO_REGISTER_OPERATION(conj_transpose, fbcsr::conj_transpose); GKO_REGISTER_OPERATION(row_permute, fbcsr::row_permute); @@ -275,73 +272,6 @@ void Fbcsr::move_to(Csr *result) } -// template -// void Fbcsr::convert_to( -// Hybrid *result) const -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// Array row_nnz(exec, this->get_size()[0]); -// -// size_type ell_lim = zero(); -// size_type coo_lim = zero(); -// result->get_strategy()->compute_hybrid_config(row_nnz, &ell_lim, -// &coo_lim); const auto max_nnz_per_row = -// std::max(result->get_ell_num_stored_elements_per_row(), ell_lim); -// const auto stride = std::max(result->get_ell_stride(), -// this->get_size()[0]); const auto coo_nnz = -// std::max(result->get_coo_num_stored_elements(), coo_lim); -// auto tmp = Hybrid::create( -// exec, this->get_size(), max_nnz_per_row, stride, coo_nnz, -// result->get_strategy()); -// exec->run(fbcsr::make_convert_to_hybrid(this, tmp.get())); -// tmp->move_to(result); -//} - - -// template -// void Fbcsr::move_to(Hybrid -// *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// 
this->convert_to(result); -//} - - -// template -// void Fbcsr::convert_to( -// Sellp *result) const -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// const auto stride_factor = (result->get_stride_factor() == 0) -// ? default_stride_factor -// : result->get_stride_factor(); -// const auto slice_size = (result->get_slice_size() == 0) -// ? default_slice_size -// : result->get_slice_size(); -// size_type total_cols = 0; -// exec->run(fbcsr::make_calculate_total_cols(this, &total_cols, -// stride_factor, -// slice_size)); -// auto tmp = Sellp::create( -// exec, this->get_size(), slice_size, stride_factor, total_cols); -// exec->run(fbcsr::make_convert_to_sellp(this, tmp.get())); -// tmp->move_to(result); -//} - - -// template -// void Fbcsr::move_to(Sellp -// *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// this->convert_to(result); -//} - - template void Fbcsr::convert_to( SparsityCsr *result) const @@ -369,31 +299,6 @@ void Fbcsr::move_to( } -// template -// void Fbcsr::convert_to( -// Ell *result) const -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// size_type max_nnz_per_row; -// exec->run(fbcsr::make_calculate_max_nnz_per_row(this, &max_nnz_per_row)); -// auto tmp = Ell::create(exec, this->get_size(), -// max_nnz_per_row); -// exec->run(fbcsr::make_convert_to_ell(this, tmp.get())); -// tmp->move_to(result); -//} - - -// template -// void Fbcsr::move_to(Ell *result) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// this->convert_to(result); -//} - - /* Within blocks, the storage order is row-major. * Currently, this implementation is sequential and has complexity O(n log n) * assuming nnz = O(n). 
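A small standalone sketch of the block-indexing arithmetic that the read() implementation below relies on; the struct and function names here are illustrative only and do not appear in the patch:

// Maps a global (row, col) entry to its containing block and its offset
// inside that block, for a fixed block size bs. Mirrors the row / bs and
// row % bs arithmetic used in Fbcsr::read().
struct BlockPosition {
    int block_row;   // index of the containing block-row
    int block_col;   // index of the containing block-column
    int local_row;   // row offset inside the bs x bs block
    int local_col;   // column offset inside the bs x bs block
};

inline BlockPosition locate_in_block(int row, int col, int bs)
{
    return {row / bs, col / bs, row % bs, col % bs};
}

// Example: with bs = 3, entry (7, 4) belongs to block (2, 1) and sits at
// local position (1, 1) inside that block.
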
@@ -410,7 +315,6 @@ void Fbcsr::read(const mat_data &data) const index_type nnz = static_cast(data.nonzeros.size()); const int bs = this->bs_; - // GKO_ASSERT_EQ(nnz%(this->bs_*this->bs_), 0); using Blk_t = blockutils::DenseBlock; @@ -429,7 +333,7 @@ void Fbcsr::read(const mat_data &data) } }; - auto create_block_set = [nnz, bs](const mat_data &data) { + auto create_block_map = [nnz, bs](const mat_data &data) { std::map blocks; for (index_type inz = 0; inz < nnz; inz++) { const index_type row = data.nonzeros[inz].row; @@ -441,9 +345,6 @@ void Fbcsr::read(const mat_data &data) const index_type blockrow = row / bs; const index_type blockcol = col / bs; - // const typename std::map::iterator it - // = blocks.find(FbEntry{row/bs, col/bs, - // DenseBlock()}); Blk_t &nnzblk = blocks[{blockrow, blockcol}]; if (nnzblk.size() == 0) { nnzblk.resize(bs, bs); @@ -452,14 +353,14 @@ void Fbcsr::read(const mat_data &data) } else { if (nnzblk(localrow, localcol) != gko::zero()) throw Error(__FILE__, __LINE__, - "Error in reading fixed block CSR matrix!"); + "Error: re-visited the same non-zero!"); nnzblk(localrow, localcol) = val; } } return blocks; }; - const std::map blocks = create_block_set(data); + const std::map blocks = create_block_map(data); auto tmp = Fbcsr::create(this->get_executor()->get_master(), data.size, blocks.size() * bs * bs, bs, this->get_strategy()); @@ -477,9 +378,8 @@ void Fbcsr::read(const mat_data &data) throw gko::OutOfBoundsError(__FILE__, __LINE__, cur_brow, num_brows); - // set block-column index and block values tmp->col_idxs_.get_data()[cur_bnz] = it->first.block_column; - // vals + for (int ibr = 0; ibr < bs; ibr++) for (int jbr = 0; jbr < bs; jbr++) values(cur_bnz, ibr, jbr) = it->second(ibr, jbr); @@ -497,6 +397,7 @@ void Fbcsr::read(const mat_data &data) tmp->row_ptrs_.get_data()[++cur_brow] = static_cast(blocks.size()); + assert(cur_brow == tmp->get_size()[0] / bs); tmp->make_srow(); diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index b48562c2cf1..246510db57d 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -75,13 +75,13 @@ generate_acopy_impl(const FbcsrType *const mat) const index_type *const rowptrs = mat->get_row_ptrs(); index_type *const acolidxs = amat->get_col_idxs(); index_type *const arowptrs = amat->get_row_ptrs(); - // blockutils + for (index_type i = 0; i < mat->get_num_stored_elements() / (mat->get_block_size() * mat->get_block_size()); i++) acolidxs[i] = colidxs[i]; - // blockutils + for (index_type i = 0; i < mat->get_size()[0] / mat->get_block_size() + 1; i++) arowptrs[i] = rowptrs[i]; @@ -132,8 +132,7 @@ FbcsrSample::generate_fbcsr() const mtx->get_size()[0], mtx->get_size()[1], "block size does not divide the size!"); - // blockutils - for (index_type ibrow = 0; ibrow < mtx->get_size()[0] / bs; ibrow++) { + for (index_type ibrow = 0; ibrow < mtx->get_num_block_rows(); ibrow++) { const index_type *const browptr = mtx->get_row_ptrs(); for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; inz++) { const index_type bcolind = mtx->get_col_idxs()[inz]; @@ -778,6 +777,7 @@ FbcsrSampleSquare::generate_transpose_fbcsr() const #define GKO_DECLARE_FBCSR_TEST_SAMPLE_SQUARE(ValueType, IndexType) \ class FbcsrSampleSquare + GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TEST_SAMPLE_SQUARE); diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 8fe2b18497f..5e879964950 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ 
b/core/test/matrix/fbcsr_sample.hpp @@ -161,7 +161,7 @@ class FbcsrSample2 { /// Enables use of literals to instantiate value data template - inline constexpr ValueType sct(U u) const + constexpr ValueType sct(U u) const { return static_cast(u); } diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index 968399ff8ed..b53df43ae55 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -1203,7 +1203,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -1212,20 +1212,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_COLUMN_PERMUTE_KERNEL); - - template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; @@ -1237,7 +1226,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_column_permute( std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *permutation_indices, const matrix::Fbcsr *orig, matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 141c1a3c3eb..7d5c00cbd02 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -75,14 +75,15 @@ class FbcsrBuilder; /** - * FBCSR is a matrix format which stores only the nonzero coefficients by - * compressing each row of the matrix (compressed sparse row format). + * FBCSR is a matrix format meant for matrices having a natural block structure + * made up of small, dense, disjoint blocks. It is similar to CSR \sa Csr. * However, unlike Csr, each non-zero location stores a small dense block of * entries having a constant size. This reduces the number of integers that need * to be stored in order to refer to a given non-zero entry, and enables * efficient implementation of certain block methods. * - * The entries within each dense block are stored row-major. + * The block size is expected to be known in advance and passed to the + * constructor. * * @note The total number of rows and the number of columns are expected to be * divisible by the block size. @@ -112,7 +113,8 @@ class FbcsrBuilder; * A->apply(alpha, I, beta, B) // B = alpha*A + beta*B * ``` * Both the SpGEMM and SpGEAM operation require the input matrices to be sorted - * by column index, otherwise the algorithms will produce incorrect results. + * by block-column index, otherwise the algorithms will produce incorrect + * results. 
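 *
 * A possible usage sketch follows; the create() overload shown mirrors the one
 * this patch uses internally (executor, size, number of stored values, block
 * size) and is an assumption rather than a stable interface, and `exec`, `b`
 * and `x` are assumed to be created elsewhere:
 * ```
 * // 4x4 matrix stored as 2x2 blocks; entries must form complete blocks
 * gko::matrix_data<double, int> data(gko::dim<2>{4, 4});
 * data.nonzeros.emplace_back(0, 0, 1.0);
 * data.nonzeros.emplace_back(0, 1, 2.0);
 * data.nonzeros.emplace_back(1, 0, 3.0);
 * data.nonzeros.emplace_back(1, 1, 4.0);
 * auto A = gko::matrix::Fbcsr<double, int>::create(
 *     exec, gko::dim<2>{4, 4}, 4, 2);  // 4 stored values, block size 2
 * A->read(data);               // groups the entries into one 2x2 block
 * A->apply(lend(b), lend(x));  // x = A * b for dense b, x
 * ```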
* * @tparam ValueType precision of matrix elements * @tparam IndexType precision of matrix indexes @@ -128,9 +130,6 @@ class Fbcsr : public EnableLinOp>, public ConvertibleTo>, public ConvertibleTo>, public ConvertibleTo>, - // public ConvertibleTo>, - // public ConvertibleTo>, - // public ConvertibleTo>, public ConvertibleTo>, public DiagonalExtractable, public ReadableFromMatrixData, @@ -143,9 +142,6 @@ class Fbcsr : public EnableLinOp>, friend class EnablePolymorphicObject; friend class Coo; friend class Dense; - // friend class Ell; - // friend class Hybrid; - // friend class Sellp; friend class SparsityCsr; friend class FbcsrBuilder; friend class Fbcsr, IndexType>; diff --git a/include/ginkgo/core/matrix/matrix_strategies.hpp b/include/ginkgo/core/matrix/matrix_strategies.hpp index c774209f7d7..47133b3610f 100644 --- a/include/ginkgo/core/matrix/matrix_strategies.hpp +++ b/include/ginkgo/core/matrix/matrix_strategies.hpp @@ -49,7 +49,7 @@ template class automatic; /** - * strategy_type is to decide how to set the fbcsr algorithm. + * strategy_type is to decide how map the work-items to execution units * * The practical strategy method should inherit strategy_type and implement * its `process`, `calc_size` function and the corresponding device kernel. From 5c434154c71eeb844bb4e05666af70e74a7effe0 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 26 Nov 2020 11:03:48 +0100 Subject: [PATCH 17/58] removed commented and dead code, and unnecessary includes --- core/device_hooks/common_kernels.inc.cpp | 64 -- core/matrix/fbcsr.cpp | 124 +-- core/matrix/fbcsr_kernels.hpp | 91 -- cuda/matrix/fbcsr_kernels.cu | 1182 ---------------------- cuda/test/matrix/CMakeLists.txt | 1 - cuda/test/matrix/fbcsr_kernels.cpp | 883 ---------------- dpcpp/matrix/fbcsr_kernels.dp.cpp | 148 --- hip/matrix/fbcsr_kernels.hip.cpp | 1003 ------------------ hip/test/matrix/CMakeLists.txt | 1 - hip/test/matrix/fbcsr_kernels.hip.cpp | 866 ---------------- include/ginkgo/core/matrix/fbcsr.hpp | 31 - omp/components/fbcsr_spgeam.hpp | 31 - omp/matrix/fbcsr_kernels.cpp | 575 ----------- omp/test/matrix/CMakeLists.txt | 1 - omp/test/matrix/fbcsr_kernels.cpp | 662 ------------ reference/components/fbcsr_spgeam.hpp | 31 - reference/matrix/fbcsr_kernels.cpp | 556 ---------- reference/test/matrix/fbcsr_kernels.cpp | 661 ------------ 18 files changed, 2 insertions(+), 6909 deletions(-) delete mode 100644 cuda/test/matrix/fbcsr_kernels.cpp delete mode 100644 hip/test/matrix/fbcsr_kernels.hip.cpp delete mode 100644 omp/components/fbcsr_spgeam.hpp delete mode 100644 omp/test/matrix/fbcsr_kernels.cpp delete mode 100644 reference/components/fbcsr_spgeam.hpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 3cb06d78763..9cca9e2d26f 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -818,22 +818,6 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); -template -GKO_DECLARE_FBCSR_SPGEMM_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); - -template -GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); - -template -GKO_DECLARE_FBCSR_SPGEAM_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); 
-GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); - template GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -846,36 +830,6 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); -template -GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); - -template -GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); - -template -GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); - -template -GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); - -template -GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); - template GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -888,24 +842,6 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); -template -GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); - -template -GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); - -template -GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); - template GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index b8ff4558408..54e938d768d 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -63,18 +63,10 @@ namespace fbcsr { GKO_REGISTER_OPERATION(spmv, fbcsr::spmv); GKO_REGISTER_OPERATION(advanced_spmv, fbcsr::advanced_spmv); -GKO_REGISTER_OPERATION(spgemm, fbcsr::spgemm); -GKO_REGISTER_OPERATION(advanced_spgemm, fbcsr::advanced_spgemm); -GKO_REGISTER_OPERATION(spgeam, fbcsr::spgeam); -GKO_REGISTER_OPERATION(convert_to_coo, fbcsr::convert_to_coo); GKO_REGISTER_OPERATION(convert_to_csr, fbcsr::convert_to_csr); GKO_REGISTER_OPERATION(convert_to_dense, fbcsr::convert_to_dense); -GKO_REGISTER_OPERATION(calculate_total_cols, fbcsr::calculate_total_cols); GKO_REGISTER_OPERATION(transpose, fbcsr::transpose); GKO_REGISTER_OPERATION(conj_transpose, fbcsr::conj_transpose); -GKO_REGISTER_OPERATION(row_permute, fbcsr::row_permute); -GKO_REGISTER_OPERATION(inverse_row_permute, fbcsr::inverse_row_permute); -GKO_REGISTER_OPERATION(inverse_column_permute, fbcsr::inverse_column_permute); GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, fbcsr::calculate_max_nnz_per_row); 
GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, @@ -120,8 +112,6 @@ void Fbcsr::apply_impl(const LinOp *const b, // if b is a FBCSR matrix, we compute a SpGeMM throw /*::gko::*/ NotImplemented(__FILE__, __LINE__, "SpGeMM for Fbcsr"); - auto x_fbcsr = as(x); - this->get_executor()->run(fbcsr::make_spgemm(this, b_fbcsr, x_fbcsr)); } else { // otherwise we assume that b is dense and compute a SpMV/SpMM this->get_executor()->run( @@ -138,17 +128,10 @@ void Fbcsr::apply_impl(const LinOp *alpha, const LinOp *b, using TFbcsr = Fbcsr; if (auto b_fbcsr = dynamic_cast(b)) { // if b is a FBCSR matrix, we compute a SpGeMM - auto x_fbcsr = as(x); - auto x_copy = x_fbcsr->clone(); - this->get_executor()->run(fbcsr::make_advanced_spgemm( - as(alpha), this, b_fbcsr, as(beta), x_copy.get(), - x_fbcsr)); + throw NotImplemented(__FILE__, __LINE__, "Adv SpGeMM for Fbcsr"); } else if (dynamic_cast *>(b)) { // if b is an identity matrix, we compute an SpGEAM - auto x_fbcsr = as(x); - auto x_copy = x_fbcsr->clone(); - this->get_executor()->run(fbcsr::make_spgeam( - as(alpha), this, as(beta), lend(x_copy), x_fbcsr)); + throw NotImplemented(__FILE__, __LINE__, "Adv SpGeMM for Fbcsr"); } else { // otherwise we assume that b is dense and compute a SpMV/SpMM this->get_executor()->run( @@ -211,30 +194,6 @@ void Fbcsr::move_to( } -template -void Fbcsr::convert_to( - Coo *result) const GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// auto tmp = Coo::create( -// exec, this->get_size(), this->get_num_stored_elements()); -// tmp->values_ = this->values_; -// tmp->col_idxs_ = this->col_idxs_; -// exec->run(fbcsr::make_convert_to_coo(this, tmp.get())); -// tmp->move_to(result); -//} - - -template -void Fbcsr::move_to(Coo *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// this->convert_to(result); -//} - - template void Fbcsr::convert_to(Dense *result) const { @@ -469,85 +428,6 @@ std::unique_ptr Fbcsr::conj_transpose() const } -template -std::unique_ptr Fbcsr::row_permute( - const Array *permutation_indices) const GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); -// auto exec = this->get_executor(); -// auto permute_cpy = -// Fbcsr::create(exec, this->get_size(), this->get_num_stored_elements(), -// this->get_strategy()); -// -// exec->run( -// fbcsr::make_row_permute(permutation_indices, this, -// permute_cpy.get())); -// permute_cpy->make_srow(); -// return std::move(permute_cpy); -//} - - -template -std::unique_ptr Fbcsr::column_permute( - const Array *permutation_indices) const GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); -// auto exec = this->get_executor(); -// auto permute_cpy = -// Fbcsr::create(exec, this->get_size(), this->get_num_stored_elements(), -// this->get_strategy()); -// -// exec->run( -// fbcsr::make_column_permute(permutation_indices, this, -// permute_cpy.get())); -// permute_cpy->make_srow(); -// return std::move(permute_cpy); -//} - - -template -std::unique_ptr Fbcsr::inverse_row_permute( - const Array *inverse_permutation_indices) const - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// 
GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), -// this->get_size()[0]); -// auto exec = this->get_executor(); -// auto inverse_permute_cpy = -// Fbcsr::create(exec, this->get_size(), this->get_num_stored_elements(), -// this->get_strategy()); -// -// exec->run(fbcsr::make_inverse_row_permute(inverse_permutation_indices, -// this, -// inverse_permute_cpy.get())); -// inverse_permute_cpy->make_srow(); -// return std::move(inverse_permute_cpy); -//} - - -template -std::unique_ptr Fbcsr::inverse_column_permute( - const Array *inverse_permutation_indices) const - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), -// this->get_size()[1]); -// auto exec = this->get_executor(); -// auto inverse_permute_cpy = -// Fbcsr::create(exec, this->get_size(), this->get_num_stored_elements(), -// this->get_strategy()); -// -// exec->run(fbcsr::make_inverse_column_permute( -// inverse_permutation_indices, this, inverse_permute_cpy.get())); -// inverse_permute_cpy->make_srow(); -// return std::move(inverse_permute_cpy); -//} - - template void Fbcsr::sort_by_column_index() GKO_NOT_IMPLEMENTED; //{ diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index 4968acd4713..c7a2747039e 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -66,29 +66,6 @@ namespace kernels { const matrix::Dense *beta, \ matrix::Dense *c) -#define GKO_DECLARE_FBCSR_SPGEMM_KERNEL(ValueType, IndexType) \ - void spgemm(std::shared_ptr exec, \ - const matrix::Fbcsr *a, \ - const matrix::Fbcsr *b, \ - matrix::Fbcsr *c) - -#define GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) \ - void advanced_spgemm(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Fbcsr *a, \ - const matrix::Fbcsr *b, \ - const matrix::Dense *beta, \ - const matrix::Fbcsr *d, \ - matrix::Fbcsr *c) - -#define GKO_DECLARE_FBCSR_SPGEAM_KERNEL(ValueType, IndexType) \ - void spgeam(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Fbcsr *a, \ - const matrix::Dense *beta, \ - const matrix::Fbcsr *b, \ - matrix::Fbcsr *c) - #define GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ const matrix::Fbcsr *source, \ @@ -99,32 +76,6 @@ namespace kernels { const matrix::Fbcsr *source, \ matrix::Csr *result) -#define GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType) \ - void convert_to_coo(std::shared_ptr exec, \ - const matrix::Fbcsr *source, \ - matrix::Coo *result) - -#define GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType) \ - void convert_to_ell(std::shared_ptr exec, \ - const matrix::Fbcsr *source, \ - matrix::Ell *result) - -#define GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType) \ - void convert_to_hybrid(std::shared_ptr exec, \ - const matrix::Fbcsr *source, \ - matrix::Hybrid *result) - -#define GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType) \ - void convert_to_sellp(std::shared_ptr exec, \ - const matrix::Fbcsr *source, \ - matrix::Sellp *result) - -#define GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL(ValueType, IndexType) \ - void calculate_total_cols( \ - std::shared_ptr exec, \ - const matrix::Fbcsr *source, size_type *result, \ - size_type stride_factor, size_type slice_size) - #define GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) \ void transpose(std::shared_ptr exec, \ const matrix::Fbcsr *orig, \ @@ 
-135,26 +86,6 @@ namespace kernels { const matrix::Fbcsr *orig, \ matrix::Fbcsr *trans) -#define GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ - void row_permute(std::shared_ptr exec, \ - const IndexType *permutation_indices, \ - const matrix::Fbcsr *orig, \ - matrix::Fbcsr *row_permuted) - -#define GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ - void inverse_row_permute( \ - std::shared_ptr exec, \ - const IndexType *permutation_indices, \ - const matrix::Fbcsr *orig, \ - matrix::Fbcsr *row_permuted) - -#define GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ - void inverse_column_permute( \ - std::shared_ptr exec, \ - const IndexType *permutation_indices, \ - const matrix::Fbcsr *orig, \ - matrix::Fbcsr *column_permuted) - #define GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, \ IndexType) \ void calculate_max_nnz_per_row( \ @@ -188,36 +119,14 @@ namespace kernels { template \ GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_FBCSR_SPGEMM_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_SPGEAM_KERNEL(ValueType, IndexType); \ - template \ GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL(ValueType, IndexType); \ - template \ GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index b53df43ae55..f2ac5a85a01 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -39,12 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include -#include #include #include -#include -#include -#include #include "core/components/fill_array.hpp" @@ -94,285 +90,12 @@ using compiled_kernels = syn::value_list; using classical_kernels = syn::value_list; -using spgeam_kernels = - syn::value_list; - - -namespace host_kernel { - - -template -void merge_path_spmv( - syn::value_list, - std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); -// const IndexType grid_num = -// ceildiv(total, spmv_block_size * items_per_thread); -// const dim3 grid(grid_num); -// const dim3 block(spmv_block_size); -// Array row_out(exec, grid_num); -// Array val_out(exec, grid_num); -// -// for (IndexType column_id = 0; column_id < b->get_size()[1]; column_id++) { -// if (alpha == nullptr && beta == nullptr) { -// const auto b_vals = b->get_const_values() + column_id; -// auto c_vals = c->get_values() + column_id; -// kernel::abstract_merge_path_spmv -// <<>>( -// static_cast(a->get_size()[0]), -// as_cuda_type(a->get_const_values()), -// a->get_const_col_idxs(), -// as_cuda_type(a->get_const_row_ptrs()), -// as_cuda_type(a->get_const_srow()), as_cuda_type(b_vals), -// b->get_stride(), as_cuda_type(c_vals), c->get_stride(), -// as_cuda_type(row_out.get_data()), -// as_cuda_type(val_out.get_data())); -// kernel::abstract_reduce<<<1, spmv_block_size>>>( -// grid_num, as_cuda_type(val_out.get_data()), -// as_cuda_type(row_out.get_data()), as_cuda_type(c_vals), -// c->get_stride()); -// -// } else if (alpha != nullptr && beta != nullptr) { -// const auto b_vals = b->get_const_values() + column_id; -// auto c_vals = c->get_values() + column_id; -// kernel::abstract_merge_path_spmv -// <<>>( -// static_cast(a->get_size()[0]), -// as_cuda_type(alpha->get_const_values()), -// as_cuda_type(a->get_const_values()), -// a->get_const_col_idxs(), -// as_cuda_type(a->get_const_row_ptrs()), -// as_cuda_type(a->get_const_srow()), as_cuda_type(b_vals), -// b->get_stride(), as_cuda_type(beta->get_const_values()), -// as_cuda_type(c_vals), c->get_stride(), -// as_cuda_type(row_out.get_data()), -// as_cuda_type(val_out.get_data())); -// kernel::abstract_reduce<<<1, spmv_block_size>>>( -// grid_num, as_cuda_type(val_out.get_data()), -// as_cuda_type(row_out.get_data()), -// as_cuda_type(alpha->get_const_values()), as_cuda_type(c_vals), -// c->get_stride()); -// } else { -// GKO_KERNEL_NOT_FOUND; -// } -// } -//} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_merge_path_spmv, merge_path_spmv); - - -template -int compute_items_per_thread(std::shared_ptr exec) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const int version = -// (exec->get_major_version() << 4) + exec->get_minor_version(); -// // The num_item is decided to make the occupancy 100% -// // TODO: Extend this list when new GPU is released -// // Tune this parameter -// // 128 threads/block the number of items per threads -// // 3.0 3.5: 6 -// // 3.7: 14 -// // 5.0, 5.3, 6.0, 6.2: 8 -// // 5.2, 6.1, 7.0: 12 -// int num_item = 6; -// switch (version) { -// case 0x50: -// case 0x53: -// case 0x60: -// case 0x62: -// num_item = 8; -// break; -// case 0x52: -// case 0x61: -// case 0x70: -// num_item = 12; -// break; -// case 0x37: -// num_item = 14; -// 
} -// // Ensure that the following is satisfied: -// // sizeof(IndexType) + sizeof(ValueType) -// // <= items_per_thread * sizeof(IndexType) -// constexpr int minimal_num = -// ceildiv(sizeof(IndexType) + sizeof(ValueType), sizeof(IndexType)); -// int items_per_thread = num_item * 4 / sizeof(IndexType); -// return std::max(minimal_num, items_per_thread); -//} - - -template -void classical_spmv( - syn::value_list, - std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto nwarps = exec->get_num_warps_per_sm() * -// exec->get_num_multiprocessor() * classical_overweight; -// const auto gridx = -// std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size), -// int64(nwarps / warps_in_block)); -// const dim3 grid(gridx, b->get_size()[1]); -// const dim3 block(spmv_block_size); -// -// if (alpha == nullptr && beta == nullptr) { -// kernel::abstract_classical_spmv<<>>( -// a->get_size()[0], as_cuda_type(a->get_const_values()), -// a->get_const_col_idxs(), as_cuda_type(a->get_const_row_ptrs()), -// as_cuda_type(b->get_const_values()), b->get_stride(), -// as_cuda_type(c->get_values()), c->get_stride()); -// -// } else if (alpha != nullptr && beta != nullptr) { -// kernel::abstract_classical_spmv<<>>( -// a->get_size()[0], as_cuda_type(alpha->get_const_values()), -// as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), -// as_cuda_type(a->get_const_row_ptrs()), -// as_cuda_type(b->get_const_values()), b->get_stride(), -// as_cuda_type(beta->get_const_values()), -// as_cuda_type(c->get_values()), c->get_stride()); -// } else { -// GKO_KERNEL_NOT_FOUND; -// } -//} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); - - -} // namespace host_kernel - template void spmv(std::shared_ptr exec, const matrix::Fbcsr *a, const matrix::Dense *b, matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (a->get_strategy()->get_name() == "load_balance") { -// components::fill_array(exec, c->get_values(), -// c->get_num_stored_elements(), -// zero()); -// const IndexType nwarps = a->get_num_srow_elements(); -// if (nwarps > 0) { -// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); -// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), -// b->get_size()[1]); -// kernel::abstract_spmv<<>>( -// nwarps, static_cast(a->get_size()[0]), -// as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), -// as_cuda_type(a->get_const_row_ptrs()), -// as_cuda_type(a->get_const_srow()), -// as_cuda_type(b->get_const_values()), -// as_cuda_type(b->get_stride()), as_cuda_type(c->get_values()), -// as_cuda_type(c->get_stride())); -// } else { -// GKO_NOT_SUPPORTED(nwarps); -// } -// } else if (a->get_strategy()->get_name() == "merge_path") { -// int items_per_thread = -// host_kernel::compute_items_per_thread(exec); -// host_kernel::select_merge_path_spmv( -// compiled_kernels(), -// [&items_per_thread](int compiled_info) { -// return items_per_thread == compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c); -// } else if (a->get_strategy()->get_name() == "classical") { -// IndexType max_length_per_row = 0; -// using Tfbcsr = matrix::Fbcsr; -// if (auto strategy = -// std::dynamic_pointer_cast( -// a->get_strategy())) { -// max_length_per_row = 
strategy->get_max_length_per_row(); -// } else if (auto strategy = std::dynamic_pointer_cast< -// const typename Tfbcsr::automatical>(a->get_strategy())) -// { -// max_length_per_row = strategy->get_max_length_per_row(); -// } else { -// GKO_NOT_SUPPORTED(a->get_strategy()); -// } -// host_kernel::select_classical_spmv( -// classical_kernels(), -// [&max_length_per_row](int compiled_info) { -// return max_length_per_row >= compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c); -// } else if (a->get_strategy()->get_name() == "sparselib" || -// a->get_strategy()->get_name() == "cusparse") { -// if (cusparse::is_supported::value) { -// // TODO: add implementation for int64 and multiple RHS -// auto handle = exec->get_cusparse_handle(); -// { -// cusparse::pointer_mode_guard pm_guard(handle); -// const auto alpha = one(); -// const auto beta = zero(); -// // TODO: add implementation for int64 and multiple RHS -// if (b->get_stride() != 1 || c->get_stride() != 1) -// GKO_NOT_IMPLEMENTED; -// -//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -// auto descr = cusparse::create_mat_descr(); -// auto row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// cusparse::spmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, -// a->get_size()[0], a->get_size()[1], -// a->get_num_stored_elements(), &alpha, descr, -// a->get_const_values(), row_ptrs, col_idxs, -// b->get_const_values(), &beta, c->get_values()); -// -// cusparse::destroy(descr); -//#else // CUDA_VERSION >= 11000 -// cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE; -// cusparseSpMVAlg_t alg = CUSPARSE_FBCSRMV_ALG1; -// auto row_ptrs = -// const_cast(a->get_const_row_ptrs()); -// auto col_idxs = -// const_cast(a->get_const_col_idxs()); -// auto values = const_cast(a->get_const_values()); -// auto mat = cusparse::create_fbcsr( -// a->get_size()[0], a->get_size()[1], -// a->get_num_stored_elements(), row_ptrs, col_idxs, values); -// auto b_val = const_cast(b->get_const_values()); -// auto c_val = c->get_values(); -// auto vecb = -// cusparse::create_dnvec(b->get_num_stored_elements(), -// b_val); -// auto vecc = -// cusparse::create_dnvec(c->get_num_stored_elements(), -// c_val); -// size_type buffer_size = 0; -// cusparse::spmv_buffersize(handle, trans, &alpha, -// mat, -// vecb, &beta, vecc, alg, -// &buffer_size); -// -// gko::Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// cusparse::spmv(handle, trans, &alpha, mat, vecb, -// &beta, vecc, alg, buffer); -// cusparse::destroy(vecb); -// cusparse::destroy(vecc); -// cusparse::destroy(mat); -//#endif -// } -// } else { -// GKO_NOT_IMPLEMENTED; -// } -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); @@ -384,489 +107,11 @@ void advanced_spmv(std::shared_ptr exec, const matrix::Dense *b, const matrix::Dense *beta, matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (a->get_strategy()->get_name() == "load_balance") { -// dense::scale(exec, beta, c); -// -// const IndexType nwarps = a->get_num_srow_elements(); -// -// if (nwarps > 0) { -// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); -// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), -// b->get_size()[1]); -// kernel::abstract_spmv<<>>( -// nwarps, static_cast(a->get_size()[0]), -// as_cuda_type(alpha->get_const_values()), -// as_cuda_type(a->get_const_values()), 
a->get_const_col_idxs(), -// as_cuda_type(a->get_const_row_ptrs()), -// as_cuda_type(a->get_const_srow()), -// as_cuda_type(b->get_const_values()), -// as_cuda_type(b->get_stride()), as_cuda_type(c->get_values()), -// as_cuda_type(c->get_stride())); -// } else { -// GKO_NOT_SUPPORTED(nwarps); -// } -// } else if (a->get_strategy()->get_name() == "sparselib" || -// a->get_strategy()->get_name() == "cusparse") { -// if (cusparse::is_supported::value) { -// // TODO: add implementation for int64 and multiple RHS -// if (b->get_stride() != 1 || c->get_stride() != 1) -// GKO_NOT_IMPLEMENTED; -// -//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -// auto descr = cusparse::create_mat_descr(); -// auto row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// cusparse::spmv(exec->get_cusparse_handle(), -// CUSPARSE_OPERATION_NON_TRANSPOSE, a->get_size()[0], -// a->get_size()[1], a->get_num_stored_elements(), -// alpha->get_const_values(), descr, -// a->get_const_values(), row_ptrs, col_idxs, -// b->get_const_values(), beta->get_const_values(), -// c->get_values()); -// -// cusparse::destroy(descr); -//#else // CUDA_VERSION >= 11000 -// cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE; -// cusparseSpMVAlg_t alg = CUSPARSE_FBCSRMV_ALG1; -// auto row_ptrs = const_cast(a->get_const_row_ptrs()); -// auto col_idxs = const_cast(a->get_const_col_idxs()); -// auto values = const_cast(a->get_const_values()); -// auto mat = cusparse::create_fbcsr(a->get_size()[0], -// a->get_size()[1], -// a->get_num_stored_elements(), -// row_ptrs, col_idxs, values); -// auto b_val = const_cast(b->get_const_values()); -// auto c_val = c->get_values(); -// auto vecb = -// cusparse::create_dnvec(b->get_num_stored_elements(), b_val); -// auto vecc = -// cusparse::create_dnvec(c->get_num_stored_elements(), c_val); -// size_type buffer_size = 0; -// cusparse::spmv_buffersize( -// exec->get_cusparse_handle(), trans, alpha->get_const_values(), -// mat, vecb, beta->get_const_values(), vecc, alg, &buffer_size); -// gko::Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// cusparse::spmv( -// exec->get_cusparse_handle(), trans, alpha->get_const_values(), -// mat, vecb, beta->get_const_values(), vecc, alg, buffer); -// cusparse::destroy(vecb); -// cusparse::destroy(vecc); -// cusparse::destroy(mat); -//#endif -// } else { -// GKO_NOT_IMPLEMENTED; -// } -// } else if (a->get_strategy()->get_name() == "classical") { -// IndexType max_length_per_row = 0; -// using Tfbcsr = matrix::Fbcsr; -// if (auto strategy = -// std::dynamic_pointer_cast( -// a->get_strategy())) { -// max_length_per_row = strategy->get_max_length_per_row(); -// } else if (auto strategy = std::dynamic_pointer_cast< -// const typename Tfbcsr::automatical>(a->get_strategy())) -// { -// max_length_per_row = strategy->get_max_length_per_row(); -// } else { -// GKO_NOT_SUPPORTED(a->get_strategy()); -// } -// host_kernel::select_classical_spmv( -// classical_kernels(), -// [&max_length_per_row](int compiled_info) { -// return max_length_per_row >= compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, -// beta); -// } else if (a->get_strategy()->get_name() == "merge_path") { -// int items_per_thread = -// host_kernel::compute_items_per_thread(exec); -// host_kernel::select_merge_path_spmv( -// compiled_kernels(), -// [&items_per_thread](int compiled_info) { -// return items_per_thread == compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c, 
alpha, -// beta); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); -template -void spgemm(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto a_nnz = IndexType(a->get_num_stored_elements()); -// auto a_vals = a->get_const_values(); -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto b_vals = b->get_const_values(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// auto c_row_ptrs = c->get_row_ptrs(); -// -// if (cusparse::is_supported::value) { -// auto handle = exec->get_cusparse_handle(); -// cusparse::pointer_mode_guard pm_guard(handle); -// -// auto alpha = one(); -// auto a_nnz = static_cast(a->get_num_stored_elements()); -// auto b_nnz = static_cast(b->get_num_stored_elements()); -// auto null_value = static_cast(nullptr); -// auto null_index = static_cast(nullptr); -// auto zero_nnz = IndexType{}; -// auto m = IndexType(a->get_size()[0]); -// auto n = IndexType(b->get_size()[1]); -// auto k = IndexType(a->get_size()[1]); -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// -//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -// auto a_descr = cusparse::create_mat_descr(); -// auto b_descr = cusparse::create_mat_descr(); -// auto c_descr = cusparse::create_mat_descr(); -// auto d_descr = cusparse::create_mat_descr(); -// auto info = cusparse::create_spgemm_info(); -// // allocate buffer -// size_type buffer_size{}; -// cusparse::spgemm_buffer_size( -// handle, m, n, k, &alpha, a_descr, a_nnz, a_row_ptrs, a_col_idxs, -// b_descr, b_nnz, b_row_ptrs, b_col_idxs, null_value, d_descr, -// zero_nnz, null_index, null_index, info, buffer_size); -// Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// -// // count nnz -// IndexType c_nnz{}; -// cusparse::spgemm_nnz(handle, m, n, k, a_descr, a_nnz, a_row_ptrs, -// a_col_idxs, b_descr, b_nnz, b_row_ptrs, -// b_col_idxs, d_descr, zero_nnz, null_index, -// null_index, c_descr, c_row_ptrs, &c_nnz, info, -// buffer); -// -// // accumulate non-zeros -// c_col_idxs_array.resize_and_reset(c_nnz); -// c_vals_array.resize_and_reset(c_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// cusparse::spgemm(handle, m, n, k, &alpha, a_descr, a_nnz, a_vals, -// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, -// b_row_ptrs, b_col_idxs, null_value, d_descr, -// zero_nnz, null_value, null_index, null_index, -// c_descr, c_vals, c_row_ptrs, c_col_idxs, info, -// buffer); -// -// cusparse::destroy(info); -// cusparse::destroy(d_descr); -// cusparse::destroy(c_descr); -// cusparse::destroy(b_descr); -// cusparse::destroy(a_descr); -// -//#else // CUDA_VERSION >= 11000 -// const auto beta = zero(); -// auto spgemm_descr = cusparse::create_spgemm_descr(); -// auto a_descr = cusparse::create_fbcsr(m, k, a_nnz, -// const_cast(a_row_ptrs), -// const_cast(a_col_idxs), -// const_cast(a_vals)); -// auto b_descr = cusparse::create_fbcsr(k, n, b_nnz, -// const_cast(b_row_ptrs), -// const_cast(b_col_idxs), -// const_cast(b_vals)); -// auto c_descr = cusparse::create_fbcsr(m, n, zero_nnz, null_index, -// null_index, null_value); -// -// // estimate work -// 
size_type buffer1_size{}; -// cusparse::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, -// &beta, c_descr, spgemm_descr, -// buffer1_size, nullptr); -// Array buffer1{exec, buffer1_size}; -// cusparse::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, -// &beta, c_descr, spgemm_descr, -// buffer1_size, buffer1.get_data()); -// -// // compute spgemm -// size_type buffer2_size{}; -// cusparse::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, -// c_descr, spgemm_descr, buffer1.get_data(), -// buffer2_size, nullptr); -// Array buffer2{exec, buffer2_size}; -// cusparse::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, -// c_descr, spgemm_descr, buffer1.get_data(), -// buffer2_size, buffer2.get_data()); -// -// // copy data to result -// auto c_nnz = cusparse::sparse_matrix_nnz(c_descr); -// c_col_idxs_array.resize_and_reset(c_nnz); -// c_vals_array.resize_and_reset(c_nnz); -// cusparse::fbcsr_set_pointers(c_descr, c_row_ptrs, -// c_col_idxs_array.get_data(), -// c_vals_array.get_data()); -// -// cusparse::spgemm_copy(handle, &alpha, a_descr, b_descr, &beta, -// c_descr, -// spgemm_descr); -// -// cusparse::destroy(c_descr); -// cusparse::destroy(b_descr); -// cusparse::destroy(a_descr); -// cusparse::destroy(spgemm_descr); -//#endif // CUDA_VERSION >= 11000 -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); - - -namespace { - - -template -void spgeam(syn::value_list, - std::shared_ptr exec, const ValueType *alpha, - const IndexType *a_row_ptrs, const IndexType *a_col_idxs, - const ValueType *a_vals, const ValueType *beta, - const IndexType *b_row_ptrs, const IndexType *b_col_idxs, - const ValueType *b_vals, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto m = static_cast(c->get_size()[0]); -// auto c_row_ptrs = c->get_row_ptrs(); -// // count nnz for alpha * A + beta * B -// auto subwarps_per_block = default_block_size / subwarp_size; -// auto num_blocks = ceildiv(m, subwarps_per_block); -// kernel::spgeam_nnz<<>>( -// a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, c_row_ptrs); -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, m + 1); -// -// // accumulate non-zeros for alpha * A + beta * B -// matrix::FbcsrBuilder c_builder{c}; -// auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m); -// c_builder.get_col_idx_array().resize_and_reset(c_nnz); -// c_builder.get_value_array().resize_and_reset(c_nnz); -// auto c_col_idxs = c->get_col_idxs(); -// auto c_vals = c->get_values(); -// kernel::spgeam<<>>( -// as_cuda_type(alpha), a_row_ptrs, a_col_idxs, as_cuda_type(a_vals), -// as_cuda_type(beta), b_row_ptrs, b_col_idxs, as_cuda_type(b_vals), m, -// c_row_ptrs, c_col_idxs, as_cuda_type(c_vals)); -//} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); - - -} // namespace - - -template -void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - const matrix::Dense *beta, - const matrix::Fbcsr *d, - matrix::Fbcsr *c) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (cusparse::is_supported::value) { -// auto handle = exec->get_cusparse_handle(); -// cusparse::pointer_mode_guard pm_guard(handle); -// -// auto valpha = exec->copy_val_to_host(alpha->get_const_values()); -// auto a_nnz = IndexType(a->get_num_stored_elements()); -// auto a_vals = 
a->get_const_values(); -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto b_nnz = IndexType(b->get_num_stored_elements()); -// auto b_vals = b->get_const_values(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// auto vbeta = exec->copy_val_to_host(beta->get_const_values()); -// auto d_nnz = IndexType(d->get_num_stored_elements()); -// auto d_vals = d->get_const_values(); -// auto d_row_ptrs = d->get_const_row_ptrs(); -// auto d_col_idxs = d->get_const_col_idxs(); -// auto m = IndexType(a->get_size()[0]); -// auto n = IndexType(b->get_size()[1]); -// auto k = IndexType(a->get_size()[1]); -// auto c_row_ptrs = c->get_row_ptrs(); -// -//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// auto a_descr = cusparse::create_mat_descr(); -// auto b_descr = cusparse::create_mat_descr(); -// auto c_descr = cusparse::create_mat_descr(); -// auto d_descr = cusparse::create_mat_descr(); -// auto info = cusparse::create_spgemm_info(); -// // allocate buffer -// size_type buffer_size{}; -// cusparse::spgemm_buffer_size( -// handle, m, n, k, &valpha, a_descr, a_nnz, a_row_ptrs, a_col_idxs, -// b_descr, b_nnz, b_row_ptrs, b_col_idxs, &vbeta, d_descr, d_nnz, -// d_row_ptrs, d_col_idxs, info, buffer_size); -// Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// -// // count nnz -// IndexType c_nnz{}; -// cusparse::spgemm_nnz(handle, m, n, k, a_descr, a_nnz, a_row_ptrs, -// a_col_idxs, b_descr, b_nnz, b_row_ptrs, -// b_col_idxs, d_descr, d_nnz, d_row_ptrs, -// d_col_idxs, c_descr, c_row_ptrs, &c_nnz, info, -// buffer); -// -// // accumulate non-zeros -// c_col_idxs_array.resize_and_reset(c_nnz); -// c_vals_array.resize_and_reset(c_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// cusparse::spgemm(handle, m, n, k, &valpha, a_descr, a_nnz, a_vals, -// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, -// b_row_ptrs, b_col_idxs, &vbeta, d_descr, d_nnz, -// d_vals, d_row_ptrs, d_col_idxs, c_descr, c_vals, -// c_row_ptrs, c_col_idxs, info, buffer); -// -// cusparse::destroy(info); -// cusparse::destroy(d_descr); -// cusparse::destroy(c_descr); -// cusparse::destroy(b_descr); -// cusparse::destroy(a_descr); -//#else // CUDA_VERSION >= 11000 -// auto null_value = static_cast(nullptr); -// auto null_index = static_cast(nullptr); -// auto one_val = one(); -// auto zero_val = zero(); -// auto zero_nnz = IndexType{}; -// auto spgemm_descr = cusparse::create_spgemm_descr(); -// auto a_descr = cusparse::create_fbcsr(m, k, a_nnz, -// const_cast(a_row_ptrs), -// const_cast(a_col_idxs), -// const_cast(a_vals)); -// auto b_descr = cusparse::create_fbcsr(k, n, b_nnz, -// const_cast(b_row_ptrs), -// const_cast(b_col_idxs), -// const_cast(b_vals)); -// auto c_descr = cusparse::create_fbcsr(m, n, zero_nnz, null_index, -// null_index, null_value); -// -// // estimate work -// size_type buffer1_size{}; -// cusparse::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, -// &zero_val, c_descr, spgemm_descr, -// buffer1_size, nullptr); -// Array buffer1{exec, buffer1_size}; -// cusparse::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, -// &zero_val, c_descr, spgemm_descr, -// buffer1_size, buffer1.get_data()); -// -// // compute spgemm -// size_type buffer2_size{}; -// 
cusparse::spgemm_compute(handle, &one_val, a_descr, b_descr, -// &zero_val, -// c_descr, spgemm_descr, buffer1.get_data(), -// buffer2_size, nullptr); -// Array buffer2{exec, buffer2_size}; -// cusparse::spgemm_compute(handle, &one_val, a_descr, b_descr, -// &zero_val, -// c_descr, spgemm_descr, buffer1.get_data(), -// buffer2_size, buffer2.get_data()); -// -// // write result to temporary storage -// auto c_tmp_nnz = cusparse::sparse_matrix_nnz(c_descr); -// Array c_tmp_row_ptrs_array(exec, m + 1); -// Array c_tmp_col_idxs_array(exec, c_tmp_nnz); -// Array c_tmp_vals_array(exec, c_tmp_nnz); -// cusparse::fbcsr_set_pointers(c_descr, c_tmp_row_ptrs_array.get_data(), -// c_tmp_col_idxs_array.get_data(), -// c_tmp_vals_array.get_data()); -// -// cusparse::spgemm_copy(handle, &one_val, a_descr, b_descr, &zero_val, -// c_descr, spgemm_descr); -// -// cusparse::destroy(c_descr); -// cusparse::destroy(b_descr); -// cusparse::destroy(a_descr); -// cusparse::destroy(spgemm_descr); -// -// auto spgeam_total_nnz = c_tmp_nnz + d->get_num_stored_elements(); -// auto nnz_per_row = spgeam_total_nnz / m; -// select_spgeam( -// spgeam_kernels(), -// [&](int compiled_subwarp_size) { -// return compiled_subwarp_size >= nnz_per_row || -// compiled_subwarp_size == config::warp_size; -// }, -// syn::value_list(), syn::type_list<>(), exec, -// alpha->get_const_values(), c_tmp_row_ptrs_array.get_const_data(), -// c_tmp_col_idxs_array.get_const_data(), -// c_tmp_vals_array.get_const_data(), beta->get_const_values(), -// d_row_ptrs, d_col_idxs, d_vals, c); -//#endif // CUDA_VERSION >= 11000 -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); - - -template -void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *beta, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto total_nnz = -// a->get_num_stored_elements() + b->get_num_stored_elements(); -// auto nnz_per_row = total_nnz / a->get_size()[0]; -// select_spgeam( -// spgeam_kernels(), -// [&](int compiled_subwarp_size) { -// return compiled_subwarp_size >= nnz_per_row || -// compiled_subwarp_size == config::warp_size; -// }, -// syn::value_list(), syn::type_list<>(), exec, -// alpha->get_const_values(), a->get_const_row_ptrs(), -// a->get_const_col_idxs(), a->get_const_values(), -// beta->get_const_values(), b->get_const_row_ptrs(), -// b->get_const_col_idxs(), b->get_const_values(), c); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); - - template void convert_row_ptrs_to_idxs(std::shared_ptr exec, const IndexType *ptrs, size_type num_rows, @@ -880,50 +125,10 @@ void convert_row_ptrs_to_idxs(std::shared_ptr exec, //} -template -void convert_to_coo(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Coo *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = result->get_size()[0]; -// -// auto row_idxs = result->get_row_idxs(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// -// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); - - template void convert_to_dense(std::shared_ptr exec, const matrix::Fbcsr *source, matrix::Dense *result) GKO_NOT_IMPLEMENTED; 
-//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = result->get_size()[0]; -// const auto num_cols = result->get_size()[1]; -// const auto stride = result->get_stride(); -// const auto row_ptrs = source->get_const_row_ptrs(); -// const auto col_idxs = source->get_const_col_idxs(); -// const auto vals = source->get_const_values(); -// -// const dim3 block_size(config::warp_size, -// config::max_block_size / config::warp_size, 1); -// const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), -// ceildiv(num_rows, block_size.y), 1); -// kernel::initialize_zero_dense<<>>( -// num_rows, num_cols, stride, as_cuda_type(result->get_values())); -// -// auto grid_dim = ceildiv(num_rows, default_block_size); -// kernel::fill_in_dense<<>>( -// num_rows, as_cuda_type(row_ptrs), as_cuda_type(col_idxs), -// as_cuda_type(vals), stride, as_cuda_type(result->get_values())); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); @@ -939,203 +144,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); -template -void convert_to_sellp(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Sellp *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = result->get_size()[0]; -// const auto num_cols = result->get_size()[1]; -// -// auto result_values = result->get_values(); -// auto result_col_idxs = result->get_col_idxs(); -// auto slice_lengths = result->get_slice_lengths(); -// auto slice_sets = result->get_slice_sets(); -// -// const auto slice_size = (result->get_slice_size() == 0) -// ? matrix::default_slice_size -// : result->get_slice_size(); -// const auto stride_factor = (result->get_stride_factor() == 0) -// ? 
matrix::default_stride_factor -// : result->get_stride_factor(); -// const int slice_num = ceildiv(num_rows, slice_size); -// -// const auto source_values = source->get_const_values(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// const auto source_col_idxs = source->get_const_col_idxs(); -// -// auto nnz_per_row = Array(exec, num_rows); -// auto grid_dim = ceildiv(num_rows, default_block_size); -// -// if (grid_dim > 0) { -// kernel::calculate_nnz_per_row<<>>( -// num_rows, as_cuda_type(source_row_ptrs), -// as_cuda_type(nnz_per_row.get_data())); -// } -// -// grid_dim = slice_num; -// -// if (grid_dim > 0) { -// kernel::calculate_slice_lengths<<>>( -// num_rows, slice_size, stride_factor, -// as_cuda_type(nnz_per_row.get_const_data()), -// as_cuda_type(slice_lengths), as_cuda_type(slice_sets)); -// } -// -// components::prefix_sum(exec, slice_sets, slice_num + 1); -// -// grid_dim = ceildiv(num_rows, default_block_size); -// if (grid_dim > 0) { -// kernel::fill_in_sellp<<>>( -// num_rows, slice_size, as_cuda_type(source_values), -// as_cuda_type(source_row_ptrs), as_cuda_type(source_col_idxs), -// as_cuda_type(slice_lengths), as_cuda_type(slice_sets), -// as_cuda_type(result_col_idxs), as_cuda_type(result_values)); -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); - - -template -void convert_to_ell(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Ell *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto source_values = source->get_const_values(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// const auto source_col_idxs = source->get_const_col_idxs(); -// -// auto result_values = result->get_values(); -// auto result_col_idxs = result->get_col_idxs(); -// const auto stride = result->get_stride(); -// const auto max_nnz_per_row = result->get_num_stored_elements_per_row(); -// const auto num_rows = result->get_size()[0]; -// const auto num_cols = result->get_size()[1]; -// -// const auto init_grid_dim = -// ceildiv(max_nnz_per_row * num_rows, default_block_size); -// -// kernel::initialize_zero_ell<<>>( -// max_nnz_per_row, stride, as_cuda_type(result_values), -// as_cuda_type(result_col_idxs)); -// -// const auto grid_dim = -// ceildiv(num_rows * config::warp_size, default_block_size); -// -// kernel::fill_in_ell<<>>( -// num_rows, stride, as_cuda_type(source_values), -// as_cuda_type(source_row_ptrs), as_cuda_type(source_col_idxs), -// as_cuda_type(result_values), as_cuda_type(result_col_idxs)); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); - - -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result, size_type stride_factor, - size_type slice_size) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// -// if (num_rows == 0) { -// *result = 0; -// return; -// } -// -// const auto slice_num = ceildiv(num_rows, slice_size); -// const auto row_ptrs = source->get_const_row_ptrs(); -// -// auto nnz_per_row = Array(exec, num_rows); -// auto grid_dim = ceildiv(num_rows, default_block_size); -// -// kernel::calculate_nnz_per_row<<>>( -// num_rows, as_cuda_type(row_ptrs), -// as_cuda_type(nnz_per_row.get_data())); -// -// grid_dim = ceildiv(slice_num * config::warp_size, default_block_size); -// auto 
max_nnz_per_slice = Array(exec, slice_num); -// -// kernel::reduce_max_nnz_per_slice<<>>( -// num_rows, slice_size, stride_factor, -// as_cuda_type(nnz_per_row.get_const_data()), -// as_cuda_type(max_nnz_per_slice.get_data())); -// -// grid_dim = ceildiv(slice_num, default_block_size); -// auto block_results = Array(exec, grid_dim); -// -// kernel::reduce_total_cols<<>>( -// slice_num, as_cuda_type(max_nnz_per_slice.get_const_data()), -// as_cuda_type(block_results.get_data())); -// -// auto d_result = Array(exec, 1); -// -// kernel::reduce_total_cols<<<1, default_block_size>>>( -// grid_dim, as_cuda_type(block_results.get_const_data()), -// as_cuda_type(d_result.get_data())); -// -// *result = exec->copy_val_to_host(d_result.get_const_data()); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); - - template void transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (cusparse::is_supported::value) { -//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; -// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; -// -// cusparse::transpose( -// exec->get_cusparse_handle(), orig->get_size()[0], -// orig->get_size()[1], orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_row_ptrs(), trans->get_col_idxs(), copyValues, -// idxBase); -//#else // CUDA_VERSION >= 11000 -// cudaDataType_t cu_value = -// gko::kernels::cuda::cuda_data_type(); -// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; -// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; -// cusparseFbcsr2CscAlg_t alg = CUSPARSE_FBCSR2CSC_ALG1; -// size_type buffer_size = 0; -// cusparse::transpose_buffersize( -// exec->get_cusparse_handle(), orig->get_size()[0], -// orig->get_size()[1], orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, -// copyValues, idxBase, alg, &buffer_size); -// Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// cusparse::transpose( -// exec->get_cusparse_handle(), orig->get_size()[0], -// orig->get_size()[1], orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, -// copyValues, idxBase, alg, buffer); -//#endif -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); @@ -1146,94 +158,11 @@ void conj_transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (cusparse::is_supported::value) { -// const dim3 block_size(default_block_size, 1, 1); -// const dim3 grid_size( -// ceildiv(trans->get_num_stored_elements(), block_size.x), 1, 1); -// -//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; -// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; -// -// cusparse::transpose( -// exec->get_cusparse_handle(), orig->get_size()[0], -// orig->get_size()[1], 
orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_row_ptrs(), trans->get_col_idxs(), copyValues, -// idxBase); -//#else // CUDA_VERSION >= 11000 -// cudaDataType_t cu_value = -// gko::kernels::cuda::cuda_data_type(); -// cusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; -// cusparseIndexBase_t idxBase = CUSPARSE_INDEX_BASE_ZERO; -// cusparseFbcsr2CscAlg_t alg = CUSPARSE_FBCSR2CSC_ALG1; -// size_type buffer_size = 0; -// cusparse::transpose_buffersize( -// exec->get_cusparse_handle(), orig->get_size()[0], -// orig->get_size()[1], orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, -// copyValues, idxBase, alg, &buffer_size); -// Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// cusparse::transpose( -// exec->get_cusparse_handle(), orig->get_size()[0], -// orig->get_size()[1], orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_row_ptrs(), trans->get_col_idxs(), cu_value, -// copyValues, idxBase, alg, buffer); -//#endif -// -// conjugate_kernel<<>>( -// trans->get_num_stored_elements(), -// as_cuda_type(trans->get_values())); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); -template -void row_permute(std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); - - -template -void inverse_row_permute(std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); - - -template -void inverse_column_permute( - std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); - - template void calculate_max_nnz_per_row( std::shared_ptr exec, @@ -1269,52 +198,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); -template -void convert_to_hybrid(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Hybrid *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto ell_val = result->get_ell_values(); -// auto ell_col = result->get_ell_col_idxs(); -// auto coo_val = result->get_coo_values(); -// auto coo_col = result->get_coo_col_idxs(); -// auto coo_row = result->get_coo_row_idxs(); -// const auto stride = result->get_ell_stride(); -// const auto max_nnz_per_row = -// result->get_ell_num_stored_elements_per_row(); const auto num_rows = -// result->get_size()[0]; const auto coo_num_stored_elements = -// result->get_coo_num_stored_elements(); auto grid_dim = -// ceildiv(max_nnz_per_row * num_rows, default_block_size); -// -// kernel::initialize_zero_ell<<>>( -// max_nnz_per_row, stride, as_cuda_type(ell_val), -// as_cuda_type(ell_col)); -// -// grid_dim = 
ceildiv(num_rows, default_block_size); -// auto coo_offset = Array(exec, num_rows); -// kernel::calculate_hybrid_coo_row_nnz<<>>( -// num_rows, max_nnz_per_row, as_cuda_type(source->get_const_row_ptrs()), -// as_cuda_type(coo_offset.get_data())); -// -// components::prefix_sum(exec, coo_offset.get_data(), num_rows); -// -// grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); -// kernel::fill_in_hybrid<<>>( -// num_rows, stride, max_nnz_per_row, -// as_cuda_type(source->get_const_values()), -// as_cuda_type(source->get_const_row_ptrs()), -// as_cuda_type(source->get_const_col_idxs()), -// as_cuda_type(coo_offset.get_const_data()), as_cuda_type(ell_val), -// as_cuda_type(ell_col), as_cuda_type(coo_val), as_cuda_type(coo_col), -// as_cuda_type(coo_row)); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); - - template void calculate_nonzeros_per_row( std::shared_ptr exec, @@ -1338,54 +221,6 @@ template void sort_by_column_index(std::shared_ptr exec, matrix::Fbcsr *to_sort) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (cusparse::is_supported::value) { -// auto handle = exec->get_cusparse_handle(); -// auto descr = cusparse::create_mat_descr(); -// auto m = IndexType(to_sort->get_size()[0]); -// auto n = IndexType(to_sort->get_size()[1]); -// auto nnz = IndexType(to_sort->get_num_stored_elements()); -// auto row_ptrs = to_sort->get_const_row_ptrs(); -// auto col_idxs = to_sort->get_col_idxs(); -// auto vals = to_sort->get_values(); -// -// // copy values -// Array tmp_vals_array(exec, nnz); -// exec->copy(nnz, vals, tmp_vals_array.get_data()); -// auto tmp_vals = tmp_vals_array.get_const_data(); -// -// // init identity permutation -// Array permutation_array(exec, nnz); -// auto permutation = permutation_array.get_data(); -// cusparse::create_identity_permutation(handle, nnz, permutation); -// -// // allocate buffer -// size_type buffer_size{}; -// cusparse::fbcsrsort_buffer_size(handle, m, n, nnz, row_ptrs, col_idxs, -// buffer_size); -// Array buffer_array{exec, buffer_size}; -// auto buffer = buffer_array.get_data(); -// -// // sort column indices -// cusparse::fbcsrsort(handle, m, n, nnz, descr, row_ptrs, col_idxs, -// permutation, buffer); -// -// // sort values -//#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -// cusparse::gather(handle, nnz, tmp_vals, vals, permutation); -//#else // CUDA_VERSION >= 11000 -// auto val_vec = cusparse::create_spvec(nnz, nnz, permutation, vals); -// auto tmp_vec = -// cusparse::create_dnvec(nnz, const_cast(tmp_vals)); -// cusparse::gather(handle, tmp_vec, val_vec); -//#endif -// -// cusparse::destroy(descr); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); @@ -1418,23 +253,6 @@ template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto nnz = orig->get_num_stored_elements(); -// const auto diag_size = diag->get_size()[0]; -// const auto num_blocks = -// ceildiv(config::warp_size * diag_size, default_block_size); -// -// const auto orig_values = orig->get_const_values(); -// const auto orig_row_ptrs = orig->get_const_row_ptrs(); -// const auto orig_col_idxs = orig->get_const_col_idxs(); -// auto diag_values = diag->get_values(); -// -// kernel::extract_diagonal<<>>( -// 
diag_size, nnz, as_cuda_type(orig_values), -// as_cuda_type(orig_row_ptrs), as_cuda_type(orig_col_idxs), -// as_cuda_type(diag_values)); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index 9d40716bea0..65ce218ac71 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -3,6 +3,5 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) -#ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(sellp_kernels) diff --git a/cuda/test/matrix/fbcsr_kernels.cpp b/cuda/test/matrix/fbcsr_kernels.cpp deleted file mode 100644 index 72bc9c149f4..00000000000 --- a/cuda/test/matrix/fbcsr_kernels.cpp +++ /dev/null @@ -1,883 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "core/matrix/fbcsr_kernels.hpp" -#include "cuda/test/utils.hpp" - - -namespace { - - -class Fbcsr : public ::testing::Test { -protected: - using Vec = gko::matrix::Dense<>; - using Mtx = gko::matrix::Fbcsr<>; - using ComplexVec = gko::matrix::Dense>; - using ComplexMtx = gko::matrix::Fbcsr>; - - Fbcsr() : mtx_size(532, 231), rand_engine(42) {} - - void SetUp() - { - ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - cuda = gko::CudaExecutor::create(0, ref); - } - - void TearDown() - { - if (cuda != nullptr) { - ASSERT_NO_THROW(cuda->synchronize()); - } - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(min_nnz_row, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - void set_up_apply_data(std::shared_ptr strategy, - int num_vectors = 1) - { - mtx = Mtx::create(ref, strategy); - mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); - square_mtx = Mtx::create(ref, strategy); - square_mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[0], 1)); - expected = gen_mtx(mtx_size[0], num_vectors, 1); - y = gen_mtx(mtx_size[1], num_vectors, 1); - alpha = gko::initialize({2.0}, ref); - beta = gko::initialize({-1.0}, ref); - dmtx = Mtx::create(cuda, strategy); - dmtx->copy_from(mtx.get()); - square_dmtx = Mtx::create(cuda, strategy); - square_dmtx->copy_from(square_mtx.get()); - dresult = Vec::create(cuda); - dresult->copy_from(expected.get()); - dy = Vec::create(cuda); - dy->copy_from(y.get()); - dalpha = Vec::create(cuda); - dalpha->copy_from(alpha.get()); - dbeta = Vec::create(cuda); - dbeta->copy_from(beta.get()); - } - - void set_up_apply_complex_data( - std::shared_ptr strategy) - { - complex_mtx = ComplexMtx::create(ref, strategy); - complex_mtx->copy_from( - gen_mtx(mtx_size[0], mtx_size[1], 1)); - complex_dmtx = ComplexMtx::create(cuda, strategy); - complex_dmtx->copy_from(complex_mtx.get()); - } - - struct matrix_pair { - std::unique_ptr ref; - std::unique_ptr cuda; - }; - - matrix_pair gen_unsorted_mtx() - { - constexpr int min_nnz_per_row = 2; // Must be at least 2 - auto local_mtx_ref = - gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); - for (size_t row = 0; row < mtx_size[0]; ++row) { - const auto row_ptrs = local_mtx_ref->get_const_row_ptrs(); - const auto start_row = row_ptrs[row]; - auto col_idx = local_mtx_ref->get_col_idxs() + start_row; - auto vals = local_mtx_ref->get_values() + start_row; - const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; - auto swap_idx_dist = - std::uniform_int_distribution<>(0, nnz_in_this_row - 1); - // shuffle `nnz_in_this_row / 2` times - for (size_t perm = 0; perm < nnz_in_this_row; perm += 2) { - const auto idx1 = swap_idx_dist(rand_engine); - const auto idx2 = swap_idx_dist(rand_engine); - std::swap(col_idx[idx1], col_idx[idx2]); - std::swap(vals[idx1], vals[idx2]); - } - } - auto local_mtx_cuda = Mtx::create(cuda); - local_mtx_cuda->copy_from(local_mtx_ref.get()); - - return {std::move(local_mtx_ref), std::move(local_mtx_cuda)}; - } - - std::shared_ptr ref; - std::shared_ptr cuda; - - const gko::dim<2> mtx_size; - std::ranlux48 rand_engine; - - std::unique_ptr mtx; - std::unique_ptr complex_mtx; - std::unique_ptr 
square_mtx; - std::unique_ptr expected; - std::unique_ptr y; - std::unique_ptr alpha; - std::unique_ptr beta; - - std::unique_ptr dmtx; - std::unique_ptr complex_dmtx; - std::unique_ptr square_dmtx; - std::unique_ptr dresult; - std::unique_ptr dy; - std::unique_ptr dalpha; - std::unique_ptr dbeta; -}; - - -TEST_F(Fbcsr, StrategyAfterCopyIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda)); -// -// ASSERT_EQ(mtx->get_strategy()->get_name(), -// dmtx->get_strategy()->get_name()); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda)); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda)); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithCusparse) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithCusparse) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// 
dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithAutomatical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda)); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda), 3); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda), 3); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToFbcsrMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// mtx->apply(alpha.get(), trans.get(), beta.get(), square_mtx.get()); -// dmtx->apply(dalpha.get(), d_trans.get(), dbeta.get(), square_dmtx.get()); -// -// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); -// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, 
SimpleApplyToFbcsrMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// mtx->apply(trans.get(), square_mtx.get()); -// dmtx->apply(d_trans.get(), square_dmtx.get()); -// -// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); -// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto a = gen_mtx(mtx_size[0], mtx_size[1], 0); -// auto b = gen_mtx(mtx_size[0], mtx_size[1], 0); -// auto da = Mtx::create(cuda); -// auto db = Mtx::create(cuda); -// da->copy_from(a.get()); -// db->copy_from(b.get()); -// auto id = gko::matrix::Identity::create(ref, -// mtx_size[1]); auto did = -// gko::matrix::Identity::create(cuda, mtx_size[1]); -// -// a->apply(alpha.get(), id.get(), beta.get(), b.get()); -// da->apply(dalpha.get(), did.get(), dbeta.get(), db.get()); -// -// GKO_ASSERT_MTX_NEAR(b, db, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(b, db); -// ASSERT_TRUE(db->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, TransposeIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda)); -// -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), -// static_cast(trans.get()), 0.0); -// ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, ConjugateTransposeIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_complex_data(std::make_shared(cuda)); -// -// auto trans = complex_mtx->conj_transpose(); -// auto d_trans = complex_dmtx->conj_transpose(); -// -// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), -// static_cast(trans.get()), 0.0); -// ASSERT_TRUE( -// static_cast(d_trans.get())->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, ConvertToDenseIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto dense_mtx = gko::matrix::Dense<>::create(ref); -// auto ddense_mtx = gko::matrix::Dense<>::create(cuda); -// -// mtx->convert_to(dense_mtx.get()); -// dmtx->convert_to(ddense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToDenseIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto dense_mtx = gko::matrix::Dense<>::create(ref); -// auto ddense_mtx = gko::matrix::Dense<>::create(cuda); -// -// mtx->move_to(dense_mtx.get()); -// dmtx->move_to(ddense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToEllIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto ell_mtx = gko::matrix::Ell<>::create(ref); -// auto dell_mtx = gko::matrix::Ell<>::create(cuda); -// 
-// mtx->convert_to(ell_mtx.get()); -// dmtx->convert_to(dell_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToEllIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto ell_mtx = gko::matrix::Ell<>::create(ref); -// auto dell_mtx = gko::matrix::Ell<>::create(cuda); -// -// mtx->move_to(ell_mtx.get()); -// dmtx->move_to(dell_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToSparsityFbcsrIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); -// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(cuda); -// -// mtx->convert_to(sparsity_mtx.get()); -// dmtx->convert_to(d_sparsity_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToSparsityFbcsrIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); -// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(cuda); -// -// mtx->move_to(sparsity_mtx.get()); -// dmtx->move_to(d_sparsity_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, CalculateMaxNnzPerRowIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// gko::size_type max_nnz_per_row; -// gko::size_type dmax_nnz_per_row; -// -// gko::kernels::reference::fbcsr::calculate_max_nnz_per_row(ref, mtx.get(), -// &max_nnz_per_row); -// gko::kernels::cuda::fbcsr::calculate_max_nnz_per_row(cuda, dmtx.get(), -// &dmax_nnz_per_row); -// -// ASSERT_EQ(max_nnz_per_row, dmax_nnz_per_row); -//} - - -TEST_F(Fbcsr, ConvertToCooIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto coo_mtx = gko::matrix::Coo<>::create(ref); -// auto dcoo_mtx = gko::matrix::Coo<>::create(cuda); -// -// mtx->convert_to(coo_mtx.get()); -// dmtx->convert_to(dcoo_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToCooIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto coo_mtx = gko::matrix::Coo<>::create(ref); -// auto dcoo_mtx = gko::matrix::Coo<>::create(cuda); -// -// mtx->move_to(coo_mtx.get()); -// dmtx->move_to(dcoo_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToSellpIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); -// auto dsellp_mtx = gko::matrix::Sellp<>::create(cuda); -// -// mtx->convert_to(sellp_mtx.get()); -// dmtx->convert_to(dsellp_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, 
MoveToSellpIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); -// auto dsellp_mtx = gko::matrix::Sellp<>::create(cuda); -// -// mtx->move_to(sellp_mtx.get()); -// dmtx->move_to(dsellp_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertsEmptyToSellp) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto dempty_mtx = Mtx::create(cuda); -// auto dsellp_mtx = gko::matrix::Sellp<>::create(cuda); -// -// dempty_mtx->convert_to(dsellp_mtx.get()); -// -// ASSERT_EQ(cuda->copy_val_to_host(dsellp_mtx->get_const_slice_sets()), 0); -// ASSERT_FALSE(dsellp_mtx->get_size()); -//} - - -TEST_F(Fbcsr, CalculateTotalColsIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// gko::size_type total_cols; -// gko::size_type dtotal_cols; -// -// gko::kernels::reference::fbcsr::calculate_total_cols( -// ref, mtx.get(), &total_cols, 2, gko::matrix::default_slice_size); -// gko::kernels::cuda::fbcsr::calculate_total_cols( -// cuda, dmtx.get(), &dtotal_cols, 2, gko::matrix::default_slice_size); -// -// ASSERT_EQ(total_cols, dtotal_cols); -//} - - -TEST_F(Fbcsr, CalculatesNonzerosPerRow) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// gko::Array row_nnz(ref, mtx->get_size()[0]); -// gko::Array drow_nnz(cuda, dmtx->get_size()[0]); -// -// gko::kernels::reference::fbcsr::calculate_nonzeros_per_row(ref, mtx.get(), -// &row_nnz); -// gko::kernels::cuda::fbcsr::calculate_nonzeros_per_row(cuda, dmtx.get(), -// &drow_nnz); -// -// GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); -//} - - -TEST_F(Fbcsr, ConvertToHybridIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid_type = gko::matrix::Hybrid<>; -// set_up_apply_data(std::make_shared()); -// auto hybrid_mtx = Hybrid_type::create( -// ref, std::make_shared(2)); -// auto dhybrid_mtx = Hybrid_type::create( -// cuda, std::make_shared(2)); -// -// mtx->convert_to(hybrid_mtx.get()); -// dmtx->convert_to(dhybrid_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToHybridIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid_type = gko::matrix::Hybrid<>; -// set_up_apply_data(std::make_shared()); -// auto hybrid_mtx = Hybrid_type::create( -// ref, std::make_shared(2)); -// auto dhybrid_mtx = Hybrid_type::create( -// cuda, std::make_shared(2)); -// -// mtx->move_to(hybrid_mtx.get()); -// dmtx->move_to(dhybrid_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, RecognizeSortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// bool is_sorted_cuda{}; -// bool is_sorted_ref{}; -// -// is_sorted_ref = mtx->is_sorted_by_column_index(); -// is_sorted_cuda = dmtx->is_sorted_by_column_index(); -// -// ASSERT_EQ(is_sorted_ref, is_sorted_cuda); -//} - - -TEST_F(Fbcsr, 
RecognizeUnsortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto uns_mtx = gen_unsorted_mtx(); -// bool is_sorted_cuda{}; -// bool is_sorted_ref{}; -// -// is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); -// is_sorted_cuda = uns_mtx.cuda->is_sorted_by_column_index(); -// -// ASSERT_EQ(is_sorted_ref, is_sorted_cuda); -//} - - -TEST_F(Fbcsr, SortSortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->sort_by_column_index(); -// dmtx->sort_by_column_index(); -// -// // Values must be unchanged, therefore, tolerance is `0` -// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); -//} - - -TEST_F(Fbcsr, SortUnsortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto uns_mtx = gen_unsorted_mtx(); -// -// uns_mtx.ref->sort_by_column_index(); -// uns_mtx.cuda->sort_by_column_index(); -// -// // Values must be unchanged, therefore, tolerance is `0` -// GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.cuda, 0); -//} - - -TEST_F(Fbcsr, OneAutomaticalWorksWithDifferentMatrices) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto automatical = std::make_shared(); -// auto row_len_limit = std::max(automatical->nvidia_row_len_limit, -// automatical->amd_row_len_limit); -// auto load_balance_mtx = Mtx::create(ref); -// auto classical_mtx = Mtx::create(ref); -// load_balance_mtx->copy_from( -// gen_mtx(1, row_len_limit + 1000, row_len_limit + 1)); -// classical_mtx->copy_from(gen_mtx(50, 50, 1)); -// auto load_balance_mtx_d = Mtx::create(cuda); -// auto classical_mtx_d = Mtx::create(cuda); -// load_balance_mtx_d->copy_from(load_balance_mtx.get()); -// classical_mtx_d->copy_from(classical_mtx.get()); -// -// load_balance_mtx_d->set_strategy(automatical); -// classical_mtx_d->set_strategy(automatical); -// -// EXPECT_EQ("load_balance", load_balance_mtx_d->get_strategy()->get_name()); -// EXPECT_EQ("classical", classical_mtx_d->get_strategy()->get_name()); -// ASSERT_NE(load_balance_mtx_d->get_strategy().get(), -// classical_mtx_d->get_strategy().get()); -//} - - -TEST_F(Fbcsr, ExtractDiagonalIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// auto diag = mtx->extract_diagonal(); -// auto ddiag = dmtx->extract_diagonal(); -// -// GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); -//} - - -TEST_F(Fbcsr, InplaceAbsoluteMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda)); -// -// mtx->compute_absolute_inplace(); -// dmtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); -//} - - -TEST_F(Fbcsr, OutplaceAbsoluteMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(cuda)); -// -// auto abs_mtx = mtx->compute_absolute(); -// auto dabs_mtx = dmtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); -//} - - -TEST_F(Fbcsr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// 
set_up_apply_complex_data(std::make_shared(cuda)); -// -// complex_mtx->compute_absolute_inplace(); -// complex_dmtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); -//} - - -TEST_F(Fbcsr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_complex_data(std::make_shared(cuda)); -// -// auto abs_mtx = complex_mtx->compute_absolute(); -// auto dabs_mtx = complex_dmtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); -//} - - -} // namespace diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index 749ef7c3d2d..17c44830b9a 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -89,82 +89,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); -template -void spgemm_insert_row(unordered_set &cols, - const matrix::Fbcsr *c, - size_type row) GKO_NOT_IMPLEMENTED; - - -template -void spgemm_insert_row2(unordered_set &cols, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - size_type row) GKO_NOT_IMPLEMENTED; - - -template -void spgemm_accumulate_row(map &cols, - const matrix::Fbcsr *c, - ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; - - -template -void spgemm_accumulate_row2(map &cols, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; - - -template -void spgemm(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); - - -template -void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - const matrix::Dense *beta, - const matrix::Fbcsr *d, - matrix::Fbcsr *c) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); - - -template -void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *beta, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); - - template void convert_row_ptrs_to_idxs(std::shared_ptr exec, const IndexType *ptrs, size_type num_rows, IndexType *idxs) GKO_NOT_IMPLEMENTED; -template -void convert_to_coo(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Coo *result) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); - - template void convert_to_dense(std::shared_ptr exec, const matrix::Fbcsr *source, @@ -184,26 +114,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); -template -void convert_to_sellp(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Sellp *result) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); - - -template -void convert_to_ell(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Ell *result) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); - - template inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, const IndexType *col_idxs, @@ -239,16 +149,6 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result, size_type stride_factor, - size_type slice_size) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); - - template void calculate_max_nnz_per_row( std::shared_ptr exec, @@ -259,54 +159,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); -template -void convert_to_hybrid(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Hybrid *result) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); - - -// template -// void invert_permutation(std::shared_ptr exec, -// size_type size, const IndexType *permutation_indices, -// IndexType *inv_permutation) GKO_NOT_IMPLEMENTED; - -// GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); - - -template -void row_permute( - std::shared_ptr exec, const IndexType *perm, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); - - -template -void inverse_row_permute( - std::shared_ptr exec, const IndexType *perm, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); - - -template -void inverse_column_permute( - std::shared_ptr exec, const IndexType *perm, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); - - template void calculate_nonzeros_per_row( std::shared_ptr exec, diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index e6dc61946cb..891e644b25c 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -42,12 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include -#include #include #include -#include -#include -#include #include "core/components/fill_array.hpp" @@ -97,274 +93,12 @@ using compiled_kernels = syn::value_list; using classical_kernels = syn::value_list; -using spgeam_kernels = - syn::value_list; - - -namespace host_kernel { - - -template -void merge_path_spmv( - syn::value_list, - std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); -// const IndexType grid_num = -// ceildiv(total, spmv_block_size * items_per_thread); -// const dim3 grid(grid_num); -// const dim3 block(spmv_block_size); -// Array row_out(exec, grid_num); -// Array val_out(exec, grid_num); -// -// for (IndexType column_id = 0; column_id < b->get_size()[1]; column_id++) { -// if (alpha == nullptr && beta == nullptr) { -// const auto b_vals = b->get_const_values() + column_id; -// auto c_vals = c->get_values() + column_id; -// hipLaunchKernelGGL( -// HIP_KERNEL_NAME( -// kernel::abstract_merge_path_spmv), -// dim3(grid), dim3(block), 0, 0, -// static_cast(a->get_size()[0]), -// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), -// as_hip_type(a->get_const_row_ptrs()), -// as_hip_type(a->get_const_srow()), as_hip_type(b_vals), -// b->get_stride(), as_hip_type(c_vals), c->get_stride(), -// as_hip_type(row_out.get_data()), -// as_hip_type(val_out.get_data())); -// hipLaunchKernelGGL(kernel::abstract_reduce, dim3(1), -// dim3(spmv_block_size), 0, 0, grid_num, -// as_hip_type(val_out.get_data()), -// as_hip_type(row_out.get_data()), -// as_hip_type(c_vals), c->get_stride()); -// -// } else if (alpha != nullptr && beta != nullptr) { -// const auto b_vals = b->get_const_values() + column_id; -// auto c_vals = c->get_values() + column_id; -// hipLaunchKernelGGL( -// HIP_KERNEL_NAME( -// kernel::abstract_merge_path_spmv), -// dim3(grid), dim3(block), 0, 0, -// static_cast(a->get_size()[0]), -// as_hip_type(alpha->get_const_values()), -// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), -// as_hip_type(a->get_const_row_ptrs()), -// as_hip_type(a->get_const_srow()), as_hip_type(b_vals), -// b->get_stride(), as_hip_type(beta->get_const_values()), -// as_hip_type(c_vals), c->get_stride(), -// as_hip_type(row_out.get_data()), -// as_hip_type(val_out.get_data())); -// hipLaunchKernelGGL(kernel::abstract_reduce, dim3(1), -// dim3(spmv_block_size), 0, 0, grid_num, -// as_hip_type(val_out.get_data()), -// as_hip_type(row_out.get_data()), -// as_hip_type(alpha->get_const_values()), -// as_hip_type(c_vals), c->get_stride()); -// } else { -// GKO_KERNEL_NOT_FOUND; -// } -// } -//} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_merge_path_spmv, merge_path_spmv); - - -template -int compute_items_per_thread(std::shared_ptr exec) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -//#if GINKGO_HIP_PLATFORM_NVCC -// -// -// const int version = -// (exec->get_major_version() << 4) + exec->get_minor_version(); -// // The num_item is decided to make the occupancy 100% -// // TODO: Extend this list when new GPU is released -// // Tune this parameter -// // 128 threads/block the number of items per threads -// // 3.0 3.5: 6 -// // 3.7: 14 -// // 5.0, 5.3, 6.0, 6.2: 8 -// // 5.2, 6.1, 7.0: 12 -// int num_item = 
6; -// switch (version) { -// case 0x50: -// case 0x53: -// case 0x60: -// case 0x62: -// num_item = 8; -// break; -// case 0x52: -// case 0x61: -// case 0x70: -// num_item = 12; -// break; -// case 0x37: -// num_item = 14; -// } -// -// -//#else -// -// -// // HIP uses the minimal num_item to make the code work correctly. -// // TODO: this parameter should be tuned. -// int num_item = 6; -// -// -//#endif // GINKGO_HIP_PLATFORM_NVCC -// -// -// // Ensure that the following is satisfied: -// // sizeof(IndexType) + sizeof(ValueType) -// // <= items_per_thread * sizeof(IndexType) -// constexpr int minimal_num = -// ceildiv(sizeof(IndexType) + sizeof(ValueType), sizeof(IndexType)); -// int items_per_thread = num_item * 4 / sizeof(IndexType); -// return std::max(minimal_num, items_per_thread); -//} - - -template -void classical_spmv( - syn::value_list, std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto nwarps = exec->get_num_warps_per_sm() * -// exec->get_num_multiprocessor() * classical_overweight; -// const auto gridx = -// std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size), -// int64(nwarps / warps_in_block)); -// const dim3 grid(gridx, b->get_size()[1]); -// const dim3 block(spmv_block_size); -// -// if (alpha == nullptr && beta == nullptr) { -// hipLaunchKernelGGL( -// HIP_KERNEL_NAME(kernel::abstract_classical_spmv), -// dim3(grid), dim3(block), 0, 0, a->get_size()[0], -// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), -// as_hip_type(a->get_const_row_ptrs()), -// as_hip_type(b->get_const_values()), b->get_stride(), -// as_hip_type(c->get_values()), c->get_stride()); -// -// } else if (alpha != nullptr && beta != nullptr) { -// hipLaunchKernelGGL( -// HIP_KERNEL_NAME(kernel::abstract_classical_spmv), -// dim3(grid), dim3(block), 0, 0, a->get_size()[0], -// as_hip_type(alpha->get_const_values()), -// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), -// as_hip_type(a->get_const_row_ptrs()), -// as_hip_type(b->get_const_values()), b->get_stride(), -// as_hip_type(beta->get_const_values()), -// as_hip_type(c->get_values()), c->get_stride()); -// } else { -// GKO_KERNEL_NOT_FOUND; -// } -//} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); - - -} // namespace host_kernel - template void spmv(std::shared_ptr exec, const matrix::Fbcsr *a, const matrix::Dense *b, matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (a->get_strategy()->get_name() == "load_balance") { -// components::fill_array(exec, c->get_values(), -// c->get_num_stored_elements(), -// zero()); -// const IndexType nwarps = a->get_num_srow_elements(); -// if (nwarps > 0) { -// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); -// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), -// b->get_size()[1]); -// hipLaunchKernelGGL( -// kernel::abstract_spmv, dim3(fbcsr_grid), dim3(fbcsr_block), 0, -// 0, nwarps, static_cast(a->get_size()[0]), -// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), -// as_hip_type(a->get_const_row_ptrs()), -// as_hip_type(a->get_const_srow()), -// as_hip_type(b->get_const_values()), -// as_hip_type(b->get_stride()), as_hip_type(c->get_values()), -// as_hip_type(c->get_stride())); -// } else { -// 
GKO_NOT_SUPPORTED(nwarps); -// } -// } else if (a->get_strategy()->get_name() == "merge_path") { -// int items_per_thread = -// host_kernel::compute_items_per_thread(exec); -// host_kernel::select_merge_path_spmv( -// compiled_kernels(), -// [&items_per_thread](int compiled_info) { -// return items_per_thread == compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c); -// } else if (a->get_strategy()->get_name() == "classical") { -// IndexType max_length_per_row = 0; -// using Tfbcsr = matrix::Fbcsr; -// if (auto strategy = -// std::dynamic_pointer_cast( -// a->get_strategy())) { -// max_length_per_row = strategy->get_max_length_per_row(); -// } else if (auto strategy = std::dynamic_pointer_cast< -// const typename Tfbcsr::automatical>(a->get_strategy())) -// { -// max_length_per_row = strategy->get_max_length_per_row(); -// } else { -// GKO_NOT_SUPPORTED(a->get_strategy()); -// } -// host_kernel::select_classical_spmv( -// classical_kernels(), -// [&max_length_per_row](int compiled_info) { -// return max_length_per_row >= compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c); -// } else if (a->get_strategy()->get_name() == "sparselib" || -// a->get_strategy()->get_name() == "cusparse") { -// if (hipsparse::is_supported::value) { -// // TODO: add implementation for int64 and multiple RHS -// auto handle = exec->get_hipsparse_handle(); -// auto descr = hipsparse::create_mat_descr(); -// { -// hipsparse::pointer_mode_guard pm_guard(handle); -// auto row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// auto alpha = one(); -// auto beta = zero(); -// if (b->get_stride() != 1 || c->get_stride() != 1) { -// GKO_NOT_IMPLEMENTED; -// } -// hipsparse::spmv(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, -// a->get_size()[0], a->get_size()[1], -// a->get_num_stored_elements(), &alpha, descr, -// a->get_const_values(), row_ptrs, col_idxs, -// b->get_const_values(), &beta, -// c->get_values()); -// } -// hipsparse::destroy(descr); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); @@ -376,403 +110,15 @@ void advanced_spmv(std::shared_ptr exec, const matrix::Dense *b, const matrix::Dense *beta, matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (a->get_strategy()->get_name() == "load_balance") { -// dense::scale(exec, beta, c); -// -// const IndexType nwarps = a->get_num_srow_elements(); -// -// if (nwarps > 0) { -// const dim3 fbcsr_block(config::warp_size, warps_in_block, 1); -// const dim3 fbcsr_grid(ceildiv(nwarps, warps_in_block), -// b->get_size()[1]); -// hipLaunchKernelGGL( -// kernel::abstract_spmv, dim3(fbcsr_grid), dim3(fbcsr_block), 0, -// 0, nwarps, static_cast(a->get_size()[0]), -// as_hip_type(alpha->get_const_values()), -// as_hip_type(a->get_const_values()), a->get_const_col_idxs(), -// as_hip_type(a->get_const_row_ptrs()), -// as_hip_type(a->get_const_srow()), -// as_hip_type(b->get_const_values()), -// as_hip_type(b->get_stride()), as_hip_type(c->get_values()), -// as_hip_type(c->get_stride())); -// } else { -// GKO_NOT_SUPPORTED(nwarps); -// } -// } else if (a->get_strategy()->get_name() == "sparselib" || -// a->get_strategy()->get_name() == "cusparse") { -// if (hipsparse::is_supported::value) { -// // TODO: add implementation for int64 and multiple RHS -// auto descr = hipsparse::create_mat_descr(); -// -// auto 
row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// -// if (b->get_stride() != 1 || c->get_stride() != 1) -// GKO_NOT_IMPLEMENTED; -// -// hipsparse::spmv(exec->get_hipsparse_handle(), -// HIPSPARSE_OPERATION_NON_TRANSPOSE, -// a->get_size()[0], a->get_size()[1], -// a->get_num_stored_elements(), -// alpha->get_const_values(), descr, -// a->get_const_values(), row_ptrs, col_idxs, -// b->get_const_values(), beta->get_const_values(), -// c->get_values()); -// -// hipsparse::destroy(descr); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -// } else if (a->get_strategy()->get_name() == "classical") { -// IndexType max_length_per_row = 0; -// using Tfbcsr = matrix::Fbcsr; -// if (auto strategy = -// std::dynamic_pointer_cast( -// a->get_strategy())) { -// max_length_per_row = strategy->get_max_length_per_row(); -// } else if (auto strategy = std::dynamic_pointer_cast< -// const typename Tfbcsr::automatical>(a->get_strategy())) -// { -// max_length_per_row = strategy->get_max_length_per_row(); -// } else { -// GKO_NOT_SUPPORTED(a->get_strategy()); -// } -// host_kernel::select_classical_spmv( -// classical_kernels(), -// [&max_length_per_row](int compiled_info) { -// return max_length_per_row >= compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, -// beta); -// } else if (a->get_strategy()->get_name() == "merge_path") { -// int items_per_thread = -// host_kernel::compute_items_per_thread(exec); -// host_kernel::select_merge_path_spmv( -// compiled_kernels(), -// [&items_per_thread](int compiled_info) { -// return items_per_thread == compiled_info; -// }, -// syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, -// beta); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); -template -void spgemm(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (hipsparse::is_supported::value) { -// auto handle = exec->get_hipsparse_handle(); -// hipsparse::pointer_mode_guard pm_guard(handle); -// auto a_descr = hipsparse::create_mat_descr(); -// auto b_descr = hipsparse::create_mat_descr(); -// auto c_descr = hipsparse::create_mat_descr(); -// auto d_descr = hipsparse::create_mat_descr(); -// auto info = hipsparse::create_spgemm_info(); -// -// auto alpha = one(); -// auto a_nnz = static_cast(a->get_num_stored_elements()); -// auto a_vals = a->get_const_values(); -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto b_nnz = static_cast(b->get_num_stored_elements()); -// auto b_vals = b->get_const_values(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// auto null_value = static_cast(nullptr); -// auto null_index = static_cast(nullptr); -// auto zero_nnz = IndexType{}; -// auto m = static_cast(a->get_size()[0]); -// auto n = static_cast(b->get_size()[1]); -// auto k = static_cast(a->get_size()[1]); -// auto c_row_ptrs = c->get_row_ptrs(); -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// -// // allocate buffer -// size_type buffer_size{}; -// hipsparse::spgemm_buffer_size( -// handle, m, n, k, &alpha, a_descr, a_nnz, a_row_ptrs, a_col_idxs, -// b_descr, b_nnz, b_row_ptrs, b_col_idxs, null_value, d_descr, 
-// zero_nnz, null_index, null_index, info, buffer_size); -// Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// -// // count nnz -// IndexType c_nnz{}; -// hipsparse::spgemm_nnz( -// handle, m, n, k, a_descr, a_nnz, a_row_ptrs, a_col_idxs, b_descr, -// b_nnz, b_row_ptrs, b_col_idxs, d_descr, zero_nnz, null_index, -// null_index, c_descr, c_row_ptrs, &c_nnz, info, buffer); -// -// // accumulate non-zeros -// c_col_idxs_array.resize_and_reset(c_nnz); -// c_vals_array.resize_and_reset(c_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// hipsparse::spgemm(handle, m, n, k, &alpha, a_descr, a_nnz, a_vals, -// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, -// b_row_ptrs, b_col_idxs, null_value, d_descr, -// zero_nnz, null_value, null_index, null_index, -// c_descr, c_vals, c_row_ptrs, c_col_idxs, info, -// buffer); -// -// hipsparse::destroy_spgemm_info(info); -// hipsparse::destroy(d_descr); -// hipsparse::destroy(c_descr); -// hipsparse::destroy(b_descr); -// hipsparse::destroy(a_descr); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); - - -namespace { - - -template -void spgeam(syn::value_list, - std::shared_ptr exec, const ValueType *alpha, - const IndexType *a_row_ptrs, const IndexType *a_col_idxs, - const ValueType *a_vals, const ValueType *beta, - const IndexType *b_row_ptrs, const IndexType *b_col_idxs, - const ValueType *b_vals, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto m = static_cast(c->get_size()[0]); -// auto c_row_ptrs = c->get_row_ptrs(); -// // count nnz for alpha * A + beta * B -// auto subwarps_per_block = default_block_size / subwarp_size; -// auto num_blocks = ceildiv(m, subwarps_per_block); -// hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::spgeam_nnz), -// dim3(num_blocks), dim3(default_block_size), 0, 0, -// a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, -// c_row_ptrs); -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, m + 1); -// -// // accumulate non-zeros for alpha * A + beta * B -// matrix::FbcsrBuilder c_builder{c}; -// auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m); -// c_builder.get_col_idx_array().resize_and_reset(c_nnz); -// c_builder.get_value_array().resize_and_reset(c_nnz); -// auto c_col_idxs = c->get_col_idxs(); -// auto c_vals = c->get_values(); -// hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::spgeam), -// dim3(num_blocks), dim3(default_block_size), 0, 0, -// as_hip_type(alpha), a_row_ptrs, a_col_idxs, -// as_hip_type(a_vals), as_hip_type(beta), b_row_ptrs, -// b_col_idxs, as_hip_type(b_vals), m, c_row_ptrs, -// c_col_idxs, as_hip_type(c_vals)); -//} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); - - -} // namespace - - -template -void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - const matrix::Dense *beta, - const matrix::Fbcsr *d, - matrix::Fbcsr *c) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (hipsparse::is_supported::value) { -// auto handle = exec->get_hipsparse_handle(); -// hipsparse::pointer_mode_guard pm_guard(handle); -// auto a_descr = hipsparse::create_mat_descr(); -// auto b_descr = hipsparse::create_mat_descr(); -// auto c_descr = hipsparse::create_mat_descr(); -// auto d_descr = 
hipsparse::create_mat_descr(); -// auto info = hipsparse::create_spgemm_info(); -// -// auto a_nnz = static_cast(a->get_num_stored_elements()); -// auto a_vals = a->get_const_values(); -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto b_nnz = static_cast(b->get_num_stored_elements()); -// auto b_vals = b->get_const_values(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// auto d_vals = d->get_const_values(); -// auto d_row_ptrs = d->get_const_row_ptrs(); -// auto d_col_idxs = d->get_const_col_idxs(); -// auto null_value = static_cast(nullptr); -// auto null_index = static_cast(nullptr); -// auto one_value = one(); -// auto m = static_cast(a->get_size()[0]); -// auto n = static_cast(b->get_size()[1]); -// auto k = static_cast(a->get_size()[1]); -// -// // allocate buffer -// size_type buffer_size{}; -// hipsparse::spgemm_buffer_size( -// handle, m, n, k, &one_value, a_descr, a_nnz, a_row_ptrs, -// a_col_idxs, b_descr, b_nnz, b_row_ptrs, b_col_idxs, null_value, -// d_descr, IndexType{}, null_index, null_index, info, buffer_size); -// Array buffer_array(exec, buffer_size); -// auto buffer = buffer_array.get_data(); -// -// // count nnz -// Array c_tmp_row_ptrs_array(exec, m + 1); -// auto c_tmp_row_ptrs = c_tmp_row_ptrs_array.get_data(); -// IndexType c_nnz{}; -// hipsparse::spgemm_nnz( -// handle, m, n, k, a_descr, a_nnz, a_row_ptrs, a_col_idxs, b_descr, -// b_nnz, b_row_ptrs, b_col_idxs, d_descr, IndexType{}, null_index, -// null_index, c_descr, c_tmp_row_ptrs, &c_nnz, info, buffer); -// -// // accumulate non-zeros for A * B -// Array c_tmp_col_idxs_array(exec, c_nnz); -// Array c_tmp_vals_array(exec, c_nnz); -// auto c_tmp_col_idxs = c_tmp_col_idxs_array.get_data(); -// auto c_tmp_vals = c_tmp_vals_array.get_data(); -// hipsparse::spgemm(handle, m, n, k, &one_value, a_descr, a_nnz, a_vals, -// a_row_ptrs, a_col_idxs, b_descr, b_nnz, b_vals, -// b_row_ptrs, b_col_idxs, null_value, d_descr, -// IndexType{}, null_value, null_index, null_index, -// c_descr, c_tmp_vals, c_tmp_row_ptrs, c_tmp_col_idxs, -// info, buffer); -// -// // destroy hipsparse context -// hipsparse::destroy_spgemm_info(info); -// hipsparse::destroy(d_descr); -// hipsparse::destroy(c_descr); -// hipsparse::destroy(b_descr); -// hipsparse::destroy(a_descr); -// -// auto total_nnz = c_nnz + d->get_num_stored_elements(); -// auto nnz_per_row = total_nnz / m; -// select_spgeam( -// spgeam_kernels(), -// [&](int compiled_subwarp_size) { -// return compiled_subwarp_size >= nnz_per_row || -// compiled_subwarp_size == config::warp_size; -// }, -// syn::value_list(), syn::type_list<>(), exec, -// alpha->get_const_values(), c_tmp_row_ptrs, c_tmp_col_idxs, -// c_tmp_vals, beta->get_const_values(), d_row_ptrs, d_col_idxs, -// d_vals, c); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); - - -template -void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *beta, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto total_nnz = -// a->get_num_stored_elements() + b->get_num_stored_elements(); -// auto nnz_per_row = total_nnz / a->get_size()[0]; -// select_spgeam( -// spgeam_kernels(), -// [&](int compiled_subwarp_size) { -// return compiled_subwarp_size >= nnz_per_row || -// 
compiled_subwarp_size == config::warp_size; -// }, -// syn::value_list(), syn::type_list<>(), exec, -// alpha->get_const_values(), a->get_const_row_ptrs(), -// a->get_const_col_idxs(), a->get_const_values(), -// beta->get_const_values(), b->get_const_row_ptrs(), -// b->get_const_col_idxs(), b->get_const_values(), c); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); - - -template -void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto grid_dim = ceildiv(num_rows, default_block_size); -// -// hipLaunchKernelGGL(kernel::convert_row_ptrs_to_idxs, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, -// as_hip_type(ptrs), as_hip_type(idxs)); -//} - - -template -void convert_to_coo(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Coo *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = result->get_size()[0]; -// -// auto row_idxs = result->get_row_idxs(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// -// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); - - template void convert_to_dense(std::shared_ptr exec, const matrix::Fbcsr *source, matrix::Dense *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = result->get_size()[0]; -// const auto num_cols = result->get_size()[1]; -// const auto stride = result->get_stride(); -// const auto row_ptrs = source->get_const_row_ptrs(); -// const auto col_idxs = source->get_const_col_idxs(); -// const auto vals = source->get_const_values(); -// -// const dim3 block_size(config::warp_size, -// config::max_block_size / config::warp_size, 1); -// const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), -// ceildiv(num_rows, block_size.y), 1); -// hipLaunchKernelGGL(kernel::initialize_zero_dense, dim3(init_grid_dim), -// dim3(block_size), 0, 0, num_rows, num_cols, stride, -// as_hip_type(result->get_values())); -// -// auto grid_dim = ceildiv(num_rows, default_block_size); -// hipLaunchKernelGGL( -// kernel::fill_in_dense, dim3(grid_dim), dim3(default_block_size), 0, 0, -// num_rows, as_hip_type(row_ptrs), as_hip_type(col_idxs), -// as_hip_type(vals), stride, as_hip_type(result->get_values())); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); @@ -788,190 +134,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); -template -void convert_to_sellp(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Sellp *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = result->get_size()[0]; -// const auto num_cols = result->get_size()[1]; -// -// auto result_values = result->get_values(); -// auto result_col_idxs = result->get_col_idxs(); -// auto slice_lengths = result->get_slice_lengths(); -// auto slice_sets = result->get_slice_sets(); -// -// const auto slice_size = (result->get_slice_size() == 0) -// ? matrix::default_slice_size -// : result->get_slice_size(); -// const auto stride_factor = (result->get_stride_factor() == 0) -// ? 
matrix::default_stride_factor -// : result->get_stride_factor(); -// const int slice_num = ceildiv(num_rows, slice_size); -// -// const auto source_values = source->get_const_values(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// const auto source_col_idxs = source->get_const_col_idxs(); -// -// auto nnz_per_row = Array(exec, num_rows); -// auto grid_dim = ceildiv(num_rows, default_block_size); -// -// if (grid_dim > 0) { -// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, -// as_hip_type(source_row_ptrs), -// as_hip_type(nnz_per_row.get_data())); -// } -// -// grid_dim = slice_num; -// -// if (grid_dim > 0) { -// hipLaunchKernelGGL(kernel::calculate_slice_lengths, dim3(grid_dim), -// dim3(config::warp_size), 0, 0, num_rows, -// slice_size, stride_factor, -// as_hip_type(nnz_per_row.get_const_data()), -// as_hip_type(slice_lengths), -// as_hip_type(slice_sets)); -// } -// -// components::prefix_sum(exec, slice_sets, slice_num + 1); -// -// grid_dim = ceildiv(num_rows, default_block_size); -// if (grid_dim > 0) { -// hipLaunchKernelGGL( -// kernel::fill_in_sellp, dim3(grid_dim), dim3(default_block_size), -// 0, 0, num_rows, slice_size, as_hip_type(source_values), -// as_hip_type(source_row_ptrs), as_hip_type(source_col_idxs), -// as_hip_type(slice_lengths), as_hip_type(slice_sets), -// as_hip_type(result_col_idxs), as_hip_type(result_values)); -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); - - -template -void convert_to_ell(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Ell *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto source_values = source->get_const_values(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// const auto source_col_idxs = source->get_const_col_idxs(); -// -// auto result_values = result->get_values(); -// auto result_col_idxs = result->get_col_idxs(); -// const auto stride = result->get_stride(); -// const auto max_nnz_per_row = result->get_num_stored_elements_per_row(); -// const auto num_rows = result->get_size()[0]; -// const auto num_cols = result->get_size()[1]; -// -// const auto init_grid_dim = -// ceildiv(max_nnz_per_row * num_rows, default_block_size); -// -// hipLaunchKernelGGL(kernel::initialize_zero_ell, dim3(init_grid_dim), -// dim3(default_block_size), 0, 0, max_nnz_per_row, -// stride, as_hip_type(result_values), -// as_hip_type(result_col_idxs)); -// -// const auto grid_dim = -// ceildiv(num_rows * config::warp_size, default_block_size); -// -// hipLaunchKernelGGL(kernel::fill_in_ell, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, stride, -// as_hip_type(source_values), -// as_hip_type(source_row_ptrs), -// as_hip_type(source_col_idxs), -// as_hip_type(result_values), -// as_hip_type(result_col_idxs)); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); - - -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result, size_type stride_factor, - size_type slice_size) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// -// if (num_rows == 0) { -// *result = 0; -// return; -// } -// -// const auto slice_num = ceildiv(num_rows, slice_size); -// const auto row_ptrs = source->get_const_row_ptrs(); 
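// [Illustrative sketch, not part of this patch] The FBCSR convert_to_csr kernel a few
// hunks above is left GKO_NOT_IMPLEMENTED here. Conceptually, a block row of an Fbcsr
// matrix with block size bs expands into bs scalar CSR rows, and each stored bs x bs
// block contributes bs entries to every one of those rows. The serial sketch below uses
// placeholder names (bs, brow_ptrs, bcol_idxs, block_vals) rather than the actual Ginkgo
// kernel interface, and assumes the blocks are stored contiguously in row-major order.
template <typename ValueType, typename IndexType>
void fbcsr_to_csr_sketch(const IndexType num_block_rows, const int bs,
                         const IndexType *brow_ptrs, const IndexType *bcol_idxs,
                         const ValueType *block_vals,  // row-major bs x bs blocks
                         IndexType *csr_row_ptrs, IndexType *csr_col_idxs,
                         ValueType *csr_vals)
{
    IndexType nz = 0;
    csr_row_ptrs[0] = 0;
    for (IndexType brow = 0; brow < num_block_rows; ++brow) {
        // scalar rows brow * bs ... brow * bs + bs - 1 come from this block row
        for (int i = 0; i < bs; ++i) {
            for (IndexType b = brow_ptrs[brow]; b < brow_ptrs[brow + 1]; ++b) {
                const IndexType bcol = bcol_idxs[b];
                for (int j = 0; j < bs; ++j) {
                    csr_col_idxs[nz] = bcol * bs + j;
                    csr_vals[nz] = block_vals[b * bs * bs + i * bs + j];
                    ++nz;
                }
            }
            csr_row_ptrs[brow * bs + i + 1] = nz;
        }
    }
}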
-// -// auto nnz_per_row = Array(exec, num_rows); -// auto grid_dim = ceildiv(num_rows, default_block_size); -// -// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, -// as_hip_type(row_ptrs), -// as_hip_type(nnz_per_row.get_data())); -// -// grid_dim = ceildiv(slice_num * config::warp_size, default_block_size); -// auto max_nnz_per_slice = Array(exec, slice_num); -// -// hipLaunchKernelGGL(kernel::reduce_max_nnz_per_slice, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, slice_size, -// stride_factor, -// as_hip_type(nnz_per_row.get_const_data()), -// as_hip_type(max_nnz_per_slice.get_data())); -// -// grid_dim = ceildiv(slice_num, default_block_size); -// auto block_results = Array(exec, grid_dim); -// -// hipLaunchKernelGGL(kernel::reduce_total_cols, dim3(grid_dim), -// dim3(default_block_size), 0, 0, slice_num, -// as_hip_type(max_nnz_per_slice.get_const_data()), -// as_hip_type(block_results.get_data())); -// -// auto d_result = Array(exec, 1); -// -// hipLaunchKernelGGL(kernel::reduce_total_cols, dim3(1), -// dim3(default_block_size), 0, 0, grid_dim, -// as_hip_type(block_results.get_const_data()), -// as_hip_type(d_result.get_data())); -// -// *result = exec->copy_val_to_host(d_result.get_const_data()); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); - - template void transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (hipsparse::is_supported::value) { -// hipsparseAction_t copyValues = HIPSPARSE_ACTION_NUMERIC; -// hipsparseIndexBase_t idxBase = HIPSPARSE_INDEX_BASE_ZERO; -// -// hipsparse::transpose( -// exec->get_hipsparse_handle(), orig->get_size()[0], -// orig->get_size()[1], orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_row_ptrs(), trans->get_col_idxs(), copyValues, -// idxBase); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); @@ -982,70 +148,11 @@ void conj_transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (hipsparse::is_supported::value) { -// const dim3 block_size(default_block_size, 1, 1); -// const dim3 grid_size( -// ceildiv(trans->get_num_stored_elements(), block_size.x), 1, 1); -// -// hipsparseAction_t copyValues = HIPSPARSE_ACTION_NUMERIC; -// hipsparseIndexBase_t idxBase = HIPSPARSE_INDEX_BASE_ZERO; -// -// hipsparse::transpose( -// exec->get_hipsparse_handle(), orig->get_size()[0], -// orig->get_size()[1], orig->get_num_stored_elements(), -// orig->get_const_values(), orig->get_const_row_ptrs(), -// orig->get_const_col_idxs(), trans->get_values(), -// trans->get_col_idxs(), trans->get_row_ptrs(), copyValues, -// idxBase); -// -// hipLaunchKernelGGL(conjugate_kernel, dim3(grid_size), -// dim3(block_size), -// 0, 0, trans->get_num_stored_elements(), -// as_hip_type(trans->get_values())); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); -template -void row_permute(std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr 
*row_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); - - -template -void inverse_row_permute(std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); - - -template -void inverse_column_permute( - std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); - - template void calculate_max_nnz_per_row( std::shared_ptr exec, @@ -1084,56 +191,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); -template -void convert_to_hybrid(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Hybrid *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto ell_val = result->get_ell_values(); -// auto ell_col = result->get_ell_col_idxs(); -// auto coo_val = result->get_coo_values(); -// auto coo_col = result->get_coo_col_idxs(); -// auto coo_row = result->get_coo_row_idxs(); -// const auto stride = result->get_ell_stride(); -// const auto max_nnz_per_row = -// result->get_ell_num_stored_elements_per_row(); const auto num_rows = -// result->get_size()[0]; const auto coo_num_stored_elements = -// result->get_coo_num_stored_elements(); auto grid_dim = -// ceildiv(max_nnz_per_row * num_rows, default_block_size); -// -// hipLaunchKernelGGL(kernel::initialize_zero_ell, dim3(grid_dim), -// dim3(default_block_size), 0, 0, max_nnz_per_row, -// stride, as_hip_type(ell_val), as_hip_type(ell_col)); -// -// grid_dim = ceildiv(num_rows, default_block_size); -// auto coo_offset = Array(exec, num_rows); -// hipLaunchKernelGGL(kernel::calculate_hybrid_coo_row_nnz, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, -// max_nnz_per_row, -// as_hip_type(source->get_const_row_ptrs()), -// as_hip_type(coo_offset.get_data())); -// -// components::prefix_sum(exec, coo_offset.get_data(), num_rows); -// -// grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); -// hipLaunchKernelGGL(kernel::fill_in_hybrid, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, stride, -// max_nnz_per_row, -// as_hip_type(source->get_const_values()), -// as_hip_type(source->get_const_row_ptrs()), -// as_hip_type(source->get_const_col_idxs()), -// as_hip_type(coo_offset.get_const_data()), -// as_hip_type(ell_val), as_hip_type(ell_col), -// as_hip_type(coo_val), as_hip_type(coo_col), -// as_hip_type(coo_row)); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); - - template void calculate_nonzeros_per_row( std::shared_ptr exec, @@ -1159,48 +216,6 @@ template void sort_by_column_index(std::shared_ptr exec, matrix::Fbcsr *to_sort) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// if (hipsparse::is_supported::value) { -// auto handle = exec->get_hipsparse_handle(); -// auto descr = hipsparse::create_mat_descr(); -// auto m = IndexType(to_sort->get_size()[0]); -// auto n = IndexType(to_sort->get_size()[1]); -// auto nnz = IndexType(to_sort->get_num_stored_elements()); -// auto row_ptrs = to_sort->get_const_row_ptrs(); -// auto col_idxs = 
to_sort->get_col_idxs(); -// auto vals = to_sort->get_values(); -// -// // copy values -// Array tmp_vals_array(exec, nnz); -// exec->copy(nnz, vals, tmp_vals_array.get_data()); -// auto tmp_vals = tmp_vals_array.get_const_data(); -// -// // init identity permutation -// Array permutation_array(exec, nnz); -// auto permutation = permutation_array.get_data(); -// hipsparse::create_identity_permutation(handle, nnz, permutation); -// -// // allocate buffer -// size_type buffer_size{}; -// hipsparse::fbcsrsort_buffer_size(handle, m, n, nnz, row_ptrs, -// col_idxs, -// buffer_size); -// Array buffer_array{exec, buffer_size}; -// auto buffer = buffer_array.get_data(); -// -// // sort column indices -// hipsparse::fbcsrsort(handle, m, n, nnz, descr, row_ptrs, col_idxs, -// permutation, buffer); -// -// // sort values -// hipsparse::gather(handle, nnz, tmp_vals, vals, permutation); -// -// hipsparse::destroy(descr); -// } else { -// GKO_NOT_IMPLEMENTED; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); @@ -1234,24 +249,6 @@ template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto nnz = orig->get_num_stored_elements(); -// const auto diag_size = diag->get_size()[0]; -// const auto num_blocks = -// ceildiv(config::warp_size * diag_size, default_block_size); -// -// const auto orig_values = orig->get_const_values(); -// const auto orig_row_ptrs = orig->get_const_row_ptrs(); -// const auto orig_col_idxs = orig->get_const_col_idxs(); -// auto diag_values = diag->get_values(); -// -// hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::extract_diagonal), -// dim3(num_blocks), dim3(default_block_size), 0, 0, -// diag_size, nnz, as_hip_type(orig_values), -// as_hip_type(orig_row_ptrs), as_hip_type(orig_col_idxs), -// as_hip_type(diag_values)); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/hip/test/matrix/CMakeLists.txt b/hip/test/matrix/CMakeLists.txt index 95d058e8069..94e92f08f5c 100644 --- a/hip/test/matrix/CMakeLists.txt +++ b/hip/test/matrix/CMakeLists.txt @@ -3,6 +3,5 @@ ginkgo_create_hip_test(csr_kernels) ginkgo_create_hip_test(dense_kernels) ginkgo_create_hip_test(diagonal_kernels) ginkgo_create_hip_test(ell_kernels) -#ginkgo_create_hip_test(fbcsr_kernels) ginkgo_create_hip_test(hybrid_kernels) ginkgo_create_hip_test(sellp_kernels) diff --git a/hip/test/matrix/fbcsr_kernels.hip.cpp b/hip/test/matrix/fbcsr_kernels.hip.cpp deleted file mode 100644 index 79b873cb02d..00000000000 --- a/hip/test/matrix/fbcsr_kernels.hip.cpp +++ /dev/null @@ -1,866 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. 
Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "core/matrix/fbcsr_kernels.hpp" -#include "hip/test/utils.hip.hpp" - - -namespace { - - -class Fbcsr : public ::testing::Test { -protected: - using Vec = gko::matrix::Dense<>; - using Mtx = gko::matrix::Fbcsr<>; - using ComplexVec = gko::matrix::Dense>; - using ComplexMtx = gko::matrix::Fbcsr>; - - Fbcsr() : mtx_size(532, 231), rand_engine(42) {} - - void SetUp() - { - ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - } - - void TearDown() - { - if (hip != nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(min_nnz_row, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - void set_up_apply_data(std::shared_ptr strategy, - int num_vectors = 1) - { - mtx = Mtx::create(ref, strategy); - mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); - square_mtx = Mtx::create(ref, strategy); - square_mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[0], 1)); - expected = gen_mtx(mtx_size[0], num_vectors, 1); - y = gen_mtx(mtx_size[1], num_vectors, 1); - alpha = gko::initialize({2.0}, ref); - beta = gko::initialize({-1.0}, ref); - dmtx = Mtx::create(hip, strategy); - dmtx->copy_from(mtx.get()); - square_dmtx = Mtx::create(hip, strategy); - square_dmtx->copy_from(square_mtx.get()); - dresult = Vec::create(hip); - dresult->copy_from(expected.get()); - dy = Vec::create(hip); - dy->copy_from(y.get()); - dalpha = Vec::create(hip); - dalpha->copy_from(alpha.get()); - dbeta = Vec::create(hip); - dbeta->copy_from(beta.get()); - } - - void set_up_apply_complex_data( - std::shared_ptr strategy) - { - complex_mtx = ComplexMtx::create(ref, strategy); - complex_mtx->copy_from( - gen_mtx(mtx_size[0], mtx_size[1], 1)); - complex_dmtx = ComplexMtx::create(hip, strategy); - complex_dmtx->copy_from(complex_mtx.get()); - } - - struct matrix_pair { - std::unique_ptr ref; - std::unique_ptr hip; - }; - - matrix_pair gen_unsorted_mtx() - { - constexpr int min_nnz_per_row = 2; // Must be at least 2 - auto local_mtx_ref = - gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); - for (size_t row = 0; row < mtx_size[0]; ++row) { - const auto row_ptrs = 
local_mtx_ref->get_const_row_ptrs(); - const auto start_row = row_ptrs[row]; - auto col_idx = local_mtx_ref->get_col_idxs() + start_row; - auto vals = local_mtx_ref->get_values() + start_row; - const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; - auto swap_idx_dist = - std::uniform_int_distribution<>(0, nnz_in_this_row - 1); - // shuffle `nnz_in_this_row / 2` times - for (size_t perm = 0; perm < nnz_in_this_row; perm += 2) { - const auto idx1 = swap_idx_dist(rand_engine); - const auto idx2 = swap_idx_dist(rand_engine); - std::swap(col_idx[idx1], col_idx[idx2]); - std::swap(vals[idx1], vals[idx2]); - } - } - auto local_mtx_hip = Mtx::create(hip); - local_mtx_hip->copy_from(local_mtx_ref.get()); - - return {std::move(local_mtx_ref), std::move(local_mtx_hip)}; - } - - std::shared_ptr ref; - std::shared_ptr hip; - - const gko::dim<2> mtx_size; - std::ranlux48 rand_engine; - - std::unique_ptr mtx; - std::unique_ptr complex_mtx; - std::unique_ptr square_mtx; - std::unique_ptr expected; - std::unique_ptr y; - std::unique_ptr alpha; - std::unique_ptr beta; - - std::unique_ptr dmtx; - std::unique_ptr complex_dmtx; - std::unique_ptr square_dmtx; - std::unique_ptr dresult; - std::unique_ptr dy; - std::unique_ptr dalpha; - std::unique_ptr dbeta; -}; - - -TEST_F(Fbcsr, StrategyAfterCopyIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// ASSERT_EQ(mtx->get_strategy()->get_name(), -// dmtx->get_strategy()->get_name()); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithHipsparse) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithHipsparse) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ 
-// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRefWithAutomatical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip), 3); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip), 3); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithClassical) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithMergePath) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the 
code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(), 3); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToFbcsrMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// mtx->apply(alpha.get(), trans.get(), beta.get(), square_mtx.get()); -// dmtx->apply(dalpha.get(), d_trans.get(), dbeta.get(), square_dmtx.get()); -// -// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); -// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, SimpleApplyToFbcsrMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// mtx->apply(trans.get(), square_mtx.get()); -// dmtx->apply(d_trans.get(), square_dmtx.get()); -// -// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); -// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// auto a = gen_mtx(mtx_size[0], mtx_size[1], 0); -// auto b = gen_mtx(mtx_size[0], mtx_size[1], 0); -// auto da = Mtx::create(hip); -// auto db = Mtx::create(hip); -// da->copy_from(a.get()); -// db->copy_from(b.get()); -// auto id = gko::matrix::Identity::create(ref, -// mtx_size[1]); auto did = -// gko::matrix::Identity::create(hip, mtx_size[1]); -// -// a->apply(alpha.get(), id.get(), beta.get(), b.get()); -// da->apply(dalpha.get(), did.get(), dbeta.get(), db.get()); -// -// GKO_ASSERT_MTX_NEAR(b, db, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(b, db); -// ASSERT_TRUE(db->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, TransposeIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), -// static_cast(trans.get()), 0.0); -// ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, ConvertToDenseIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto dense_mtx = gko::matrix::Dense<>::create(ref); -// auto ddense_mtx = gko::matrix::Dense<>::create(hip); -// -// mtx->convert_to(dense_mtx.get()); -// dmtx->convert_to(ddense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToDenseIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto dense_mtx = gko::matrix::Dense<>::create(ref); -// auto ddense_mtx = gko::matrix::Dense<>::create(hip); -// -// 
mtx->move_to(dense_mtx.get()); -// dmtx->move_to(ddense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToEllIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto ell_mtx = gko::matrix::Ell<>::create(ref); -// auto dell_mtx = gko::matrix::Ell<>::create(hip); -// -// mtx->convert_to(ell_mtx.get()); -// dmtx->convert_to(dell_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToEllIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto ell_mtx = gko::matrix::Ell<>::create(ref); -// auto dell_mtx = gko::matrix::Ell<>::create(hip); -// -// mtx->move_to(ell_mtx.get()); -// dmtx->move_to(dell_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToSparsityFbcsrIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); -// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(hip); -// -// mtx->convert_to(sparsity_mtx.get()); -// dmtx->convert_to(d_sparsity_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToSparsityFbcsrIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); -// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(hip); -// -// mtx->move_to(sparsity_mtx.get()); -// dmtx->move_to(d_sparsity_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, CalculateMaxNnzPerRowIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// gko::size_type max_nnz_per_row; -// gko::size_type dmax_nnz_per_row; -// -// gko::kernels::reference::fbcsr::calculate_max_nnz_per_row(ref, mtx.get(), -// &max_nnz_per_row); -// gko::kernels::hip::fbcsr::calculate_max_nnz_per_row(hip, dmtx.get(), -// &dmax_nnz_per_row); -// -// ASSERT_EQ(max_nnz_per_row, dmax_nnz_per_row); -//} - - -TEST_F(Fbcsr, ConvertToCooIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto coo_mtx = gko::matrix::Coo<>::create(ref); -// auto dcoo_mtx = gko::matrix::Coo<>::create(hip); -// -// mtx->convert_to(coo_mtx.get()); -// dmtx->convert_to(dcoo_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToCooIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto coo_mtx = gko::matrix::Coo<>::create(ref); -// auto dcoo_mtx = gko::matrix::Coo<>::create(hip); -// -// mtx->move_to(coo_mtx.get()); -// dmtx->move_to(dcoo_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToSellpIsEquivalentToRef) 
-GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); -// auto dsellp_mtx = gko::matrix::Sellp<>::create(hip); -// -// mtx->convert_to(sellp_mtx.get()); -// dmtx->convert_to(dsellp_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToSellpIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// auto sellp_mtx = gko::matrix::Sellp<>::create(ref); -// auto dsellp_mtx = gko::matrix::Sellp<>::create(hip); -// -// mtx->move_to(sellp_mtx.get()); -// dmtx->move_to(dsellp_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertsEmptyToSellp) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto dempty_mtx = Mtx::create(hip); -// auto dsellp_mtx = gko::matrix::Sellp<>::create(hip); -// -// dempty_mtx->convert_to(dsellp_mtx.get()); -// -// ASSERT_EQ(hip->copy_val_to_host(dsellp_mtx->get_const_slice_sets()), 0); -// ASSERT_FALSE(dsellp_mtx->get_size()); -//} - - -TEST_F(Fbcsr, CalculateTotalColsIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// gko::size_type total_cols; -// gko::size_type dtotal_cols; -// -// gko::kernels::reference::fbcsr::calculate_total_cols( -// ref, mtx.get(), &total_cols, 2, gko::matrix::default_slice_size); -// gko::kernels::hip::fbcsr::calculate_total_cols( -// hip, dmtx.get(), &dtotal_cols, 2, gko::matrix::default_slice_size); -// -// ASSERT_EQ(total_cols, dtotal_cols); -//} - - -TEST_F(Fbcsr, CalculatesNonzerosPerRow) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared()); -// gko::Array row_nnz(ref, mtx->get_size()[0]); -// gko::Array drow_nnz(hip, dmtx->get_size()[0]); -// -// gko::kernels::reference::fbcsr::calculate_nonzeros_per_row(ref, mtx.get(), -// &row_nnz); -// gko::kernels::hip::fbcsr::calculate_nonzeros_per_row(hip, dmtx.get(), -// &drow_nnz); -// -// GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); -//} - - -TEST_F(Fbcsr, ConvertToHybridIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid_type = gko::matrix::Hybrid<>; -// set_up_apply_data(std::make_shared()); -// auto hybrid_mtx = Hybrid_type::create( -// ref, std::make_shared(2)); -// auto dhybrid_mtx = Hybrid_type::create( -// hip, std::make_shared(2)); -// -// mtx->convert_to(hybrid_mtx.get()); -// dmtx->convert_to(dhybrid_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToHybridIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid_type = gko::matrix::Hybrid<>; -// set_up_apply_data(std::make_shared()); -// auto hybrid_mtx = Hybrid_type::create( -// ref, std::make_shared(2)); -// auto dhybrid_mtx = Hybrid_type::create( -// hip, std::make_shared(2)); -// -// mtx->move_to(hybrid_mtx.get()); -// dmtx->move_to(dhybrid_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, 
RecognizeSortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// bool is_sorted_hip{}; -// bool is_sorted_ref{}; -// -// is_sorted_ref = mtx->is_sorted_by_column_index(); -// is_sorted_hip = dmtx->is_sorted_by_column_index(); -// -// ASSERT_EQ(is_sorted_ref, is_sorted_hip); -//} - - -TEST_F(Fbcsr, RecognizeUnsortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto uns_mtx = gen_unsorted_mtx(); -// bool is_sorted_hip{}; -// bool is_sorted_ref{}; -// -// is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); -// is_sorted_hip = uns_mtx.hip->is_sorted_by_column_index(); -// -// ASSERT_EQ(is_sorted_ref, is_sorted_hip); -//} - - -TEST_F(Fbcsr, SortSortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// mtx->sort_by_column_index(); -// dmtx->sort_by_column_index(); -// -// // Values must be unchanged, therefore, tolerance is `0` -// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); -//} - - -TEST_F(Fbcsr, SortUnsortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto uns_mtx = gen_unsorted_mtx(); -// -// uns_mtx.ref->sort_by_column_index(); -// uns_mtx.hip->sort_by_column_index(); -// -// // Values must be unchanged, therefore, tolerance is `0` -// GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.hip, 0); -//} - - -TEST_F(Fbcsr, OneAutomaticalWorksWithDifferentMatrices) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto automatical = std::make_shared(hip); -// auto row_len_limit = std::max(automatical->nvidia_row_len_limit, -// automatical->amd_row_len_limit); -// auto load_balance_mtx = Mtx::create(ref); -// auto classical_mtx = Mtx::create(ref); -// load_balance_mtx->copy_from( -// gen_mtx(1, row_len_limit + 1000, row_len_limit + 1)); -// classical_mtx->copy_from(gen_mtx(50, 50, 1)); -// auto load_balance_mtx_d = Mtx::create(hip); -// auto classical_mtx_d = Mtx::create(hip); -// load_balance_mtx_d->copy_from(load_balance_mtx.get()); -// classical_mtx_d->copy_from(classical_mtx.get()); -// -// load_balance_mtx_d->set_strategy(automatical); -// classical_mtx_d->set_strategy(automatical); -// -// EXPECT_EQ("load_balance", load_balance_mtx_d->get_strategy()->get_name()); -// EXPECT_EQ("classical", classical_mtx_d->get_strategy()->get_name()); -// ASSERT_NE(load_balance_mtx_d->get_strategy().get(), -// classical_mtx_d->get_strategy().get()); -//} - - -TEST_F(Fbcsr, ExtractDiagonalIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// auto diag = mtx->extract_diagonal(); -// auto ddiag = dmtx->extract_diagonal(); -// -// GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); -//} - - -TEST_F(Fbcsr, InplaceAbsoluteMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// mtx->compute_absolute_inplace(); -// dmtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); -//} - - -TEST_F(Fbcsr, OutplaceAbsoluteMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): 
change the code imported from matrix/csr if needed -// set_up_apply_data(std::make_shared(hip)); -// -// auto abs_mtx = mtx->compute_absolute(); -// auto dabs_mtx = dmtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); -//} - - -TEST_F(Fbcsr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_complex_data(std::make_shared(hip)); -// -// complex_mtx->compute_absolute_inplace(); -// complex_dmtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); -//} - - -TEST_F(Fbcsr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_complex_data(std::make_shared(hip)); -// -// auto abs_mtx = complex_mtx->compute_absolute(); -// auto dabs_mtx = complex_dmtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); -//} - - -} // namespace diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 7d5c00cbd02..33e18a99794 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -52,18 +52,6 @@ class Dense; template class Csr; -template -class Coo; - -template -class Ell; - -template -class Hybrid; - -template -class Sellp; - template class SparsityCsr; @@ -129,18 +117,15 @@ class Fbcsr : public EnableLinOp>, public ConvertibleTo, IndexType>>, public ConvertibleTo>, public ConvertibleTo>, - public ConvertibleTo>, public ConvertibleTo>, public DiagonalExtractable, public ReadableFromMatrixData, public WritableToMatrixData, public Transposable, - public Permutable, public EnableAbsoluteComputation< remove_complex>> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; - friend class Coo; friend class Dense; friend class SparsityCsr; friend class FbcsrBuilder; @@ -176,10 +161,6 @@ class Fbcsr : public EnableLinOp>, void move_to(Csr *result) override; - void convert_to(Coo *result) const override; - - void move_to(Coo *result) override; - /// Get the block sparsity pattern in CSR-like format /** Note that the actual non-zero values are never copied; * the result always has a value array of size 1 with the value 1. @@ -199,18 +180,6 @@ class Fbcsr : public EnableLinOp>, std::unique_ptr conj_transpose() const override; - std::unique_ptr row_permute( - const Array *permutation_indices) const override; - - std::unique_ptr column_permute( - const Array *permutation_indices) const override; - - std::unique_ptr inverse_row_permute( - const Array *inverse_permutation_indices) const override; - - std::unique_ptr inverse_column_permute( - const Array *inverse_permutation_indices) const override; - std::unique_ptr> extract_diagonal() const override; std::unique_ptr compute_absolute() const override; diff --git a/omp/components/fbcsr_spgeam.hpp b/omp/components/fbcsr_spgeam.hpp deleted file mode 100644 index e4a06532ed3..00000000000 --- a/omp/components/fbcsr_spgeam.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index 609ab990cdb..a38b3c9bda7 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -44,17 +44,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include -#include -#include #include "core/base/allocator.hpp" #include "core/base/iterator_factory.hpp" #include "core/components/prefix_sum.hpp" #include "core/matrix/fbcsr_builder.hpp" -#include "omp/components/fbcsr_spgeam.hpp" #include "omp/components/format_conversion.hpp" @@ -74,27 +70,6 @@ void spmv(std::shared_ptr exec, const matrix::Fbcsr *a, const matrix::Dense *b, matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// auto vals = a->get_const_values(); -// -//#pragma omp parallel for -// for (size_type row = 0; row < a->get_size()[0]; ++row) { -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) = zero(); -// } -// for (size_type k = row_ptrs[row]; -// k < static_cast(row_ptrs[row + 1]); ++k) { -// auto val = vals[k]; -// auto col = col_idxs[k]; -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) += val * b->at(col, j); -// } -// } -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); @@ -106,29 +81,6 @@ void advanced_spmv(std::shared_ptr exec, const matrix::Dense *b, const matrix::Dense *beta, matrix::Dense *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = a->get_const_row_ptrs(); -// auto col_idxs = a->get_const_col_idxs(); -// auto vals = a->get_const_values(); -// auto valpha = alpha->at(0, 0); -// auto vbeta = beta->at(0, 0); -// -//#pragma omp parallel for -// for (size_type row = 0; row < a->get_size()[0]; ++row) { -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) *= vbeta; -// } -// for (size_type k = row_ptrs[row]; -// k < static_cast(row_ptrs[row + 1]); ++k) { -// auto val = vals[k]; -// auto col = col_idxs[k]; -// for (size_type j = 0; j < c->get_size()[1]; ++j) { -// c->at(row, j) += valpha * val * b->at(col, j); -// } -// } 
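// [Illustrative sketch, not part of this patch] The OpenMP spmv and advanced_spmv for
// Fbcsr are left GKO_NOT_IMPLEMENTED above; the removed commented code was the scalar
// CSR loop imported by the conversion script. For FBCSR, each stored entry is a dense
// bs x bs block, so the analogous OpenMP kernel iterates over block rows and accumulates
// block-times-subvector products. The sketch below handles a single right-hand side and
// uses placeholder names (bs, brow_ptrs, bcol_idxs, block_vals), assuming row-major
// blocks stored contiguously; it is not the interface of the actual Ginkgo kernel.
#include <omp.h>

template <typename ValueType, typename IndexType>
void fbcsr_spmv_sketch(const IndexType num_block_rows, const int bs,
                       const IndexType *brow_ptrs, const IndexType *bcol_idxs,
                       const ValueType *block_vals,  // row-major bs x bs blocks
                       const ValueType *b, ValueType *c)
{
#pragma omp parallel for
    for (IndexType brow = 0; brow < num_block_rows; ++brow) {
        // zero the bs output rows belonging to this block row
        for (int i = 0; i < bs; ++i) {
            c[brow * bs + i] = ValueType{};
        }
        for (IndexType nz = brow_ptrs[brow]; nz < brow_ptrs[brow + 1]; ++nz) {
            const IndexType bcol = bcol_idxs[nz];
            const ValueType *const blk = block_vals + nz * bs * bs;
            // dense bs x bs block times the matching slice of the input vector
            for (int i = 0; i < bs; ++i) {
                for (int j = 0; j < bs; ++j) {
                    c[brow * bs + i] += blk[i * bs + j] * b[bcol * bs + j];
                }
            }
        }
    }
}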
-// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -167,213 +119,6 @@ void spgemm_insert_row2(unordered_set &cols, //} -template -void spgemm_accumulate_row(map &cols, - const matrix::Fbcsr *c, - ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = c->get_const_row_ptrs(); -// auto col_idxs = c->get_const_col_idxs(); -// auto vals = c->get_const_values(); -// for (size_type c_nz = row_ptrs[row]; c_nz < size_type(row_ptrs[row + 1]); -// ++c_nz) { -// auto c_col = col_idxs[c_nz]; -// auto c_val = vals[c_nz]; -// cols[c_col] += scale * c_val; -// } -//} - - -template -void spgemm_accumulate_row2(map &cols, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto a_vals = a->get_const_values(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// auto b_vals = b->get_const_values(); -// for (size_type a_nz = a_row_ptrs[row]; -// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { -// auto a_col = a_col_idxs[a_nz]; -// auto a_val = a_vals[a_nz]; -// auto b_row = a_col; -// for (size_type b_nz = b_row_ptrs[b_row]; -// b_nz < size_type(b_row_ptrs[b_row + 1]); ++b_nz) { -// auto b_col = b_col_idxs[b_nz]; -// auto b_val = b_vals[b_nz]; -// cols[b_col] += scale * a_val * b_val; -// } -// } -//} - - -template -void spgemm(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = a->get_size()[0]; -// -// // first sweep: count nnz for each row -// auto c_row_ptrs = c->get_row_ptrs(); -// -// unordered_set local_col_idxs(exec); -//#pragma omp parallel for firstprivate(local_col_idxs) -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_col_idxs.clear(); -// spgemm_insert_row2(local_col_idxs, a, b, a_row); -// c_row_ptrs[a_row] = local_col_idxs.size(); -// } -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); -// -// // second sweep: accumulate non-zeros -// auto new_nnz = c_row_ptrs[num_rows]; -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// c_col_idxs_array.resize_and_reset(new_nnz); -// c_vals_array.resize_and_reset(new_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// -// map local_row_nzs(exec); -//#pragma omp parallel for firstprivate(local_row_nzs) -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_row_nzs.clear(); -// spgemm_accumulate_row2(local_row_nzs, a, b, one(), a_row); -// // store result -// auto c_nz = c_row_ptrs[a_row]; -// for (auto pair : local_row_nzs) { -// c_col_idxs[c_nz] = pair.first; -// c_vals[c_nz] = pair.second; -// ++c_nz; -// } -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); - - -template -void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - const matrix::Dense *beta, - const matrix::Fbcsr *d, - matrix::Fbcsr *c) - GKO_NOT_IMPLEMENTED; -//{ -// TODO 
(script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = a->get_size()[0]; -// auto valpha = alpha->at(0, 0); -// auto vbeta = beta->at(0, 0); -// -// // first sweep: count nnz for each row -// auto c_row_ptrs = c->get_row_ptrs(); -// -// unordered_set local_col_idxs(exec); -//#pragma omp parallel for firstprivate(local_col_idxs) -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_col_idxs.clear(); -// spgemm_insert_row(local_col_idxs, d, a_row); -// spgemm_insert_row2(local_col_idxs, a, b, a_row); -// c_row_ptrs[a_row] = local_col_idxs.size(); -// } -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); -// -// // second sweep: accumulate non-zeros -// auto new_nnz = c_row_ptrs[num_rows]; -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// c_col_idxs_array.resize_and_reset(new_nnz); -// c_vals_array.resize_and_reset(new_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// -// map local_row_nzs(exec); -//#pragma omp parallel for firstprivate(local_row_nzs) -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_row_nzs.clear(); -// spgemm_accumulate_row(local_row_nzs, d, vbeta, a_row); -// spgemm_accumulate_row2(local_row_nzs, a, b, valpha, a_row); -// // store result -// auto c_nz = c_row_ptrs[a_row]; -// for (auto pair : local_row_nzs) { -// c_col_idxs[c_nz] = pair.first; -// c_vals[c_nz] = pair.second; -// ++c_nz; -// } -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); - - -template -void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *beta, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = a->get_size()[0]; -// auto valpha = alpha->at(0, 0); -// auto vbeta = beta->at(0, 0); -// -// // first sweep: count nnz for each row -// auto c_row_ptrs = c->get_row_ptrs(); -// -// abstract_spgeam( -// a, b, [](IndexType) { return IndexType{}; }, -// [](IndexType, IndexType, ValueType, ValueType, IndexType &nnz) { -// ++nnz; -// }, -// [&](IndexType row, IndexType nnz) { c_row_ptrs[row] = nnz; }); -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); -// -// // second sweep: accumulate non-zeros -// auto new_nnz = c_row_ptrs[num_rows]; -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// c_col_idxs_array.resize_and_reset(new_nnz); -// c_vals_array.resize_and_reset(new_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// -// abstract_spgeam( -// a, b, [&](IndexType row) { return c_row_ptrs[row]; }, -// [&](IndexType, IndexType col, ValueType a_val, ValueType b_val, -// IndexType &nz) { -// c_vals[nz] = valpha * a_val + vbeta * b_val; -// c_col_idxs[nz] = col; -// ++nz; -// }, -// [](IndexType, IndexType) {}); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); - - template void convert_row_ptrs_to_idxs(std::shared_ptr exec, const IndexType *ptrs, size_type num_rows, @@ -384,48 +129,10 @@ void convert_row_ptrs_to_idxs(std::shared_ptr exec, //} -template -void convert_to_coo(std::shared_ptr exec, - const 
matrix::Fbcsr *source, - matrix::Coo *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = result->get_size()[0]; -// -// auto row_idxs = result->get_row_idxs(); -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// -// convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); - - template void convert_to_dense(std::shared_ptr exec, const matrix::Fbcsr *source, matrix::Dense *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = source->get_size()[0]; -// auto num_cols = source->get_size()[1]; -// auto row_ptrs = source->get_const_row_ptrs(); -// auto col_idxs = source->get_const_col_idxs(); -// auto vals = source->get_const_values(); -// -//#pragma omp parallel for -// for (size_type row = 0; row < num_rows; ++row) { -// for (size_type col = 0; col < num_cols; ++col) { -// result->at(row, col) = zero(); -// } -// for (size_type i = row_ptrs[row]; -// i < static_cast(row_ptrs[row + 1]); ++i) { -// result->at(row, col_idxs[i]) = vals[i]; -// } -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); @@ -441,26 +148,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); -template -void convert_to_sellp(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Sellp *result) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); - - -template -void convert_to_ell(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Ell *result) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); - - template inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, const IndexType *col_idxs, @@ -468,16 +155,6 @@ inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, IndexType *row_idxs, IndexType *col_ptrs, ValueType *csc_vals, UnaryOperator op) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// for (size_type row = 0; row < num_rows; ++row) { -// for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; ++i) { -// const auto dest_idx = col_ptrs[col_idxs[i]]++; -// row_idxs[dest_idx] = row; -// csc_vals[dest_idx] = op(fbcsr_vals[i]); -// } -// } -//} template @@ -537,16 +214,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result, size_type stride_factor, - size_type slice_size) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); - - template void calculate_max_nnz_per_row( std::shared_ptr exec, @@ -557,215 +224,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); -template -void convert_to_hybrid(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Hybrid *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = result->get_size()[0]; -// auto num_cols = result->get_size()[1]; -// auto strategy = result->get_strategy(); -// auto ell_lim = 
strategy->get_ell_num_stored_elements_per_row(); -// auto coo_lim = strategy->get_coo_nnz(); -// auto coo_val = result->get_coo_values(); -// auto coo_col = result->get_coo_col_idxs(); -// auto coo_row = result->get_coo_row_idxs(); -// const auto max_nnz_per_row = -// result->get_ell_num_stored_elements_per_row(); -// -//// Initial Hybrid Matrix -//#pragma omp parallel for -// for (size_type i = 0; i < max_nnz_per_row; i++) { -// for (size_type j = 0; j < result->get_ell_stride(); j++) { -// result->ell_val_at(j, i) = zero(); -// result->ell_col_at(j, i) = 0; -// } -// } -// -// const auto fbcsr_row_ptrs = source->get_const_row_ptrs(); -// const auto fbcsr_vals = source->get_const_values(); -// auto coo_offset = Array(exec, num_rows); -// auto coo_offset_val = coo_offset.get_data(); -// -// coo_offset_val[0] = 0; -//#pragma omp parallel for -// for (size_type i = 1; i < num_rows; i++) { -// auto temp = fbcsr_row_ptrs[i] - fbcsr_row_ptrs[i - 1]; -// coo_offset_val[i] = (temp > max_nnz_per_row) * (temp - -// max_nnz_per_row); -// } -// -// auto workspace = Array(exec, num_rows); -// auto workspace_val = workspace.get_data(); -// for (size_type i = 1; i < num_rows; i <<= 1) { -//#pragma omp parallel for -// for (size_type j = i; j < num_rows; j++) { -// workspace_val[j] = coo_offset_val[j] + coo_offset_val[j - i]; -// } -//#pragma omp parallel for -// for (size_type j = i; j < num_rows; j++) { -// coo_offset_val[j] = workspace_val[j]; -// } -// } -// -//#pragma omp parallel for -// for (IndexType row = 0; row < num_rows; row++) { -// size_type ell_idx = 0; -// size_type fbcsr_idx = fbcsr_row_ptrs[row]; -// size_type coo_idx = coo_offset_val[row]; -// while (fbcsr_idx < fbcsr_row_ptrs[row + 1]) { -// const auto val = fbcsr_vals[fbcsr_idx]; -// if (ell_idx < ell_lim) { -// result->ell_val_at(row, ell_idx) = val; -// result->ell_col_at(row, ell_idx) = -// source->get_const_col_idxs()[fbcsr_idx]; -// ell_idx++; -// } else { -// coo_val[coo_idx] = val; -// coo_col[coo_idx] = source->get_const_col_idxs()[fbcsr_idx]; -// coo_row[coo_idx] = row; -// coo_idx++; -// } -// fbcsr_idx++; -// } -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); - - -template -void row_permute_impl(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// auto orig_row_ptrs = orig->get_const_row_ptrs(); -// auto orig_col_idxs = orig->get_const_col_idxs(); -// auto orig_vals = orig->get_const_values(); -// auto rp_row_ptrs = row_permuted->get_row_ptrs(); -// auto rp_col_idxs = row_permuted->get_col_idxs(); -// auto rp_vals = row_permuted->get_values(); -// size_type num_rows = orig->get_size()[0]; -// size_type num_nnz = orig->get_num_stored_elements(); -// -// size_type cur_ptr = 0; -// rp_row_ptrs[0] = cur_ptr; -// vector orig_num_nnz_per_row(num_rows, 0, exec); -//#pragma omp parallel for -// for (size_type row = 0; row < num_rows; ++row) { -// orig_num_nnz_per_row[row] = orig_row_ptrs[row + 1] - -// orig_row_ptrs[row]; -// } -// for (size_type row = 0; row < num_rows; ++row) { -// rp_row_ptrs[row + 1] = -// rp_row_ptrs[row] + orig_num_nnz_per_row[perm[row]]; -// } -// rp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; -//#pragma omp parallel for -// for (size_type row = 0; row < num_rows; ++row) { -// auto new_row = perm[row]; -// auto new_k = 
orig_row_ptrs[new_row]; -// for (size_type k = rp_row_ptrs[row]; -// k < size_type(rp_row_ptrs[row + 1]); ++k) { -// rp_col_idxs[k] = orig_col_idxs[new_k]; -// rp_vals[k] = orig_vals[new_k]; -// new_k++; -// } -// } -//} - - -template -void row_permute(std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// row_permute_impl(exec, permutation_indices, orig, row_permuted); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); - - -template -void inverse_row_permute(std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// Array inv_perm(*permutation_indices); -// auto iperm = inv_perm.get_data(); -//#pragma omp parallel for -// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { -// iperm[perm[ind]] = ind; -// } -// -// row_permute_impl(exec, &inv_perm, orig, row_permuted); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); - - -template -void column_permute_impl(const Array *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// auto orig_row_ptrs = orig->get_const_row_ptrs(); -// auto orig_col_idxs = orig->get_const_col_idxs(); -// auto orig_vals = orig->get_const_values(); -// auto cp_row_ptrs = column_permuted->get_row_ptrs(); -// auto cp_col_idxs = column_permuted->get_col_idxs(); -// auto cp_vals = column_permuted->get_values(); -// auto num_nnz = orig->get_num_stored_elements(); -// size_type num_rows = orig->get_size()[0]; -// size_type num_cols = orig->get_size()[1]; -// -//#pragma omp parallel for -// for (size_type row = 0; row < num_rows; ++row) { -// cp_row_ptrs[row] = orig_row_ptrs[row]; -// for (size_type k = orig_row_ptrs[row]; -// k < size_type(orig_row_ptrs[row + 1]); ++k) { -// cp_col_idxs[k] = perm[orig_col_idxs[k]]; -// cp_vals[k] = orig_vals[k]; -// } -// } -// cp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; -//} - - -template -void inverse_column_permute( - std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// column_permute_impl(permutation_indices, orig, column_permuted); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); - - template void calculate_nonzeros_per_row( std::shared_ptr exec, @@ -790,21 +248,6 @@ template void sort_by_column_index(std::shared_ptr exec, matrix::Fbcsr *to_sort) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto values = to_sort->get_values(); -// auto row_ptrs = to_sort->get_row_ptrs(); -// auto col_idxs = to_sort->get_col_idxs(); -// const auto number_rows = to_sort->get_size()[0]; -//#pragma omp parallel for -// for (size_type i = 0; i < number_rows; ++i) { -// auto start_row_idx = row_ptrs[i]; -// auto row_nnz = row_ptrs[i + 1] - start_row_idx; -// auto helper 
= detail::IteratorFactory( -// col_idxs + start_row_idx, values + start_row_idx, row_nnz); -// std::sort(helper.begin(), helper.end()); -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); @@ -844,24 +287,6 @@ template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto row_ptrs = orig->get_const_row_ptrs(); -// const auto col_idxs = orig->get_const_col_idxs(); -// const auto values = orig->get_const_values(); -// const auto diag_size = diag->get_size()[0]; -// auto diag_values = diag->get_values(); -// -//#pragma omp parallel for -// for (size_type row = 0; row < diag_size; ++row) { -// for (size_type idx = row_ptrs[row]; idx < row_ptrs[row + 1]; ++idx) { -// if (col_idxs[idx] == row) { -// diag_values[row] = values[idx]; -// break; -// } -// } -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/omp/test/matrix/CMakeLists.txt b/omp/test/matrix/CMakeLists.txt index 3c61fd32243..0c9fd00eb30 100644 --- a/omp/test/matrix/CMakeLists.txt +++ b/omp/test/matrix/CMakeLists.txt @@ -3,7 +3,6 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) -#ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(sellp_kernels) ginkgo_create_test(sparsity_csr_kernels) diff --git a/omp/test/matrix/fbcsr_kernels.cpp b/omp/test/matrix/fbcsr_kernels.cpp deleted file mode 100644 index a1826bfa899..00000000000 --- a/omp/test/matrix/fbcsr_kernels.cpp +++ /dev/null @@ -1,662 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include -#include -#include -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include - - -#include "core/matrix/fbcsr_kernels.hpp" -#include "core/test/utils.hpp" - - -namespace { - - -class Fbcsr : public ::testing::Test { -protected: - using Arr = gko::Array; - using Mtx = gko::matrix::Fbcsr<>; - using Vec = gko::matrix::Dense<>; - using ComplexVec = gko::matrix::Dense>; - using ComplexMtx = gko::matrix::Fbcsr>; - - Fbcsr() : mtx_size(532, 231), rand_engine(42) {} - - void SetUp() - { - ref = gko::ReferenceExecutor::create(); - omp = gko::OmpExecutor::create(); - } - - void TearDown() - { - if (omp != nullptr) { - ASSERT_NO_THROW(omp->synchronize()); - } - } - - template - std::unique_ptr gen_mtx(int num_rows, int num_cols, - int min_nnz_row) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(min_nnz_row, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - void set_up_apply_data(int num_vectors = 1) - { - mtx = Mtx::create(ref); - mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); - complex_mtx = ComplexMtx::create(ref); - complex_mtx->copy_from( - gen_mtx(mtx_size[0], mtx_size[1], 1)); - square_mtx = Mtx::create(ref); - square_mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[0], 1)); - expected = gen_mtx(mtx_size[0], num_vectors, 1); - y = gen_mtx(mtx_size[1], num_vectors, 1); - alpha = gko::initialize({2.0}, ref); - beta = gko::initialize({-1.0}, ref); - dmtx = Mtx::create(omp); - dmtx->copy_from(mtx.get()); - complex_dmtx = ComplexMtx::create(omp); - complex_dmtx->copy_from(complex_mtx.get()); - square_dmtx = Mtx::create(omp); - square_dmtx->copy_from(square_mtx.get()); - dresult = Vec::create(omp); - dresult->copy_from(expected.get()); - dy = Vec::create(omp); - dy->copy_from(y.get()); - dalpha = Vec::create(omp); - dalpha->copy_from(alpha.get()); - dbeta = Vec::create(omp); - dbeta->copy_from(beta.get()); - - std::vector tmp(mtx->get_size()[0], 0); - auto rng = std::default_random_engine{}; - std::iota(tmp.begin(), tmp.end(), 0); - std::shuffle(tmp.begin(), tmp.end(), rng); - std::vector tmp2(mtx->get_size()[1], 0); - std::iota(tmp2.begin(), tmp2.end(), 0); - std::shuffle(tmp2.begin(), tmp2.end(), rng); - rpermute_idxs = - std::unique_ptr(new Arr{ref, tmp.begin(), tmp.end()}); - drpermute_idxs = - std::unique_ptr(new Arr{omp, tmp.begin(), tmp.end()}); - cpermute_idxs = - std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); - dcpermute_idxs = - std::unique_ptr(new Arr{omp, tmp2.begin(), tmp2.end()}); - } - - struct matrix_pair { - std::unique_ptr ref; - std::unique_ptr omp; - }; - - matrix_pair gen_unsorted_mtx() - { - constexpr int min_nnz_per_row = 2; // Must be at least 2 - auto local_mtx_ref = - gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); - for (size_t row = 0; row < mtx_size[0]; ++row) { - const auto row_ptrs = local_mtx_ref->get_const_row_ptrs(); - const auto start_row = row_ptrs[row]; - auto col_idx = local_mtx_ref->get_col_idxs() + start_row; - auto vals = local_mtx_ref->get_values() + start_row; - const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; - auto swap_idx_dist = - std::uniform_int_distribution<>(0, nnz_in_this_row - 1); - // shuffle `nnz_in_this_row / 2` times - for (size_t perm = 0; perm < nnz_in_this_row; perm += 2) { - const auto idx1 = swap_idx_dist(rand_engine); - const auto idx2 = swap_idx_dist(rand_engine); - std::swap(col_idx[idx1], 
col_idx[idx2]); - std::swap(vals[idx1], vals[idx2]); - } - } - auto local_mtx_omp = Mtx::create(omp); - local_mtx_omp->copy_from(local_mtx_ref.get()); - - return {std::move(local_mtx_ref), std::move(local_mtx_omp)}; - } - - std::shared_ptr ref; - std::shared_ptr omp; - - const gko::dim<2> mtx_size; - std::ranlux48 rand_engine; - - std::unique_ptr mtx; - std::unique_ptr complex_mtx; - std::unique_ptr square_mtx; - std::unique_ptr expected; - std::unique_ptr y; - std::unique_ptr alpha; - std::unique_ptr beta; - - std::unique_ptr dmtx; - std::unique_ptr complex_dmtx; - std::unique_ptr square_dmtx; - std::unique_ptr dresult; - std::unique_ptr dy; - std::unique_ptr dalpha; - std::unique_ptr dbeta; - std::unique_ptr rpermute_idxs; - std::unique_ptr drpermute_idxs; - std::unique_ptr cpermute_idxs; - std::unique_ptr dcpermute_idxs; -}; - - -TEST_F(Fbcsr, SimpleApplyIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, SimpleApplyToDenseMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(3); -// -// mtx->apply(y.get(), expected.get()); -// dmtx->apply(dy.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, AdvancedApplyToFbcsrMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// mtx->apply(alpha.get(), trans.get(), beta.get(), square_mtx.get()); -// dmtx->apply(dalpha.get(), d_trans.get(), dbeta.get(), square_dmtx.get()); -// -// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); -// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, SimpleApplyToFbcsrMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// mtx->apply(trans.get(), square_mtx.get()); -// dmtx->apply(d_trans.get(), square_dmtx.get()); -// -// GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); -// ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto a = gen_mtx(mtx_size[0], mtx_size[1], 0); -// auto b = gen_mtx(mtx_size[0], mtx_size[1], 0); -// auto da = Mtx::create(omp); -// auto db = Mtx::create(omp); -// da->copy_from(a.get()); -// db->copy_from(b.get()); -// auto id = gko::matrix::Identity::create(ref, -// mtx_size[1]); auto did = -// 
gko::matrix::Identity::create(omp, mtx_size[1]); -// -// a->apply(alpha.get(), id.get(), beta.get(), b.get()); -// da->apply(dalpha.get(), did.get(), dbeta.get(), db.get()); -// -// GKO_ASSERT_MTX_NEAR(b, db, 1e-14); -// GKO_ASSERT_MTX_EQ_SPARSITY(b, db); -// ASSERT_TRUE(db->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, AdvancedApplyToDenseMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(3); -// -// mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); -// dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); -// -// GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -//} - - -TEST_F(Fbcsr, TransposeIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// auto trans = mtx->transpose(); -// auto d_trans = dmtx->transpose(); -// -// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), -// static_cast(trans.get()), 0.0); -// ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, ConjugateTransposeIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// auto trans = complex_mtx->conj_transpose(); -// auto d_trans = complex_dmtx->conj_transpose(); -// -// GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), -// static_cast(trans.get()), 0.0); -// ASSERT_TRUE( -// static_cast(d_trans.get())->is_sorted_by_column_index()); -//} - - -TEST_F(Fbcsr, ConvertToCooIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto coo_mtx = gko::matrix::Coo<>::create(ref); -// auto dcoo_mtx = gko::matrix::Coo<>::create(omp); -// -// mtx->convert_to(coo_mtx.get()); -// dmtx->convert_to(dcoo_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToCooIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto coo_mtx = gko::matrix::Coo<>::create(ref); -// auto dcoo_mtx = gko::matrix::Coo<>::create(omp); -// -// mtx->move_to(coo_mtx.get()); -// dmtx->move_to(dcoo_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToDenseIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto dense_mtx = gko::matrix::Dense<>::create(ref); -// auto ddense_mtx = gko::matrix::Dense<>::create(omp); -// -// mtx->convert_to(dense_mtx.get()); -// dmtx->convert_to(ddense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(ddense_mtx.get(), dense_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToDenseIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto dense_mtx = gko::matrix::Dense<>::create(ref); -// auto ddense_mtx = gko::matrix::Dense<>::create(omp); -// -// mtx->move_to(dense_mtx.get()); -// dmtx->move_to(ddense_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(ddense_mtx.get(), dense_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, ConvertToSparsityFbcsrIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto sparsity_mtx = 
gko::matrix::SparsityFbcsr<>::create(ref); -// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(omp); -// -// mtx->convert_to(sparsity_mtx.get()); -// dmtx->convert_to(d_sparsity_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(d_sparsity_mtx.get(), sparsity_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToSparsityFbcsrIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(ref); -// auto d_sparsity_mtx = gko::matrix::SparsityFbcsr<>::create(omp); -// -// mtx->move_to(sparsity_mtx.get()); -// dmtx->move_to(d_sparsity_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(d_sparsity_mtx.get(), sparsity_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, CalculatesNonzerosPerRow) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// gko::Array row_nnz(ref, mtx->get_size()[0]); -// gko::Array drow_nnz(omp, dmtx->get_size()[0]); -// -// gko::kernels::reference::fbcsr::calculate_nonzeros_per_row(ref, mtx.get(), -// &row_nnz); -// gko::kernels::omp::fbcsr::calculate_nonzeros_per_row(omp, dmtx.get(), -// &drow_nnz); -// -// GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); -//} - - -TEST_F(Fbcsr, ConvertToHybridIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid_type = gko::matrix::Hybrid<>; -// set_up_apply_data(); -// auto hybrid_mtx = Hybrid_type::create( -// ref, std::make_shared(2)); -// auto dhybrid_mtx = Hybrid_type::create( -// omp, std::make_shared(2)); -// -// mtx->convert_to(hybrid_mtx.get()); -// dmtx->convert_to(dhybrid_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, MoveToHybridIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid_type = gko::matrix::Hybrid<>; -// set_up_apply_data(); -// auto hybrid_mtx = Hybrid_type::create( -// ref, std::make_shared(2)); -// auto dhybrid_mtx = Hybrid_type::create( -// omp, std::make_shared(2)); -// -// mtx->move_to(hybrid_mtx.get()); -// dmtx->move_to(dhybrid_mtx.get()); -// -// GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), 1e-14); -//} - - -TEST_F(Fbcsr, IsRowPermutable) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto r_permute = mtx->row_permute(rpermute_idxs.get()); -// auto dr_permute = dmtx->row_permute(drpermute_idxs.get()); -// -// GKO_ASSERT_MTX_NEAR(static_cast(r_permute.get()), -// static_cast(dr_permute.get()), 0); -//} - - -TEST_F(Fbcsr, IsColPermutable) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto c_permute = mtx->column_permute(cpermute_idxs.get()); -// auto dc_permute = dmtx->column_permute(dcpermute_idxs.get()); -// -// GKO_ASSERT_MTX_NEAR(static_cast(c_permute.get()), -// static_cast(dc_permute.get()), 0); -//} - - -TEST_F(Fbcsr, IsInverseRowPermutable) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto inverse_r_permute = mtx->inverse_row_permute(rpermute_idxs.get()); -// auto d_inverse_r_permute = -// dmtx->inverse_row_permute(drpermute_idxs.get()); -// -// GKO_ASSERT_MTX_NEAR(static_cast(inverse_r_permute.get()), -// 
static_cast(d_inverse_r_permute.get()), 0); -//} - - -TEST_F(Fbcsr, IsInverseColPermutable) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// auto inverse_c_permute = mtx->inverse_column_permute(cpermute_idxs.get()); -// auto d_inverse_c_permute = -// dmtx->inverse_column_permute(dcpermute_idxs.get()); -// -// GKO_ASSERT_MTX_NEAR(static_cast(inverse_c_permute.get()), -// static_cast(d_inverse_c_permute.get()), 0); -//} - - -TEST_F(Fbcsr, RecognizeSortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// bool is_sorted_omp{}; -// bool is_sorted_ref{}; -// -// is_sorted_ref = mtx->is_sorted_by_column_index(); -// is_sorted_omp = dmtx->is_sorted_by_column_index(); -// -// ASSERT_EQ(is_sorted_ref, is_sorted_omp); -//} - - -TEST_F(Fbcsr, RecognizeUnsortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto uns_mtx = gen_unsorted_mtx(); -// bool is_sorted_omp{}; -// bool is_sorted_ref{}; -// -// is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); -// is_sorted_omp = uns_mtx.omp->is_sorted_by_column_index(); -// -// ASSERT_EQ(is_sorted_ref, is_sorted_omp); -//} - - -TEST_F(Fbcsr, SortSortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// mtx->sort_by_column_index(); -// dmtx->sort_by_column_index(); -// -// // Values must be unchanged, therefore, tolerance is `0` -// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); -//} - - -TEST_F(Fbcsr, SortUnsortedMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto uns_mtx = gen_unsorted_mtx(); -// -// uns_mtx.ref->sort_by_column_index(); -// uns_mtx.omp->sort_by_column_index(); -// -// // Values must be unchanged, therefore, tolerance is `0` -// GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.omp, 0); -//} - - -TEST_F(Fbcsr, ExtractDiagonalIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// auto diag = mtx->extract_diagonal(); -// auto ddiag = dmtx->extract_diagonal(); -// -// GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); -//} - - -TEST_F(Fbcsr, InplaceAbsoluteMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// mtx->compute_absolute_inplace(); -// dmtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); -//} - - -TEST_F(Fbcsr, OutplaceAbsoluteMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// auto abs_mtx = mtx->compute_absolute(); -// auto dabs_mtx = dmtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); -//} - - -TEST_F(Fbcsr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// complex_mtx->compute_absolute_inplace(); -// complex_dmtx->compute_absolute_inplace(); -// -// GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); -//} - - -TEST_F(Fbcsr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) -GKO_NOT_IMPLEMENTED; 
-//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// set_up_apply_data(); -// -// auto abs_mtx = complex_mtx->compute_absolute(); -// auto dabs_mtx = complex_dmtx->compute_absolute(); -// -// GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); -//} - - -} // namespace diff --git a/reference/components/fbcsr_spgeam.hpp b/reference/components/fbcsr_spgeam.hpp deleted file mode 100644 index e4a06532ed3..00000000000 --- a/reference/components/fbcsr_spgeam.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 042357d0429..8060d777ec2 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -43,11 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include -#include -#include -#include #include "core/base/allocator.hpp" @@ -55,7 +51,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/fixed_block.hpp" #include "core/components/prefix_sum.hpp" #include "core/matrix/fbcsr_builder.hpp" -#include "reference/components/fbcsr_spgeam.hpp" #include "reference/components/format_conversion.hpp" @@ -192,18 +187,6 @@ template void spgemm_accumulate_row(map &cols, const matrix::Fbcsr *c, ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = c->get_const_row_ptrs(); -// auto col_idxs = c->get_const_col_idxs(); -// auto vals = c->get_const_values(); -// for (size_type c_nz = row_ptrs[row]; c_nz < size_type(row_ptrs[row + 1]); -// ++c_nz) { -// auto c_col = col_idxs[c_nz]; -// auto c_val = vals[c_nz]; -// cols[c_col] += scale * c_val; -// } -//} template @@ -211,27 +194,6 @@ void spgemm_accumulate_row2(map &cols, const matrix::Fbcsr *a, const matrix::Fbcsr *b, ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto a_vals = a->get_const_values(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// auto b_vals = b->get_const_values(); -// for (size_type a_nz = a_row_ptrs[row]; -// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { -// auto a_col = a_col_idxs[a_nz]; -// auto a_val = a_vals[a_nz]; -// auto b_row = a_col; -// for (size_type b_nz = b_row_ptrs[b_row]; -// b_nz < size_type(b_row_ptrs[b_row + 1]); ++b_nz) { -// auto b_col = b_col_idxs[b_nz]; -// auto b_val = b_vals[b_nz]; -// cols[b_col] += scale * a_val * b_val; -// } -// } -//} template @@ -239,48 +201,6 @@ void spgemm(std::shared_ptr exec, const matrix::Fbcsr *a, const matrix::Fbcsr *b, matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = a->get_size()[0]; -// -// // first sweep: count nnz for each row -// auto c_row_ptrs = c->get_row_ptrs(); -// -// unordered_set local_col_idxs(exec); -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_col_idxs.clear(); -// spgemm_insert_row2(local_col_idxs, a, b, a_row); -// c_row_ptrs[a_row] = local_col_idxs.size(); -// } -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); -// -// // second sweep: accumulate non-zeros -// auto new_nnz = c_row_ptrs[num_rows]; -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// c_col_idxs_array.resize_and_reset(new_nnz); -// c_vals_array.resize_and_reset(new_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// -// map local_row_nzs(exec); -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_row_nzs.clear(); -// spgemm_accumulate_row2(local_row_nzs, a, b, one(), a_row); -// // store result -// auto c_nz = c_row_ptrs[a_row]; -// for (auto pair : local_row_nzs) { -// c_col_idxs[c_nz] = pair.first; -// c_vals[c_nz] = pair.second; -// ++c_nz; -// } -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEMM_KERNEL); template @@ -292,122 +212,7 @@ void advanced_spgemm(std::shared_ptr exec, const matrix::Fbcsr *d, matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = a->get_size()[0]; -// auto valpha = 
alpha->at(0, 0); -// auto vbeta = beta->at(0, 0); -// -// // first sweep: count nnz for each row -// auto c_row_ptrs = c->get_row_ptrs(); -// -// unordered_set local_col_idxs(exec); -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_col_idxs.clear(); -// spgemm_insert_row(local_col_idxs, d, a_row); -// spgemm_insert_row2(local_col_idxs, a, b, a_row); -// c_row_ptrs[a_row] = local_col_idxs.size(); -// } -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); -// -// // second sweep: accumulate non-zeros -// auto new_nnz = c_row_ptrs[num_rows]; -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// c_col_idxs_array.resize_and_reset(new_nnz); -// c_vals_array.resize_and_reset(new_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// -// map local_row_nzs(exec); -// for (size_type a_row = 0; a_row < num_rows; ++a_row) { -// local_row_nzs.clear(); -// spgemm_accumulate_row(local_row_nzs, d, vbeta, a_row); -// spgemm_accumulate_row2(local_row_nzs, a, b, valpha, a_row); -// // store result -// auto c_nz = c_row_ptrs[a_row]; -// for (auto pair : local_row_nzs) { -// c_col_idxs[c_nz] = pair.first; -// c_vals[c_nz] = pair.second; -// ++c_nz; -// } -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPGEMM_KERNEL); - - -template -void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *beta, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = a->get_size()[0]; -// auto valpha = alpha->at(0, 0); -// auto vbeta = beta->at(0, 0); -// -// // first sweep: count nnz for each row -// auto c_row_ptrs = c->get_row_ptrs(); -// -// abstract_spgeam( -// a, b, [](IndexType) { return IndexType{}; }, -// [](IndexType, IndexType, ValueType, ValueType, IndexType &nnz) { -// ++nnz; -// }, -// [&](IndexType row, IndexType nnz) { c_row_ptrs[row] = nnz; }); -// -// // build row pointers -// components::prefix_sum(exec, c_row_ptrs, num_rows + 1); -// -// // second sweep: accumulate non-zeros -// auto new_nnz = c_row_ptrs[num_rows]; -// matrix::FbcsrBuilder c_builder{c}; -// auto &c_col_idxs_array = c_builder.get_col_idx_array(); -// auto &c_vals_array = c_builder.get_value_array(); -// c_col_idxs_array.resize_and_reset(new_nnz); -// c_vals_array.resize_and_reset(new_nnz); -// auto c_col_idxs = c_col_idxs_array.get_data(); -// auto c_vals = c_vals_array.get_data(); -// -// abstract_spgeam( -// a, b, [&](IndexType row) { return c_row_ptrs[row]; }, -// [&](IndexType, IndexType col, ValueType a_val, ValueType b_val, -// IndexType &nz) { -// c_vals[nz] = valpha * a_val + vbeta * b_val; -// c_col_idxs[nz] = col; -// ++nz; -// }, -// [](IndexType, IndexType) {}); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPGEAM_KERNEL); - - -// template -// void convert_row_ptrs_to_idxs(std::shared_ptr exec, -// const IndexType *ptrs, size_type num_rows, -// IndexType *idxs) -// { -// convert_ptrs_to_idxs(ptrs, num_rows, idxs); -// } - - -template -void convert_to_coo(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Coo *result) - GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_COO_KERNEL); template void convert_to_dense(const std::shared_ptr 
exec, @@ -502,151 +307,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); -template -void convert_to_sellp(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Sellp *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = result->get_size()[0]; -// auto num_cols = result->get_size()[1]; -// auto vals = result->get_values(); -// auto col_idxs = result->get_col_idxs(); -// auto slice_lengths = result->get_slice_lengths(); -// auto slice_sets = result->get_slice_sets(); -// auto slice_size = (result->get_slice_size() == 0) -// ? matrix::default_slice_size -// : result->get_slice_size(); -// auto stride_factor = (result->get_stride_factor() == 0) -// ? matrix::default_stride_factor -// : result->get_stride_factor(); -// -// const auto source_row_ptrs = source->get_const_row_ptrs(); -// const auto source_col_idxs = source->get_const_col_idxs(); -// const auto source_values = source->get_const_values(); -// -// int slice_num = ceildiv(num_rows, slice_size); -// slice_sets[0] = 0; -// for (size_type slice = 0; slice < slice_num; slice++) { -// if (slice > 0) { -// slice_sets[slice] = -// slice_sets[slice - 1] + slice_lengths[slice - 1]; -// } -// slice_lengths[slice] = 0; -// for (size_type row = 0; row < slice_size; row++) { -// size_type global_row = slice * slice_size + row; -// if (global_row >= num_rows) { -// break; -// } -// slice_lengths[slice] = -// (slice_lengths[slice] > -// source_row_ptrs[global_row + 1] - -// source_row_ptrs[global_row]) -// ? slice_lengths[slice] -// : source_row_ptrs[global_row + 1] - -// source_row_ptrs[global_row]; -// } -// slice_lengths[slice] = -// stride_factor * ceildiv(slice_lengths[slice], stride_factor); -// for (size_type row = 0; row < slice_size; row++) { -// size_type global_row = slice * slice_size + row; -// if (global_row >= num_rows) { -// break; -// } -// size_type sellp_ind = slice_sets[slice] * slice_size + row; -// for (size_type fbcsr_ind = source_row_ptrs[global_row]; -// fbcsr_ind < source_row_ptrs[global_row + 1]; fbcsr_ind++) { -// vals[sellp_ind] = source_values[fbcsr_ind]; -// col_idxs[sellp_ind] = source_col_idxs[fbcsr_ind]; -// sellp_ind += slice_size; -// } -// for (size_type i = sellp_ind; -// i < -// (slice_sets[slice] + slice_lengths[slice]) * slice_size + -// row; i += slice_size) { -// col_idxs[i] = 0; -// vals[i] = zero(); -// } -// } -// } -// if (slice_num > 0) { -// slice_sets[slice_num] = -// slice_sets[slice_num - 1] + slice_lengths[slice_num - 1]; -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_SELLP_KERNEL); - - -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result, size_type stride_factor, - size_type slice_size) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// size_type total_cols = 0; -// const auto num_rows = source->get_size()[0]; -// const auto slice_num = ceildiv(num_rows, slice_size); -// -// const auto row_ptrs = source->get_const_row_ptrs(); -// -// for (size_type slice = 0; slice < slice_num; slice++) { -// IndexType max_nnz_per_row_in_this_slice = 0; -// for (size_type row = 0; -// row < slice_size && row + slice * slice_size < num_rows; row++) { -// size_type global_row = slice * slice_size + row; -// max_nnz_per_row_in_this_slice = -// max(row_ptrs[global_row + 1] - row_ptrs[global_row], -// max_nnz_per_row_in_this_slice); 
-// } -// total_cols += ceildiv(max_nnz_per_row_in_this_slice, stride_factor) * -// stride_factor; -// } -// -// *result = total_cols; -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CALCULATE_TOTAL_COLS_KERNEL); - - -template -void convert_to_ell(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Ell *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// const auto num_cols = source->get_size()[1]; -// const auto vals = source->get_const_values(); -// const auto col_idxs = source->get_const_col_idxs(); -// const auto row_ptrs = source->get_const_row_ptrs(); -// -// const auto num_stored_elements_per_row = -// result->get_num_stored_elements_per_row(); -// -// for (size_type row = 0; row < num_rows; row++) { -// for (size_type i = 0; i < num_stored_elements_per_row; i++) { -// result->val_at(row, i) = zero(); -// result->col_at(row, i) = 0; -// } -// for (size_type col_idx = 0; col_idx < row_ptrs[row + 1] - -// row_ptrs[row]; -// col_idx++) { -// result->val_at(row, col_idx) = vals[row_ptrs[row] + col_idx]; -// result->col_at(row, col_idx) = col_idxs[row_ptrs[row] + col_idx]; -// } -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_ELL_KERNEL); - - template void convert_fbcsr_to_fbcsc(const size_type num_blk_rows, const int blksz, @@ -755,208 +415,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); -template -void convert_to_hybrid(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Hybrid *result) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto num_rows = result->get_size()[0]; -// auto num_cols = result->get_size()[1]; -// auto strategy = result->get_strategy(); -// auto ell_lim = strategy->get_ell_num_stored_elements_per_row(); -// auto coo_lim = strategy->get_coo_nnz(); -// auto coo_val = result->get_coo_values(); -// auto coo_col = result->get_coo_col_idxs(); -// auto coo_row = result->get_coo_row_idxs(); -// -// // Initial Hybrid Matrix -// for (size_type i = 0; i < result->get_ell_num_stored_elements_per_row(); -// i++) { -// for (size_type j = 0; j < result->get_ell_stride(); j++) { -// result->ell_val_at(j, i) = zero(); -// result->ell_col_at(j, i) = 0; -// } -// } -// for (size_type i = 0; i < result->get_coo_num_stored_elements(); i++) { -// coo_val[i] = zero(); -// coo_col[i] = 0; -// coo_row[i] = 0; -// } -// -// const auto fbcsr_row_ptrs = source->get_const_row_ptrs(); -// const auto fbcsr_vals = source->get_const_values(); -// size_type fbcsr_idx = 0; -// size_type coo_idx = 0; -// for (IndexType row = 0; row < num_rows; row++) { -// size_type ell_idx = 0; -// while (fbcsr_idx < fbcsr_row_ptrs[row + 1]) { -// const auto val = fbcsr_vals[fbcsr_idx]; -// if (ell_idx < ell_lim) { -// result->ell_val_at(row, ell_idx) = val; -// result->ell_col_at(row, ell_idx) = -// source->get_const_col_idxs()[fbcsr_idx]; -// ell_idx++; -// } else { -// coo_val[coo_idx] = val; -// coo_col[coo_idx] = source->get_const_col_idxs()[fbcsr_idx]; -// coo_row[coo_idx] = row; -// coo_idx++; -// } -// fbcsr_idx++; -// } -// } -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_HYBRID_KERNEL); - - -template -void row_permute_impl(std::shared_ptr exec, - const IndexType *const permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - 
GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// auto orig_row_ptrs = orig->get_const_row_ptrs(); -// auto orig_col_idxs = orig->get_const_col_idxs(); -// auto orig_vals = orig->get_const_values(); -// auto rp_row_ptrs = row_permuted->get_row_ptrs(); -// auto rp_col_idxs = row_permuted->get_col_idxs(); -// auto rp_vals = row_permuted->get_values(); -// size_type num_rows = orig->get_size()[0]; -// size_type num_nnz = orig->get_num_stored_elements(); -// -// size_type cur_ptr = 0; -// rp_row_ptrs[0] = cur_ptr; -// vector orig_num_nnz_per_row(num_rows, 0, exec); -// for (size_type row = 0; row < num_rows; ++row) { -// orig_num_nnz_per_row[row] = orig_row_ptrs[row + 1] - -// orig_row_ptrs[row]; -// } -// for (size_type row = 0; row < num_rows; ++row) { -// rp_row_ptrs[row + 1] = -// rp_row_ptrs[row] + orig_num_nnz_per_row[perm[row]]; -// } -// rp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; -// for (size_type row = 0; row < num_rows; ++row) { -// auto new_row = perm[row]; -// auto new_k = orig_row_ptrs[new_row]; -// for (size_type k = rp_row_ptrs[row]; -// k < size_type(rp_row_ptrs[row + 1]); ++k) { -// rp_col_idxs[k] = orig_col_idxs[new_k]; -// rp_vals[k] = orig_vals[new_k]; -// new_k++; -// } -// } -//} - - -template -void row_permute(std::shared_ptr exec, - const IndexType *const permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// row_permute_impl(exec, permutation_indices, orig, row_permuted); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ROW_PERMUTE_KERNEL); - - -template -void inverse_row_permute(std::shared_ptr exec, - const IndexType *const permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *row_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// Array inv_perm(*permutation_indices); -// auto iperm = inv_perm.get_data(); -// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { -// iperm[perm[ind]] = ind; -// } -// -// row_permute_impl(exec, &inv_perm, orig, row_permuted); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_ROW_PERMUTE_KERNEL); - - -template -void column_permute_impl(const IndexType *const permutation_indices, - const matrix::Fbcsr *const orig, - matrix::Fbcsr - *const column_permuted) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// auto orig_row_ptrs = orig->get_const_row_ptrs(); -// auto orig_col_idxs = orig->get_const_col_idxs(); -// auto orig_vals = orig->get_const_values(); -// auto cp_row_ptrs = column_permuted->get_row_ptrs(); -// auto cp_col_idxs = column_permuted->get_col_idxs(); -// auto cp_vals = column_permuted->get_values(); -// auto num_nnz = orig->get_num_stored_elements(); -// size_type num_rows = orig->get_size()[0]; -// size_type num_cols = orig->get_size()[1]; -// -// for (size_type row = 0; row < num_rows; ++row) { -// cp_row_ptrs[row] = orig_row_ptrs[row]; -// for (size_type k = orig_row_ptrs[row]; -// k < size_type(orig_row_ptrs[row + 1]); ++k) { -// cp_col_idxs[k] = perm[orig_col_idxs[k]]; -// cp_vals[k] = orig_vals[k]; -// } -// } -// cp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; 
-//} - - -template -void column_permute(std::shared_ptr exec, - const IndexType *permutation_indices, - const matrix::Fbcsr *orig, - matrix::Fbcsr *column_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto perm = permutation_indices->get_const_data(); -// Array inv_perm(*permutation_indices); -// auto iperm = inv_perm.get_data(); -// for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { -// iperm[perm[ind]] = ind; -// } -// column_permute_impl(&inv_perm, orig, column_permuted); -//} - - -template -void inverse_column_permute( - std::shared_ptr exec, - const IndexType *const permutation_indices, - const matrix::Fbcsr *const orig, - matrix::Fbcsr *const column_permuted) - GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// column_permute_impl(permutation_indices, orig, column_permuted); -//} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_INVERSE_COLUMN_PERMUTE_KERNEL); - - template void calculate_nonzeros_per_row( std::shared_ptr exec, @@ -981,20 +439,6 @@ template void sort_by_column_index(std::shared_ptr exec, matrix::Fbcsr *to_sort) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto values = to_sort->get_values(); -// auto row_ptrs = to_sort->get_row_ptrs(); -// auto col_idxs = to_sort->get_col_idxs(); -// const auto number_rows = to_sort->get_size()[0]; -// for (size_type i = 0; i < number_rows; ++i) { -// auto start_row_idx = row_ptrs[i]; -// auto row_nnz = row_ptrs[i + 1] - start_row_idx; -// auto helper = detail::IteratorFactory( -// col_idxs + start_row_idx, values + start_row_idx, row_nnz); -// std::sort(helper.begin(), helper.end()); -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 4b75c240236..367d643af14 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -44,14 +44,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include -#include #include #include -#include -#include #include #include -#include #include @@ -75,12 +71,8 @@ class Fbcsr : public ::testing::Test { typename std::tuple_element<1, decltype(ValueIndexType())>::type; using Mtx = gko::matrix::Fbcsr; using Csr = gko::matrix::Csr; - using Coo = gko::matrix::Coo; using Dense = gko::matrix::Dense; - using Sellp = gko::matrix::Sellp; using SparCsr = gko::matrix::SparsityCsr; - using Ell = gko::matrix::Ell; - using Hybrid = gko::matrix::Hybrid; using Diag = gko::matrix::Diagonal; using Vec = gko::matrix::Dense; @@ -93,7 +85,6 @@ class Fbcsr : public ::testing::Test { refmtx(fbsample.generate_fbcsr()), refcsrmtx(fbsample.generate_csr()), refdenmtx(fbsample.generate_dense()), - refcoomtx(fbsample.generate_coo()), refspcmtx(fbsample.generate_sparsity_csr()), mtx2(fbsample2.generate_fbcsr()), m2diag(fbsample2.extract_diagonal()), @@ -116,20 +107,6 @@ class Fbcsr : public ::testing::Test { } } - void assert_equal_to_mtx(const Coo *m) - { - ASSERT_EQ(m->get_size(), refcoomtx->get_size()); - ASSERT_EQ(m->get_num_stored_elements(), - refcoomtx->get_num_stored_elements()); - for (index_type i = 0; i < m->get_num_stored_elements(); i++) { - ASSERT_EQ(m->get_const_row_idxs()[i], - refcoomtx->get_const_row_idxs[i]); - ASSERT_EQ(m->get_const_col_idxs()[i], - refcoomtx->get_const_col_idxs[i]); - ASSERT_EQ(m->get_const_values()[i], refcoomtx->get_const_values[i]); - } - } - void assert_equal_to_mtx(const SparCsr *m) { ASSERT_EQ(m->get_size(), refspcmtx->get_size()); @@ -152,7 +129,6 @@ class Fbcsr : public ::testing::Test { const std::unique_ptr refmtx; const std::unique_ptr refcsrmtx; const std::unique_ptr refdenmtx; - const std::unique_ptr refcoomtx; const std::unique_ptr refspcmtx; const std::unique_ptr mtx2; const std::unique_ptr m2diag; @@ -318,111 +294,6 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) } -// TYPED_TEST(Fbcsr, AppliesToFbcsrMatrix) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using T = typename TestFixture::value_type; -// this->mtx->apply(this->mtx3_unsorted.get(), this->mtx2.get()); -// -// ASSERT_EQ(this->mtx2->get_size(), gko::dim<2>(2, 3)); -// ASSERT_EQ(this->mtx2->get_num_stored_elements(), 6); -// ASSERT_TRUE(this->mtx2->is_sorted_by_column_index()); -// auto r = this->mtx2->get_const_row_ptrs(); -// auto c = this->mtx2->get_const_col_idxs(); -// auto v = this->mtx2->get_const_values(); -// // 13 5 31 -// // 15 5 40 -// EXPECT_EQ(r[0], 0); -// EXPECT_EQ(r[1], 3); -// EXPECT_EQ(r[2], 6); -// EXPECT_EQ(c[0], 0); -// EXPECT_EQ(c[1], 1); -// EXPECT_EQ(c[2], 2); -// EXPECT_EQ(c[3], 0); -// EXPECT_EQ(c[4], 1); -// EXPECT_EQ(c[5], 2); -// EXPECT_EQ(v[0], T{13}); -// EXPECT_EQ(v[1], T{5}); -// EXPECT_EQ(v[2], T{31}); -// EXPECT_EQ(v[3], T{15}); -// EXPECT_EQ(v[4], T{5}); -// EXPECT_EQ(v[5], T{40}); -//} - - -// TYPED_TEST(Fbcsr, AppliesLinearCombinationToFbcsrMatrix) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Vec = typename TestFixture::Vec; -// using T = typename TestFixture::value_type; -// auto alpha = gko::initialize({-1.0}, this->exec); -// auto beta = gko::initialize({2.0}, this->exec); -// -// this->mtx->apply(alpha.get(), this->mtx3_unsorted.get(), beta.get(), -// this->mtx2.get()); -// -// ASSERT_EQ(this->mtx2->get_size(), gko::dim<2>(2, 3)); -// ASSERT_EQ(this->mtx2->get_num_stored_elements(), 6); -// ASSERT_TRUE(this->mtx2->is_sorted_by_column_index()); -// auto r 
= this->mtx2->get_const_row_ptrs(); -// auto c = this->mtx2->get_const_col_idxs(); -// auto v = this->mtx2->get_const_values(); -// // -11 1 -27 -// // -15 5 -40 -// EXPECT_EQ(r[0], 0); -// EXPECT_EQ(r[1], 3); -// EXPECT_EQ(r[2], 6); -// EXPECT_EQ(c[0], 0); -// EXPECT_EQ(c[1], 1); -// EXPECT_EQ(c[2], 2); -// EXPECT_EQ(c[3], 0); -// EXPECT_EQ(c[4], 1); -// EXPECT_EQ(c[5], 2); -// EXPECT_EQ(v[0], T{-11}); -// EXPECT_EQ(v[1], T{1}); -// EXPECT_EQ(v[2], T{-27}); -// EXPECT_EQ(v[3], T{-15}); -// EXPECT_EQ(v[4], T{5}); -// EXPECT_EQ(v[5], T{-40}); -//} - - -// TYPED_TEST(Fbcsr, AppliesLinearCombinationToIdentityMatrix) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using T = typename TestFixture::value_type; -// using Vec = typename TestFixture::Vec; -// using Mtx = typename TestFixture::Mtx; -// auto alpha = gko::initialize({-3.0}, this->exec); -// auto beta = gko::initialize({2.0}, this->exec); -// auto a = gko::initialize( -// {I{2.0, 0.0, 3.0}, I{0.0, 1.0, -1.5}, I{0.0, -2.0, 0.0}, -// I{5.0, 0.0, 0.0}, I{1.0, 0.0, 4.0}, I{2.0, -2.0, 0.0}, -// I{0.0, 0.0, 0.0}}, -// this->exec); -// auto b = gko::initialize( -// {I{2.0, -2.0, 0.0}, I{1.0, 0.0, 4.0}, I{2.0, 0.0, 3.0}, -// I{0.0, 1.0, -1.5}, I{1.0, 0.0, 0.0}, I{0.0, 0.0, 0.0}, -// I{0.0, 0.0, 0.0}}, -// this->exec); -// auto expect = gko::initialize( -// {I{-2.0, -4.0, -9.0}, I{2.0, -3.0, 12.5}, I{4.0, 6.0, 6.0}, -// I{-15.0, 2.0, -3.0}, I{-1.0, 0.0, -12.0}, I{-6.0, 6.0, 0.0}, -// I{0.0, 0.0, 0.0}}, -// this->exec); -// auto id = gko::matrix::Identity::create(this->exec, a->get_size()[1]); -// -// a->apply(gko::lend(alpha), gko::lend(id), gko::lend(beta), gko::lend(b)); -// -// GKO_ASSERT_MTX_NEAR(b, expect, r::value); -// GKO_ASSERT_MTX_EQ_SPARSITY(b, expect); -// ASSERT_TRUE(b->is_sorted_by_column_index()); -//} - - TYPED_TEST(Fbcsr, ApplyFailsOnWrongInnerDimension) { using Vec = typename TestFixture::Vec; @@ -545,57 +416,6 @@ TYPED_TEST(Fbcsr, MovesToCsr) } -// TYPED_TEST(Fbcsr, ConvertsToCoo) -// { -// using Coo = typename TestFixture::Coo; -// auto coo_mtx = Coo::create(this->mtx->get_executor()); - -// this->mtx->convert_to(coo_mtx.get()); - -// this->assert_equal_to_mtx(coo_mtx.get()); -// } - - -// TYPED_TEST(Fbcsr, MovesToCoo) -// { -// using Coo = typename TestFixture::Coo; -// auto coo_mtx = Coo::create(this->mtx->get_executor()); - -// this->mtx->move_to(coo_mtx.get()); - -// this->assert_equal_to_mtx(coo_mtx.get()); -// } - - -// TYPED_TEST(Fbcsr, ConvertsToSellp) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Sellp = typename TestFixture::Sellp; -// auto sellp_mtx = Sellp::create(this->mtx->get_executor()); -// -// this->mtx->convert_to(sellp_mtx.get()); -// -// this->assert_equal_to_mtx(sellp_mtx.get()); -//} - - -// TYPED_TEST(Fbcsr, MovesToSellp) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Sellp = typename TestFixture::Sellp; -// using Fbcsr = typename TestFixture::Mtx; -// auto sellp_mtx = Sellp::create(this->mtx->get_executor()); -// auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); -// -// fbcsr_ref->copy_from(this->mtx.get()); -// fbcsr_ref->move_to(sellp_mtx.get()); -// -// this->assert_equal_to_mtx(sellp_mtx.get()); -//} - - TYPED_TEST(Fbcsr, ConvertsToSparsityCsr) { using SparsityCsr = typename TestFixture::SparCsr; @@ -621,68 +441,6 @@ TYPED_TEST(Fbcsr, MovesToSparsityCsr) } -// TYPED_TEST(Fbcsr, 
ConvertsToHybridAutomatically) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid = typename TestFixture::Hybrid; -// auto hybrid_mtx = Hybrid::create(this->mtx->get_executor()); -// -// this->mtx->convert_to(hybrid_mtx.get()); -// -// this->assert_equal_to_mtx(hybrid_mtx.get()); -//} - - -// TYPED_TEST(Fbcsr, MovesToHybridAutomatically) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid = typename TestFixture::Hybrid; -// using Fbcsr = typename TestFixture::Mtx; -// auto hybrid_mtx = Hybrid::create(this->mtx->get_executor()); -// auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); -// -// fbcsr_ref->copy_from(this->mtx.get()); -// fbcsr_ref->move_to(hybrid_mtx.get()); -// -// this->assert_equal_to_mtx(hybrid_mtx.get()); -//} - - -// TYPED_TEST(Fbcsr, ConvertsToHybridByColumn2) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid = typename TestFixture::Hybrid; -// auto hybrid_mtx = -// Hybrid::create(this->mtx2->get_executor(), -// std::make_shared(2)); -// -// this->mtx2->convert_to(hybrid_mtx.get()); -// -// this->assert_equal_to_mtx2(hybrid_mtx.get()); -//} - - -// TYPED_TEST(Fbcsr, MovesToHybridByColumn2) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Hybrid = typename TestFixture::Hybrid; -// using Fbcsr = typename TestFixture::Mtx; -// auto hybrid_mtx = -// Hybrid::create(this->mtx2->get_executor(), -// std::make_shared(2)); -// auto fbcsr_ref = Fbcsr::create(this->mtx2->get_executor()); -// -// fbcsr_ref->copy_from(this->mtx2.get()); -// fbcsr_ref->move_to(hybrid_mtx.get()); -// -// this->assert_equal_to_mtx2(hybrid_mtx.get()); -//} - - TYPED_TEST(Fbcsr, ConvertsEmptyToPrecision) { using ValueType = typename TestFixture::value_type; @@ -749,112 +507,6 @@ TYPED_TEST(Fbcsr, MovesEmptyToDense) } -// TYPED_TEST(Fbcsr, ConvertsEmptyToCoo) -// { -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Coo = gko::matrix::Coo; -// auto empty = Fbcsr::create(this->exec); -// auto res = Coo::create(this->exec); - -// empty->convert_to(res.get()); - -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_FALSE(res->get_size()); -// } - - -// TYPED_TEST(Fbcsr, MovesEmptyToCoo) -// { -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Coo = gko::matrix::Coo; -// auto empty = Fbcsr::create(this->exec); -// auto res = Coo::create(this->exec); - -// empty->move_to(res.get()); - -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_FALSE(res->get_size()); -// } - - -// TYPED_TEST(Fbcsr, ConvertsEmptyToEll) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Ell = gko::matrix::Ell; -// auto empty = Fbcsr::create(this->exec); -// auto res = Ell::create(this->exec); -// -// empty->convert_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_FALSE(res->get_size()); -//} - - -// TYPED_TEST(Fbcsr, MovesEmptyToEll) -// GKO_NOT_IMPLEMENTED; -//{ 
-// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Ell = gko::matrix::Ell; -// auto empty = Fbcsr::create(this->exec); -// auto res = Ell::create(this->exec); -// -// empty->move_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_FALSE(res->get_size()); -//} - - -// TYPED_TEST(Fbcsr, ConvertsEmptyToSellp) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Sellp = gko::matrix::Sellp; -// auto empty = Fbcsr::create(this->exec); -// auto res = Sellp::create(this->exec); -// -// empty->convert_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_EQ(*res->get_const_slice_sets(), 0); -// ASSERT_FALSE(res->get_size()); -//} - - -// TYPED_TEST(Fbcsr, MovesEmptyToSellp) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Sellp = gko::matrix::Sellp; -// auto empty = Fbcsr::create(this->exec); -// auto res = Sellp::create(this->exec); -// -// empty->move_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_EQ(*res->get_const_slice_sets(), 0); -// ASSERT_FALSE(res->get_size()); -//} - - TYPED_TEST(Fbcsr, ConvertsEmptyToSparsityCsr) { using ValueType = typename TestFixture::value_type; @@ -889,42 +541,6 @@ TYPED_TEST(Fbcsr, MovesEmptyToSparsityCsr) } -// TYPED_TEST(Fbcsr, ConvertsEmptyToHybrid) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Hybrid = gko::matrix::Hybrid; -// auto empty = Fbcsr::create(this->exec); -// auto res = Hybrid::create(this->exec); -// -// empty->convert_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_FALSE(res->get_size()); -//} - - -// TYPED_TEST(Fbcsr, MovesEmptyToHybrid) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using ValueType = typename TestFixture::value_type; -// using IndexType = typename TestFixture::index_type; -// using Fbcsr = typename TestFixture::Mtx; -// using Hybrid = gko::matrix::Hybrid; -// auto empty = Fbcsr::create(this->exec); -// auto res = Hybrid::create(this->exec); -// -// empty->move_to(res.get()); -// -// ASSERT_EQ(res->get_num_stored_elements(), 0); -// ASSERT_FALSE(res->get_size()); -//} - - TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) { using IndexType = typename TestFixture::index_type; @@ -942,53 +558,6 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) } -// TYPED_TEST(Fbcsr, CalculatesTotalCols) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// gko::size_type total_cols; -// gko::size_type stride_factor = gko::matrix::default_stride_factor; -// gko::size_type slice_size = gko::matrix::default_slice_size; -// -// gko::kernels::reference::fbcsr::calculate_total_cols( 
-// this->exec, this->mtx.get(), &total_cols, stride_factor, slice_size); -// -// ASSERT_EQ(total_cols, 3); -//} - - -// TYPED_TEST(Fbcsr, ConvertsToEll) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Ell = typename TestFixture::Ell; -// using Dense = typename TestFixture::Vec; -// auto ell_mtx = Ell::create(this->mtx->get_executor()); -// auto dense_mtx = Dense::create(this->mtx->get_executor()); -// auto ref_dense_mtx = Dense::create(this->mtx->get_executor()); -// -// this->mtx->convert_to(ell_mtx.get()); -// -// this->assert_equal_to_mtx(ell_mtx.get()); -//} - - -// TYPED_TEST(Fbcsr, MovesToEll) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Ell = typename TestFixture::Ell; -// using Dense = typename TestFixture::Vec; -// auto ell_mtx = Ell::create(this->mtx->get_executor()); -// auto dense_mtx = Dense::create(this->mtx->get_executor()); -// auto ref_dense_mtx = Dense::create(this->mtx->get_executor()); -// -// this->mtx->move_to(ell_mtx.get()); -// -// this->assert_equal_to_mtx(ell_mtx.get()); -//} - - TYPED_TEST(Fbcsr, SquareMtxIsTransposable) { using Fbcsr = typename TestFixture::Mtx; @@ -1013,214 +582,6 @@ TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) } -// TYPED_TEST(Fbcsr, SquareMatrixIsRowPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // clang-format off -// auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}, -// {0.0, 1.5, 2.0}}, this->exec); -// // clang-format on -// gko::Array permute_idxs{this->exec, {1, 2, 0}}; -// -// auto row_permute = p_mtx->row_permute(&permute_idxs); -// -// auto row_permute_fbcsr = static_cast(row_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(row_permute_fbcsr, -// l({{0.0, 5.0, 0.0}, -// {0.0, 1.5, 2.0}, -// {1.0, 3.0, 2.0}}), -// 0.0); -// // clang-format on -//} - - -// TYPED_TEST(Fbcsr, NonSquareMatrixIsRowPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // clang-format off -// auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}}, this->exec); -// // clang-format on -// gko::Array permute_idxs{this->exec, {1, 0}}; -// -// auto row_permute = p_mtx->row_permute(&permute_idxs); -// -// auto row_permute_fbcsr = static_cast(row_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(row_permute_fbcsr, -// l({{0.0, 5.0, 0.0}, -// {1.0, 3.0, 2.0}}), -// 0.0); -// // clang-format on -//} - - -// TYPED_TEST(Fbcsr, SquareMatrixIsColPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // clang-format off -// auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}, -// {0.0, 1.5, 2.0}}, this->exec); -// // clang-format on -// gko::Array permute_idxs{this->exec, {1, 2, 0}}; -// -// auto c_permute = p_mtx->column_permute(&permute_idxs); -// -// auto c_permute_fbcsr = static_cast(c_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(c_permute_fbcsr, -// l({{3.0, 2.0, 1.0}, -// {5.0, 0.0, 0.0}, -// {1.5, 2.0, 0.0}}), -// 0.0); -// // 
clang-format on -//} - - -// TYPED_TEST(Fbcsr, NonSquareMatrixIsColPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // clang-format off -// auto p_mtx = gko::initialize({{1.0, 0.0, 2.0}, -// {0.0, 5.0, 0.0}}, this->exec); -// // clang-format on -// gko::Array permute_idxs{this->exec, {1, 2, 0}}; -// -// auto c_permute = p_mtx->column_permute(&permute_idxs); -// -// auto c_permute_fbcsr = static_cast(c_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(c_permute_fbcsr, -// l({{0.0, 2.0, 1.0}, -// {5.0, 0.0, 0.0}}), -// 0.0); -// // clang-format on -//} - - -// TYPED_TEST(Fbcsr, SquareMatrixIsInverseRowPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // clang-format off -// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}, -// {0.0, 1.5, 2.0}}, this->exec); -// // clang-format on -// gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; -// -// auto inverse_row_permute = -// inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs); -// -// auto inverse_row_permute_fbcsr = -// static_cast(inverse_row_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(inverse_row_permute_fbcsr, -// l({{0.0, 1.5, 2.0}, -// {1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}}), -// 0.0); -// // clang-format on -//} - - -// TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseRowPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // clang-format off -// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}}, this->exec); -// // clang-format on -// gko::Array inverse_permute_idxs{this->exec, {1, 0}}; -// -// auto inverse_row_permute = -// inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs); -// -// auto inverse_row_permute_fbcsr = -// static_cast(inverse_row_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(inverse_row_permute_fbcsr, -// l({{0.0, 5.0, 0.0}, -// {1.0, 3.0, 2.0}}), -// 0.0); -// // clang-format on -//} - - -// TYPED_TEST(Fbcsr, SquareMatrixIsInverseColPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // clang-format off -// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}, -// {0.0, 1.5, 2.0}}, this->exec); -// // clang-format on -// gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; -// -// auto inverse_c_permute = -// inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs); -// -// auto inverse_c_permute_fbcsr = static_cast(inverse_c_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(inverse_c_permute_fbcsr, -// l({{2.0, 1.0, 3.0}, -// {0.0, 0.0, 5.0}, -// {2.0, 0.0, 1.5}}), -// 0.0); -// // clang-format on -//} - - -// TYPED_TEST(Fbcsr, NonSquareMatrixIsInverseColPermutable) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// using Fbcsr = typename TestFixture::Mtx; -// using index_type = typename TestFixture::index_type; -// // 
clang-format off -// auto inverse_p_mtx = gko::initialize({{1.0, 3.0, 2.0}, -// {0.0, 5.0, 0.0}}, this->exec); -// // clang-format on -// gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; -// -// auto inverse_c_permute = -// inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs); -// -// auto inverse_c_permute_fbcsr = static_cast(inverse_c_permute.get()); -// // clang-format off -// GKO_ASSERT_MTX_NEAR(inverse_c_permute_fbcsr, -// l({{2.0, 1.0, 3.0}, -// {0.0, 0.0, 5.0}}), -// 0.0); -// // clang-format on -//} - - TYPED_TEST(Fbcsr, RecognizeSortedMatrix) { ASSERT_TRUE(this->mtx->is_sorted_by_column_index()); @@ -1240,28 +601,6 @@ TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) } -// TYPED_TEST(Fbcsr, SortSortedMatrix) -// { -// auto matrix = this->mtx->clone(); - -// matrix->sort_by_column_index(); - -// GKO_ASSERT_MTX_NEAR(matrix, this->mtx, 0.0); -// } - - -// TYPED_TEST(Fbcsr, SortUnsortedMatrix) -// GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto matrix = this->mtx3_unsorted->clone(); -// -// matrix->sort_by_column_index(); -// -// GKO_ASSERT_MTX_NEAR(matrix, this->mtx3_sorted, 0.0); -//} - - TYPED_TEST(Fbcsr, ExtractsDiagonal) { using T = typename TestFixture::value_type; From 040686e07556d77ac9dc486fabd21039b6bbf814 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 26 Nov 2020 15:13:19 +0100 Subject: [PATCH 18/58] fixed docstrings, made const usage more consistent, and other fixes - removed remaining commented code - fixed documentation in FbcsrBuilder - removed unnecessary headers - improved docstrings for FBCSR sample matrix generation --- core/components/fixed_block.hpp | 13 ++- core/matrix/fbcsr.cpp | 29 ++++--- core/matrix/fbcsr_builder.hpp | 11 ++- core/matrix/fbcsr_kernels.hpp | 4 - core/test/matrix/fbcsr_sample.cpp | 8 +- core/test/matrix/fbcsr_sample.hpp | 71 ++++++++++----- cuda/matrix/fbcsr_kernels.cu | 71 --------------- hip/matrix/fbcsr_kernels.hip.cpp | 72 +-------------- include/ginkgo/core/base/blockutils.hpp | 9 +- include/ginkgo/core/matrix/fbcsr.hpp | 67 ++++++++------ omp/matrix/fbcsr_kernels.cpp | 111 ------------------------ 11 files changed, 138 insertions(+), 328 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index fbd8a259f69..32c6fb8ddda 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -49,6 +49,10 @@ namespace blockutils { * * The primary use is to reinterpret subsets of entries in a big array as * small dense blocks. + * + * @tparam ValueType The numeric type of entries of the block + * @tparam nrows Number of rows + * @tparam ncols Number of columns */ template class FixedBlock final { @@ -83,7 +87,11 @@ class FixedBlock final { }; -/// A lightweight dynamic block type for the host space +/** + * A lightweight dynamic block type for the host space + * + * @tparam ValueType The numeric type of entries of the block + */ template class DenseBlock final { public: @@ -145,6 +153,9 @@ class DenseBlock final { /** Note that accessing BSR values using this type of view abstracts away the * storage layout within the individual blocks, as long as all blocks use the * same layout. For now, row-major blocks are assumed. 
+ * + * @tparam ValueType The numeric type of entries of the block + * @tparam IndexType The type of integer used to identify the different blocks */ template class DenseBlocksView final { diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 54e938d768d..46fe819d240 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -121,8 +121,10 @@ void Fbcsr::apply_impl(const LinOp *const b, template -void Fbcsr::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Fbcsr::apply_impl(const LinOp *const alpha, + const LinOp *const b, + const LinOp *const beta, + LinOp *const x) const { using Dense = Dense; using TFbcsr = Fbcsr; @@ -143,7 +145,7 @@ void Fbcsr::apply_impl(const LinOp *alpha, const LinOp *b, template void Fbcsr::convert_to( - Fbcsr *result) const + Fbcsr *const result) const { bool same_executor = this->get_executor() == result->get_executor(); // NOTE: as soon as strategies are improved, this can be reverted @@ -163,7 +165,8 @@ void Fbcsr::convert_to( template -void Fbcsr::move_to(Fbcsr *result) +void Fbcsr::move_to( + Fbcsr *const result) { bool same_executor = this->get_executor() == result->get_executor(); EnableLinOp::move_to(result); @@ -188,14 +191,15 @@ void Fbcsr::convert_to( template void Fbcsr::move_to( - Fbcsr, IndexType> *result) + Fbcsr, IndexType> *const result) { this->convert_to(result); } template -void Fbcsr::convert_to(Dense *result) const +void Fbcsr::convert_to( + Dense *const result) const { auto exec = this->get_executor(); auto tmp = Dense::create(exec, this->get_size()); @@ -205,7 +209,7 @@ void Fbcsr::convert_to(Dense *result) const template -void Fbcsr::move_to(Dense *result) +void Fbcsr::move_to(Dense *const result) { this->convert_to(result); } @@ -213,7 +217,7 @@ void Fbcsr::move_to(Dense *result) template void Fbcsr::convert_to( - Csr *result) const + Csr *const result) const { auto exec = this->get_executor(); auto tmp = Csr::create( @@ -225,7 +229,8 @@ void Fbcsr::convert_to( template -void Fbcsr::move_to(Csr *result) +void Fbcsr::move_to( + Csr *const result) { this->convert_to(result); } @@ -233,7 +238,7 @@ void Fbcsr::move_to(Csr *result) template void Fbcsr::convert_to( - SparsityCsr *result) const + SparsityCsr *const result) const { using gko::blockutils::getNumBlocks; auto exec = this->get_executor(); @@ -252,7 +257,7 @@ void Fbcsr::convert_to( template void Fbcsr::move_to( - SparsityCsr *result) + SparsityCsr *const result) { this->convert_to(result); } @@ -497,7 +502,7 @@ Fbcsr::compute_absolute() const template template void Fbcsr::convert_strategy_helper( - FbcsrType *result) const + FbcsrType *const result) const { auto strat = this->get_strategy().get(); std::shared_ptr> diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp index 54b6ece53f9..62096ae27fc 100644 --- a/core/matrix/fbcsr_builder.hpp +++ b/core/matrix/fbcsr_builder.hpp @@ -53,20 +53,23 @@ template class FbcsrBuilder { public: /** - * Returns the column index array of the CSR matrix. + * @return The column index array of the matrix. */ Array &get_col_idx_array() { return matrix_->col_idxs_; } /** - * Returns the value array of the CSR matrix. + * @return The value array of the matrix. */ Array &get_value_array() { return matrix_->values_; } - /// Returns the (uniform) block size + /** + * @return The (uniform) block size + */ int get_block_size() const { return matrix_->bs_; } /** - * Initializes a CsrBuilder from an existing CSR matrix. 
+ * @param matrix An existing FBCSR matrix + * for which intrusive access is needed */ explicit FbcsrBuilder(Fbcsr *const matrix) : matrix_{matrix} diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index c7a2747039e..ed37c49153d 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -39,13 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include #include #include -#include -#include -#include #include diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 246510db57d..72685d99eab 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -50,15 +50,15 @@ namespace testing { namespace matstr = gko::matrix::matrix_strategy; -/// Generates a copu of the given matrix with a different scalar type -/** \tparam AbsValueType The scalar type of the output matrix +/** Generates a copy of the given matrix with a different scalar type + * + * \tparam AbsValueType The scalar type of the output matrix */ template static std::unique_ptr< gko::matrix::Fbcsr> generate_acopy_impl(const FbcsrType *const mat) { - // using AbsValueType = typename gko::remove_complex; using index_type = typename FbcsrType::index_type; using value_type = typename FbcsrType::value_type; using AbsFbcsr = gko::matrix::Fbcsr; @@ -686,8 +686,6 @@ void FbcsrSample2::apply( const ValueType defv = sct(0.15 + FBCSR_TEST_OFFSET); - // ValueType *const yvals = y->get_data(); - // const ValueType *const xvals = x->get_const_data(); for (index_type k = 0; k < x->get_size()[1]; k++) { y->at(0, k) = sct(1.0) * x->at(0, k) + sct(2.0) * x->at(1, k); y->at(1, k) = sct(3.0) * x->at(0, k); diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 5e879964950..e6fe63dc37b 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef GKO_CORE_MATRIX_TEST_FBCSR_SAMPLE_HPP #define GKO_CORE_MATRIX_TEST_FBCSR_SAMPLE_HPP + #include #include #include @@ -42,12 +43,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include + namespace gko { namespace testing { -/// Generates the same sample block CSR matrix in different formats -/** This currently a 6 x 12 matrix with 3x3 blocks. +/** Generates the same sample block CSR matrix in different formats + * + * This currently a 6 x 12 matrix with 3x3 blocks. * Assumes that the layout within each block is row-major. * Generates complex data when instantiated with a complex value type. */ @@ -66,30 +69,64 @@ class FbcsrSample { FbcsrSample(std::shared_ptr exec); + /** + * @return The sample matrix in FBCSR format + */ std::unique_ptr generate_fbcsr() const; - /// Generates CSR matrix equal to the BSR matrix. Keeps explicit zeros. + /** + * @return Sample matrix in CSR format + * + * Keeps explicit zeros. + */ std::unique_ptr generate_csr() const; + /** + * @return Sample matrix as dense + */ std::unique_ptr generate_dense() const; - /// Returns the matrix in COO format keeping explicit nonzeros - /** The nonzeros are sorted by row and column. + /** + * @return The matrix in COO format keeping explicit nonzeros + * + * The nonzeros are sorted by row and column. 
*/ std::unique_ptr generate_coo() const; + /** + * @return Sparsity structure of the matrix + */ std::unique_ptr generate_sparsity_csr() const; + /** + * @return Array of COO triplets that represent the matrix + * + * @note The order of the triplets assumes the blocks are stored row-major + */ MatData generate_matrix_data() const; + /** + * @return Array of COO triplets that represent the matrix; includes + * explicit zeros + * + * @note The order of the triplets assumes the blocks are stored row-major + */ MatData generate_matrix_data_with_explicit_zeros() const; - /// Returns an array containing number of stored values in each row - /// (not block-row) + /** + * @return An array containing number of stored values in each row + */ gko::Array getNonzerosPerRow() const; + /** + * @return FBCSR matrix with absolute values of respective entries + */ std::unique_ptr generate_abs_fbcsr() const; + /** + * @return FBCSR matrix with real scalar type, + * with absolute values of respective entries + */ std::unique_ptr, index_type>> generate_abs_fbcsr_abstype() const; @@ -126,8 +163,9 @@ class FbcsrSample { } }; -/// Generates the a sample block CSR matrix in different formats -/** This currently a 6 x 8 matrix with 2x2 blocks. +/** + * Generates a sample block CSR matrix in different formats. + * 6 x 8 matrix with 2x2 blocks. */ template class FbcsrSample2 { @@ -135,11 +173,7 @@ class FbcsrSample2 { using value_type = ValueType; using index_type = IndexType; using Fbcsr = gko::matrix::Fbcsr; - using Csr = gko::matrix::Csr; - using Coo = gko::matrix::Coo; using Dense = gko::matrix::Dense; - using MatData = gko::matrix_data; - using SparCsr = gko::matrix::SparsityCsr; using Diagonal = gko::matrix::Diagonal; FbcsrSample2(std::shared_ptr exec); @@ -203,8 +237,9 @@ class FbcsrSampleSquare { const std::shared_ptr exec; }; -/// Generates the a sample block CSR matrix with complex values -/** This currently a 6 x 8 matrix with 2x2 blocks. +/** + * Generates the a sample block CSR matrix with complex values + * This is a 6 x 8 matrix with 2x2 blocks. */ template class FbcsrSampleComplex { @@ -212,12 +247,6 @@ class FbcsrSampleComplex { using value_type = ValueType; using index_type = IndexType; using Fbcsr = gko::matrix::Fbcsr; - using Csr = gko::matrix::Csr; - using Coo = gko::matrix::Coo; - using Dense = gko::matrix::Dense; - using MatData = gko::matrix_data; - using SparCsr = gko::matrix::SparsityCsr; - using Diagonal = gko::matrix::Diagonal; static_assert(is_complex(), "Only for complex types!"); diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index f2ac5a85a01..7da16b2976d 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -43,24 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/components/fill_array.hpp" -#include "core/components/prefix_sum.hpp" -#include "core/matrix/dense_kernels.hpp" -#include "core/matrix/fbcsr_builder.hpp" -#include "core/synthesizer/implementation_selection.hpp" #include "cuda/base/config.hpp" -#include "cuda/base/cusparse_bindings.hpp" -#include "cuda/base/math.hpp" -#include "cuda/base/pointer_mode_guard.hpp" -#include "cuda/base/types.hpp" -#include "cuda/components/atomic.cuh" -#include "cuda/components/cooperative_groups.cuh" -#include "cuda/components/intrinsics.cuh" -#include "cuda/components/merging.cuh" -#include "cuda/components/reduction.cuh" -#include "cuda/components/segment_scan.cuh" -#include "cuda/components/thread_ids.cuh" -#include "cuda/components/uninitialized_array.hpp" namespace gko { @@ -116,13 +99,6 @@ template void convert_row_ptrs_to_idxs(std::shared_ptr exec, const IndexType *ptrs, size_type num_rows, IndexType *idxs) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto grid_dim = ceildiv(num_rows, default_block_size); -// -// kernel::convert_row_ptrs_to_idxs<<>>( -// num_rows, as_cuda_type(ptrs), as_cuda_type(idxs)); -//} template @@ -168,31 +144,6 @@ void calculate_max_nnz_per_row( std::shared_ptr exec, const matrix::Fbcsr *source, size_type *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// -// auto nnz_per_row = Array(exec, num_rows); -// auto block_results = Array(exec, default_block_size); -// auto d_result = Array(exec, 1); -// -// const auto grid_dim = ceildiv(num_rows, default_block_size); -// kernel::calculate_nnz_per_row<<>>( -// num_rows, as_cuda_type(source->get_const_row_ptrs()), -// as_cuda_type(nnz_per_row.get_data())); -// -// const auto n = ceildiv(num_rows, default_block_size); -// const auto reduce_dim = n <= default_block_size ? 
n : default_block_size; -// kernel::reduce_max_nnz<<>>( -// num_rows, as_cuda_type(nnz_per_row.get_const_data()), -// as_cuda_type(block_results.get_data())); -// -// kernel::reduce_max_nnz<<<1, default_block_size>>>( -// reduce_dim, as_cuda_type(block_results.get_const_data()), -// as_cuda_type(d_result.get_data())); -// -// *result = exec->copy_val_to_host(d_result.get_const_data()); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -203,15 +154,6 @@ void calculate_nonzeros_per_row( std::shared_ptr exec, const matrix::Fbcsr *source, Array *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// auto row_ptrs = source->get_const_row_ptrs(); -// auto grid_dim = ceildiv(num_rows, default_block_size); -// -// kernel::calculate_nnz_per_row<<>>( -// num_rows, as_cuda_type(row_ptrs), as_cuda_type(result->get_data())); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -231,19 +173,6 @@ void is_sorted_by_column_index( std::shared_ptr exec, const matrix::Fbcsr *to_check, bool *is_sorted) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// *is_sorted = true; -// auto cpu_array = Array::view(exec->get_master(), 1, is_sorted); -// auto gpu_array = Array{exec, cpu_array}; -// auto block_size = default_block_size; -// auto num_rows = static_cast(to_check->get_size()[0]); -// auto num_blocks = ceildiv(num_rows, block_size); -// kernel::check_unsorted<<>>( -// to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), -// num_rows, gpu_array.get_data()); -// cpu_array = gpu_array; -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index 891e644b25c..1ce2735d8f6 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -46,24 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/components/fill_array.hpp" -#include "core/components/prefix_sum.hpp" -#include "core/matrix/dense_kernels.hpp" -#include "core/matrix/fbcsr_builder.hpp" -#include "core/synthesizer/implementation_selection.hpp" -#include "hip/base/config.hip.hpp" -#include "hip/base/hipsparse_bindings.hip.hpp" -#include "hip/base/math.hip.hpp" -#include "hip/base/pointer_mode_guard.hip.hpp" -#include "hip/base/types.hip.hpp" -#include "hip/components/atomic.hip.hpp" -#include "hip/components/cooperative_groups.hip.hpp" -#include "hip/components/intrinsics.hip.hpp" -#include "hip/components/merging.hip.hpp" -#include "hip/components/reduction.hip.hpp" -#include "hip/components/segment_scan.hip.hpp" -#include "hip/components/thread_ids.hip.hpp" -#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/base/config.hpp" namespace gko { @@ -158,34 +141,6 @@ void calculate_max_nnz_per_row( std::shared_ptr exec, const matrix::Fbcsr *source, size_type *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// -// auto nnz_per_row = Array(exec, num_rows); -// auto block_results = Array(exec, default_block_size); -// auto d_result = Array(exec, 1); -// -// const auto grid_dim = ceildiv(num_rows, default_block_size); -// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, -// as_hip_type(source->get_const_row_ptrs()), -// as_hip_type(nnz_per_row.get_data())); -// -// const auto n = ceildiv(num_rows, default_block_size); -// const auto reduce_dim = n <= default_block_size ? n : default_block_size; -// hipLaunchKernelGGL(kernel::reduce_max_nnz, dim3(reduce_dim), -// dim3(default_block_size), 0, 0, num_rows, -// as_hip_type(nnz_per_row.get_const_data()), -// as_hip_type(block_results.get_data())); -// -// hipLaunchKernelGGL(kernel::reduce_max_nnz, dim3(1), -// dim3(default_block_size), 0, 0, reduce_dim, -// as_hip_type(block_results.get_const_data()), -// as_hip_type(d_result.get_data())); -// -// *result = exec->copy_val_to_host(d_result.get_const_data()); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -196,17 +151,6 @@ void calculate_nonzeros_per_row( std::shared_ptr exec, const matrix::Fbcsr *source, Array *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto num_rows = source->get_size()[0]; -// auto row_ptrs = source->get_const_row_ptrs(); -// auto grid_dim = ceildiv(num_rows, default_block_size); -// -// hipLaunchKernelGGL(kernel::calculate_nnz_per_row, dim3(grid_dim), -// dim3(default_block_size), 0, 0, num_rows, -// as_hip_type(row_ptrs), -// as_hip_type(result->get_data())); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -226,20 +170,6 @@ void is_sorted_by_column_index( std::shared_ptr exec, const matrix::Fbcsr *to_check, bool *is_sorted) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// *is_sorted = true; -// auto cpu_array = Array::view(exec->get_master(), 1, is_sorted); -// auto gpu_array = Array{exec, cpu_array}; -// auto block_size = default_block_size; -// auto num_rows = static_cast(to_check->get_size()[0]); -// auto num_blocks = ceildiv(num_rows, block_size); -// hipLaunchKernelGGL( -// HIP_KERNEL_NAME(kernel::check_unsorted), dim3(num_blocks), -// 
dim3(block_size), 0, 0, to_check->get_const_row_ptrs(), -// to_check->get_const_col_idxs(), num_rows, gpu_array.get_data()); -// cpu_array = gpu_array; -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); diff --git a/include/ginkgo/core/base/blockutils.hpp b/include/ginkgo/core/base/blockutils.hpp index 4bd4d3d040d..c057e1dec04 100644 --- a/include/ginkgo/core/base/blockutils.hpp +++ b/include/ginkgo/core/base/blockutils.hpp @@ -55,8 +55,13 @@ class BlockSizeError : public Error { }; -/** Returns the quotient of the second arg divided by the first - * but throws when they don't divide +/** + * Computes the number of blocks + * + * @param block_size The size of each block + * @param size The total size of some array/vector + * @return The quotient of the size divided by the block size + * but throws when they don't divide */ template IndexType getNumBlocks(const int block_size, const IndexType size) diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 33e18a99794..30dadb68e54 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -161,16 +161,19 @@ class Fbcsr : public EnableLinOp>, void move_to(Csr *result) override; - /// Get the block sparsity pattern in CSR-like format - /** Note that the actual non-zero values are never copied; + /** + * Get the block sparsity pattern in CSR-like format + * + * Note that the actual non-zero values are never copied; * the result always has a value array of size 1 with the value 1. */ void convert_to(SparsityCsr *result) const override; void move_to(SparsityCsr *result) override; - /// Convert COO data into block CSR - /** @warning Unlike Csr::read, here explicit non-zeros are NOT dropped. + /** Convert COO data into block CSR + * + * @warning Unlike Csr::read, here explicit non-zeros are NOT dropped. */ void read(const mat_data &data) override; @@ -200,13 +203,13 @@ class Fbcsr : public EnableLinOp>, bool is_sorted_by_column_index() const; /** - * Returns the values of the matrix. - * - * @return the values of the matrix. + * @return The values of the matrix. */ value_type *get_values() noexcept { return values_.get_data(); } - /// @see Fbcsr::get_const_values() + /** + * @see Fbcsr::get_const_values() + */ const value_type *get_values() const noexcept { return values_.get_const_data(); @@ -225,13 +228,13 @@ class Fbcsr : public EnableLinOp>, } /** - * Returns the column indexes of the matrix. - * - * @return the column indexes of the matrix. + * @return The column indexes of the matrix. */ index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } - /// @see Fbcsr::get_const_col_idxs() + /** + * @see Fbcsr::get_const_col_idxs() + */ const index_type *get_col_idxs() const noexcept { return col_idxs_.get_const_data(); @@ -250,13 +253,13 @@ class Fbcsr : public EnableLinOp>, } /** - * Returns the row pointers of the matrix. - * - * @return the row pointers of the matrix. + * @return The row pointers of the matrix. */ index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); } - /// @see Fbcsr::get_const_row_ptrs() + /** + * @see Fbcsr::get_const_row_ptrs() + */ const index_type *get_row_ptrs() const noexcept { return row_ptrs_.get_const_data(); @@ -275,12 +278,13 @@ class Fbcsr : public EnableLinOp>, } /** - * Returns the starting rows. - * - * @return the starting rows. 
+ * @return The starting row for each 'team' of threads */ index_type *get_srow() noexcept { return startrow_.get_data(); } + /** + * @see get_const_srow + */ const index_type *get_srow() const noexcept { return startrow_.get_const_data(); @@ -299,9 +303,7 @@ class Fbcsr : public EnableLinOp>, } /** - * Returns the number of the srow stored elements (involved warps) - * - * @return the number of the srow stored elements (involved warps) + * @return The number of the srow stored elements (involved warps) */ size_type get_num_srow_elements() const noexcept { @@ -318,9 +320,8 @@ class Fbcsr : public EnableLinOp>, return values_.get_num_elems(); } - /** Returns the strategy - * - * @return the strategy + /** + * @return The strategy */ std::shared_ptr get_strategy() const noexcept { @@ -338,15 +339,29 @@ class Fbcsr : public EnableLinOp>, this->make_srow(); } + /** + * @return The fixed block size for this matrix + */ int get_block_size() const { return bs_; } + /** + * Set the fixed block size for this matrix + * + * @param block_size The block size + */ void set_block_size(const int block_size) { bs_ = block_size; } + /** + * @return The number of block-rows in the matrix + */ index_type get_num_block_rows() const { return row_ptrs_.get_num_elems() - 1; } + /** + * @return The number of block-columns in the matrix + */ index_type get_num_block_cols() const { return nbcols_; } protected: @@ -430,7 +445,7 @@ class Fbcsr : public EnableLinOp>, void convert_strategy_helper(FbcsrType *result) const; /** - * Computes srow. It should be run after changing any row_ptrs_ value. + * Computes srow. It should be run after changing any row pointer. */ void make_srow() { diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index a38b3c9bda7..fc2e2bc17ba 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -47,13 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/base/allocator.hpp" -#include "core/base/iterator_factory.hpp" -#include "core/components/prefix_sum.hpp" -#include "core/matrix/fbcsr_builder.hpp" -#include "omp/components/format_conversion.hpp" - - namespace gko { namespace kernels { namespace omp { @@ -86,49 +79,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); -template -void spgemm_insert_row(unordered_set &cols, - const matrix::Fbcsr *c, - size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = c->get_const_row_ptrs(); -// auto col_idxs = c->get_const_col_idxs(); -// cols.insert(col_idxs + row_ptrs[row], col_idxs + row_ptrs[row + 1]); -//} - - -template -void spgemm_insert_row2(unordered_set &cols, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// for (size_type a_nz = a_row_ptrs[row]; -// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { -// auto a_col = a_col_idxs[a_nz]; -// auto b_row = a_col; -// cols.insert(b_col_idxs + b_row_ptrs[b_row], -// b_col_idxs + b_row_ptrs[b_row + 1]); -// } -//} - - -template -void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// convert_ptrs_to_idxs(ptrs, num_rows, idxs); -//} - - template void convert_to_dense(std::shared_ptr exec, const matrix::Fbcsr *source, @@ -162,38 +112,12 @@ void transpose_and_transform(std::shared_ptr exec, matrix::Fbcsr *trans, const matrix::Fbcsr *orig, UnaryOperator op) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto trans_row_ptrs = trans->get_row_ptrs(); -// auto orig_row_ptrs = orig->get_const_row_ptrs(); -// auto trans_col_idxs = trans->get_col_idxs(); -// auto orig_col_idxs = orig->get_const_col_idxs(); -// auto trans_vals = trans->get_values(); -// auto orig_vals = orig->get_const_values(); -// -// auto orig_num_cols = orig->get_size()[1]; -// auto orig_num_rows = orig->get_size()[0]; -// auto orig_nnz = orig_row_ptrs[orig_num_rows]; -// -// trans_row_ptrs[0] = 0; -// convert_unsorted_idxs_to_ptrs(orig_col_idxs, orig_nnz, trans_row_ptrs + 1, -// orig_num_cols); -// -// convert_fbcsr_to_csc(orig_num_rows, orig_row_ptrs, orig_col_idxs, -// orig_vals, -// trans_col_idxs, trans_row_ptrs + 1, trans_vals, op); -//} template void transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// transpose_and_transform(exec, trans, orig, -// [](const ValueType x) { return x; }); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); @@ -204,11 +128,6 @@ void conj_transpose(std::shared_ptr exec, const matrix::Fbcsr *orig, matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// transpose_and_transform(exec, trans, orig, -// [](const ValueType x) { return conj(x); }); -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); @@ -229,16 
+148,6 @@ void calculate_nonzeros_per_row( std::shared_ptr exec, const matrix::Fbcsr *source, Array *result) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto row_ptrs = source->get_const_row_ptrs(); -// auto row_nnz_val = result->get_data(); -// -//#pragma omp parallel for -// for (size_type i = 0; i < result->get_num_elems(); i++) { -// row_nnz_val[i] = row_ptrs[i + 1] - row_ptrs[i]; -// } -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -258,26 +167,6 @@ void is_sorted_by_column_index( std::shared_ptr exec, const matrix::Fbcsr *to_check, bool *is_sorted) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// const auto row_ptrs = to_check->get_const_row_ptrs(); -// const auto col_idxs = to_check->get_const_col_idxs(); -// const auto size = to_check->get_size(); -// bool local_is_sorted = true; -//#pragma omp parallel for reduction(&& : local_is_sorted) -// for (size_type i = 0; i < size[0]; ++i) { -// // Skip comparison if any thread detects that it is not sorted -// if (local_is_sorted) { -// for (auto idx = row_ptrs[i] + 1; idx < row_ptrs[i + 1]; ++idx) { -// if (col_idxs[idx - 1] > col_idxs[idx]) { -// local_is_sorted = false; -// break; -// } -// } -// } -// } -// *is_sorted = local_is_sorted; -//} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); From 3875bab32bf2bf3fecb346f8be96cd6b9f403d5a Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Fri, 27 Nov 2020 09:58:34 +0100 Subject: [PATCH 19/58] fixed header name for fbcsr hip kernels --- hip/matrix/fbcsr_kernels.hip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index 1ce2735d8f6..5e7e21d1a23 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -46,7 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "hip/base/config.hpp" +#include "hip/base/config.hip.hpp" namespace gko { From e16b1a971931b92fc28dc86fca0c7459ba61aed6 Mon Sep 17 00:00:00 2001 From: Aditya Date: Fri, 27 Nov 2020 11:01:56 +0100 Subject: [PATCH 20/58] Apply suggestions from code review updated gtest macro Co-authored-by: tcojean --- core/test/matrix/fbcsr.cpp | 2 +- core/test/matrix/fbcsr_builder.cpp | 2 +- dpcpp/matrix/fbcsr_kernels.dp.cpp | 14 -------------- reference/test/matrix/fbcsr_kernels.cpp | 4 ++-- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 893e12b0df6..10643276706 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -134,7 +134,7 @@ class Fbcsr : public ::testing::Test { } }; -TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); TYPED_TEST(Fbcsr, SampleGeneratorIsCorrect) diff --git a/core/test/matrix/fbcsr_builder.cpp b/core/test/matrix/fbcsr_builder.cpp index 31ccc2145bc..60e6d93d6cd 100644 --- a/core/test/matrix/fbcsr_builder.cpp +++ b/core/test/matrix/fbcsr_builder.cpp @@ -64,7 +64,7 @@ class FbcsrBuilder : public ::testing::Test { std::unique_ptr mtx; }; -TYPED_TEST_CASE(FbcsrBuilder, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(FbcsrBuilder, gko::test::ValueIndexTypes); TYPED_TEST(FbcsrBuilder, ReturnsCorrectArrays) diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index 17c44830b9a..9e02173781f 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -33,28 +33,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/fbcsr_kernels.hpp" -#include -#include -#include - - #include #include #include #include -#include #include #include -#include - - -#include "core/base/allocator.hpp" -#include "core/base/iterator_factory.hpp" -#include "core/components/prefix_sum.hpp" -#include "core/matrix/fbcsr_builder.hpp" -#include "dpcpp/components/format_conversion.dp.hpp" namespace gko { diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 367d643af14..9e389655bb9 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -135,7 +135,7 @@ class Fbcsr : public ::testing::Test { const std::unique_ptr mtxsq; }; -TYPED_TEST_CASE(Fbcsr, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); template @@ -656,7 +656,7 @@ class FbcsrComplex : public ::testing::Test { using Mtx = gko::matrix::Fbcsr; }; -TYPED_TEST_CASE(FbcsrComplex, gko::test::ComplexValueIndexTypes); +TYPED_TEST_SUITE(FbcsrComplex, gko::test::ComplexValueIndexTypes); TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) From 540228b8c7c3b1ad3bce714c4aa19d163157cb03 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 30 Nov 2020 15:13:31 +0100 Subject: [PATCH 21/58] fixed bug in fbcsr apply tests --- core/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/fbcsr_kernels.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 22a8d0828c6..f750c5a506f 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -12,6 +12,7 @@ target_include_directories(test_fbcsr_sample "$" "$" ) +set_property(TARGET test_fbcsr_sample PROPERTY CXX_STANDARD 14) ginkgo_create_test(fbcsr test_fbcsr_sample) ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) 
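Editor's note: the fbcsr_kernels.cpp hunk that follows restores overload selection for the test helper get_some_number(); its template arguments are garbled in this transcript, and judging by the surviving fragments the real helper appears to dispatch on Ginkgo's is_complex. The sketch below is only an approximation of that SFINAE pattern: the local is_complex_t trait and the main() driver are stand-ins added for illustration and are not Ginkgo's own definitions, while the sample values 1.2 and 3.4 do come from the hunk.

// Sketch only: approximates the overload-selection pattern in the hunk below
// using a small local trait instead of Ginkgo's is_complex.
#include <complex>
#include <iostream>
#include <type_traits>

template <typename T>
struct is_complex_t : std::false_type {};

template <typename T>
struct is_complex_t<std::complex<T>> : std::true_type {};

// Enabled only for real scalar types.
template <typename T>
constexpr std::enable_if_t<!is_complex_t<T>::value, T> get_some_number()
{
    return static_cast<T>(1.2);
}

// Enabled only for complex scalar types; builds the value from two reals.
template <typename T>
constexpr std::enable_if_t<is_complex_t<T>::value, T> get_some_number()
{
    using RT = typename T::value_type;
    return {static_cast<RT>(1.2), static_cast<RT>(3.4)};
}

int main()
{
    std::cout << get_some_number<double>() << "\n";               // 1.2
    std::cout << get_some_number<std::complex<float>>() << "\n";  // (1.2,3.4)
}

Without the enable_if constraint on the first overload, both templates are viable for complex types and the call is ambiguous, which is the apply-test failure the commit message refers to.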
diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 9e389655bb9..4a257ca750c 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -139,13 +139,13 @@ TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); template -constexpr T get_some_number() +constexpr typename std::enable_if_t(), T> get_some_number() { return static_cast(1.2); } template -constexpr typename std::enable_if_t, T> get_some_number() +constexpr typename std::enable_if_t(), T> get_some_number() { using RT = gko::remove_complex; return {static_cast(1.2), static_cast(3.4)}; From 5ae6626fc9205e127e7fc5c1dc439174d3ce31e1 Mon Sep 17 00:00:00 2001 From: Aditya Date: Mon, 30 Nov 2020 17:32:31 +0100 Subject: [PATCH 22/58] Apply suggestions from code review Co-authored-by: tcojean Co-authored-by: fritzgoebel --- core/components/fixed_block.hpp | 6 +- core/device_hooks/common_kernels.inc.cpp | 6 -- core/matrix/fbcsr.cpp | 39 ++++------- core/matrix/fbcsr_kernels.hpp | 6 -- core/test/matrix/fbcsr.cpp | 3 - core/test/matrix/fbcsr_sample.cpp | 2 +- cuda/matrix/fbcsr_kernels.cu | 9 --- dpcpp/matrix/fbcsr_kernels.dp.cpp | 9 --- hip/matrix/fbcsr_kernels.hip.cpp | 9 --- include/ginkgo/core/matrix/fbcsr.hpp | 5 +- include/ginkgo/core/matrix/sparsity_csr.hpp | 1 + omp/matrix/fbcsr_kernels.cpp | 9 --- reference/matrix/fbcsr_kernels.cpp | 77 +-------------------- reference/test/matrix/fbcsr_kernels.cpp | 2 +- 14 files changed, 24 insertions(+), 159 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 32c6fb8ddda..555f880e10f 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -131,10 +131,12 @@ class DenseBlock final { void resize(const int nrows, const int ncols) { + if (nrows * ncols != nrows_ * ncols_) { + delete[] vals_; + vals_ = new value_type[nrows * ncols]; + } nrows_ = nrows; ncols_ = ncols; - delete[] vals_; - vals_ = new value_type[nrows_ * ncols_]; } void zero() diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 9cca9e2d26f..b2818c4959a 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -854,12 +854,6 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); -template -GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) -GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - template GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 46fe819d240..4308cb616e8 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -42,11 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include -#include #include -#include #include -#include #include @@ -71,7 +68,6 @@ GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, fbcsr::calculate_max_nnz_per_row); GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, fbcsr::calculate_nonzeros_per_row); -GKO_REGISTER_OPERATION(sort_by_column_index, fbcsr::sort_by_column_index); GKO_REGISTER_OPERATION(is_sorted_by_column_index, fbcsr::is_sorted_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, fbcsr::extract_diagonal); @@ -92,11 +88,11 @@ Fbcsr::Fbcsr(std::shared_ptr exec, std::shared_ptr strategy) : EnableLinOp(exec, size), bs_{block_size}, - nbcols_{gko::blockutils::getNumBlocks(block_size, size[1])}, + nbcols_{blockutils::getNumBlocks(block_size, size[1])}, values_(exec, num_nonzeros), - col_idxs_(exec, gko::blockutils::getNumBlocks(block_size * block_size, - num_nonzeros)), - row_ptrs_(exec, gko::blockutils::getNumBlocks(block_size, size[0]) + 1), + col_idxs_(exec, blockutils::getNumBlocks(block_size * block_size, + num_nonzeros)), + row_ptrs_(exec, blockutils::getNumBlocks(block_size, size[0]) + 1), startrow_(exec, strategy->calc_size(num_nonzeros)), strategy_(strategy->copy()) {} @@ -107,8 +103,7 @@ void Fbcsr::apply_impl(const LinOp *const b, LinOp *const x) const { using Dense = Dense; - using TFbcsr = Fbcsr; - if (auto b_fbcsr = dynamic_cast(b)) { + if (auto b_fbcsr = dynamic_cast *>(b)) { // if b is a FBCSR matrix, we compute a SpGeMM throw /*::gko::*/ NotImplemented(__FILE__, __LINE__, "SpGeMM for Fbcsr"); @@ -127,13 +122,12 @@ void Fbcsr::apply_impl(const LinOp *const alpha, LinOp *const x) const { using Dense = Dense; - using TFbcsr = Fbcsr; - if (auto b_fbcsr = dynamic_cast(b)) { + if (auto b_fbcsr = dynamic_cast *>(b)) { // if b is a FBCSR matrix, we compute a SpGeMM throw NotImplemented(__FILE__, __LINE__, "Adv SpGeMM for Fbcsr"); } else if (dynamic_cast *>(b)) { // if b is an identity matrix, we compute an SpGEAM - throw NotImplemented(__FILE__, __LINE__, "Adv SpGeMM for Fbcsr"); + throw NotImplemented(__FILE__, __LINE__, "Adv SpGEAM for Fbcsr"); } else { // otherwise we assume that b is dense and compute a SpMV/SpMM this->get_executor()->run( @@ -240,7 +234,7 @@ template void Fbcsr::convert_to( SparsityCsr *const result) const { - using gko::blockutils::getNumBlocks; + using blockutils::getNumBlocks; auto exec = this->get_executor(); auto tmp = SparsityCsr::create( exec, @@ -250,7 +244,7 @@ void Fbcsr::convert_to( tmp->col_idxs_ = this->col_idxs_; tmp->row_ptrs_ = this->row_ptrs_; - tmp->value_ = gko::Array(exec, {one()}); + tmp->value_ = Array(exec, {one()}); tmp->move_to(result); } @@ -334,13 +328,11 @@ void Fbcsr::read(const mat_data &data) cur_bcol = blocks.begin()->first.block_column; const index_type num_brows = data.size[0] / bs; - gko::blockutils::DenseBlocksView values( + blockutils::DenseBlocksView values( tmp->values_.get_data(), bs, bs); for (auto it = blocks.begin(); it != blocks.end(); it++) { - if (cur_brow >= num_brows) - throw gko::OutOfBoundsError(__FILE__, __LINE__, cur_brow, - num_brows); + GKO_ENSURE_IN_BOUNDS(cur_brow, num_brows); tmp->col_idxs_.get_data()[cur_bnz] = it->first.block_column; @@ -383,8 +375,8 @@ void Fbcsr::write(mat_data &data) const data = {tmp->get_size(), {}}; - const gko::blockutils::DenseBlocksView - vblocks(tmp->values_.get_const_data(), bs_, bs_); + const blockutils::DenseBlocksView vblocks( + tmp->values_.get_const_data(), bs_, bs_); for (size_type brow = 0; brow < tmp->get_size()[0] / bs_; ++brow) { const auto start = 
tmp->row_ptrs_.get_const_data()[brow]; @@ -435,11 +427,6 @@ std::unique_ptr Fbcsr::conj_transpose() const template void Fbcsr::sort_by_column_index() GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto exec = this->get_executor(); -// exec->run(fbcsr::make_sort_by_column_index(this)); -//} template diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index ed37c49153d..af7e527a891 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -95,10 +95,6 @@ namespace kernels { const matrix::Fbcsr *source, \ Array *result) -#define GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ - void sort_by_column_index(std::shared_ptr exec, \ - matrix::Fbcsr *to_sort) - #define GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ void is_sorted_by_column_index( \ std::shared_ptr exec, \ @@ -127,8 +123,6 @@ namespace kernels { template \ GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ - template \ GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ template \ GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 10643276706..d6257b49ddc 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -110,9 +110,6 @@ class Fbcsr : public ::testing::Test { ASSERT_EQ(c[inz], orig_colinds[inz]); for (int i = 0; i < bs * bs; i++) { - // ASSERT_LT(gko::abs(v[inz*bs*bs + i] - - // mtx->get_values()[inz*bs*bs + i]), - // std::numeric_limits>::epsilon()); ASSERT_EQ(v[inz * bs * bs + i], orig_vals[inz * bs * bs + i]); } diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 72685d99eab..17879742892 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include "core/components/fixed_block.hpp" -#include "fbcsr_sample.hpp" +#include "core/test/matrix/fbcsr_sample.hpp" #define FBCSR_TEST_OFFSET 0.000011118888 diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index 7da16b2976d..b8d397ef5fc 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -159,15 +159,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); -template -void sort_by_column_index(std::shared_ptr exec, - matrix::Fbcsr *to_sort) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - - template void is_sorted_by_column_index( std::shared_ptr exec, diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index 9e02173781f..72a708bc5ff 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -155,15 +155,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); -template -void sort_by_column_index(std::shared_ptr exec, - matrix::Fbcsr *to_sort) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - - template void is_sorted_by_column_index( std::shared_ptr exec, diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index 5e7e21d1a23..ed2eb2667c1 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -156,15 +156,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); -template -void sort_by_column_index(std::shared_ptr exec, - matrix::Fbcsr *to_sort) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - - template void is_sorted_by_column_index( std::shared_ptr exec, diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 30dadb68e54..3c455d7ea5d 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -38,8 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include - -#include "matrix_strategies.hpp" +#include namespace gko { @@ -422,7 +421,7 @@ class Fbcsr : public EnableLinOp>, std::shared_ptr strategy = std::make_shared()) : EnableLinOp(exec, size), bs_{block_size}, - nbcols_{gko::blockutils::getNumBlocks(block_size, size[1])}, + nbcols_{blockutils::getNumBlocks(block_size, size[1])}, values_{exec, std::forward(values)}, col_idxs_{exec, std::forward(col_idxs)}, row_ptrs_{exec, std::forward(row_ptrs)}, diff --git a/include/ginkgo/core/matrix/sparsity_csr.hpp b/include/ginkgo/core/matrix/sparsity_csr.hpp index b5f16f5ae3e..bce6621dd7d 100644 --- a/include/ginkgo/core/matrix/sparsity_csr.hpp +++ b/include/ginkgo/core/matrix/sparsity_csr.hpp @@ -48,6 +48,7 @@ namespace matrix { template class Csr; + template class Fbcsr; diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index fc2e2bc17ba..442d6f7c107 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -153,15 +153,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); -template -void sort_by_column_index(std::shared_ptr exec, - matrix::Fbcsr *to_sort) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - - template void is_sorted_by_column_index( std::shared_ptr exec, diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 8060d777ec2..c59f8a17e52 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -77,7 +77,7 @@ void spmv(const std::shared_ptr exec, auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); auto vals = a->get_const_values(); - const gko::blockutils::DenseBlocksView avalues( + const blockutils::DenseBlocksView avalues( vals, bs, bs); ValueType *const cvals = c->get_values(); @@ -121,7 +121,7 @@ void advanced_spmv(const std::shared_ptr exec, auto vals = a->get_const_values(); auto valpha = alpha->at(0, 0); auto vbeta = beta->at(0, 0); - const gko::blockutils::DenseBlocksView avalues( + const blockutils::DenseBlocksView avalues( vals, bs, bs); ValueType *const cvals = c->get_values(); @@ -150,70 +150,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); -template -void spgemm_insert_row(unordered_set &cols, - const matrix::Fbcsr *c, - size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto row_ptrs = c->get_const_row_ptrs(); -// auto col_idxs = c->get_const_col_idxs(); -// cols.insert(col_idxs + row_ptrs[row], col_idxs + row_ptrs[row + 1]); -//} - - -template -void spgemm_insert_row2(unordered_set &cols, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - size_type row) GKO_NOT_IMPLEMENTED; -//{ -// TODO (script:fbcsr): change the code imported from matrix/csr if needed -// auto a_row_ptrs = a->get_const_row_ptrs(); -// auto a_col_idxs = a->get_const_col_idxs(); -// auto b_row_ptrs = b->get_const_row_ptrs(); -// auto b_col_idxs = b->get_const_col_idxs(); -// for (size_type a_nz = a_row_ptrs[row]; -// a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) { -// auto a_col = a_col_idxs[a_nz]; -// auto b_row = a_col; -// cols.insert(b_col_idxs + b_row_ptrs[b_row], -// b_col_idxs + b_row_ptrs[b_row + 1]); -// } -//} - - -template -void spgemm_accumulate_row(map &cols, - const matrix::Fbcsr *c, - ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; - - -template -void spgemm_accumulate_row2(map &cols, - 
const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - ValueType scale, size_type row) GKO_NOT_IMPLEMENTED; - - -template -void spgemm(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - matrix::Fbcsr *c) GKO_NOT_IMPLEMENTED; - - -template -void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Fbcsr *b, - const matrix::Dense *beta, - const matrix::Fbcsr *d, - matrix::Fbcsr *c) - GKO_NOT_IMPLEMENTED; - - template void convert_to_dense(const std::shared_ptr exec, const matrix::Fbcsr *const source, @@ -435,15 +371,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); -template -void sort_by_column_index(std::shared_ptr exec, - matrix::Fbcsr *to_sort) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - - template void is_sorted_by_column_index( std::shared_ptr exec, diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 4a257ca750c..5cce425a3cb 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -547,7 +547,7 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) gko::Array row_nnz(this->exec, this->mtx2->get_size()[0]); - gko::kernels::reference::fbcsr ::calculate_nonzeros_per_row( + gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( this->exec, this->mtx2.get(), &row_nnz); auto row_nnz_val = row_nnz.get_data(); From 9d61602657e1d47056a32d93ba182f8ba00d6f14 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 30 Nov 2020 23:25:49 +0100 Subject: [PATCH 23/58] attempt to fix MSVC SFINAE issue and Apple linker errors --- core/test/matrix/CMakeLists.txt | 3 ++- core/test/matrix/fbcsr_sample.hpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index f750c5a506f..203821ae392 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -5,7 +5,7 @@ ginkgo_create_test(csr_builder) ginkgo_create_test(dense) ginkgo_create_test(diagonal) ginkgo_create_test(ell) -add_library(test_fbcsr_sample fbcsr_sample.cpp) +add_library(test_fbcsr_sample STATIC fbcsr_sample.cpp) target_include_directories(test_fbcsr_sample PRIVATE "$" @@ -13,6 +13,7 @@ target_include_directories(test_fbcsr_sample "$" ) set_property(TARGET test_fbcsr_sample PROPERTY CXX_STANDARD 14) +set_property(TARGET test_fbcsr_sample PROPERTY CXX_EXTENSIONS OFF) ginkgo_create_test(fbcsr test_fbcsr_sample) ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index e6fe63dc37b..4d23eb5557b 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -155,11 +155,11 @@ class FbcsrSample { /// ... 
while ignoring imaginary parts for real instantiations template - constexpr std::enable_if_t() && !is_complex(), + constexpr std::enable_if_t() && !is_complex(), ValueType> - sct(U u) const + sct(std::complex cu) const { - return static_cast(u.real()); + return static_cast(cu.real()); } }; From 849bc2c7160c9cf208f132d5afc4e70c9035dca6 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 1 Dec 2020 10:46:57 +0100 Subject: [PATCH 24/58] removed constexpr with std complex constructor to satisfy older toolchains, added ginkgo as explicit dependency of fbcsr_sample --- core/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/fbcsr_kernels.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 203821ae392..9376b970552 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -14,6 +14,7 @@ target_include_directories(test_fbcsr_sample ) set_property(TARGET test_fbcsr_sample PROPERTY CXX_STANDARD 14) set_property(TARGET test_fbcsr_sample PROPERTY CXX_EXTENSIONS OFF) +target_link_libraries(test_fbcsr_sample PRIVATE ginkgo) ginkgo_create_test(fbcsr test_fbcsr_sample) ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 5cce425a3cb..82fb882c518 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -212,8 +212,8 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - constexpr T alphav = -1.0; - constexpr T betav = 2.0; + const T alphav = -1.0; + const T betav = 2.0; auto alpha = gko::initialize({alphav}, this->exec); auto beta = gko::initialize({betav}, this->exec); @@ -254,8 +254,8 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - constexpr T alphav = -1.0; - constexpr T betav = 2.0; + const T alphav = -1.0; + const T betav = 2.0; auto alpha = gko::initialize({alphav}, this->exec); auto beta = gko::initialize({betav}, this->exec); From 65849807a7971a35ca2278f169dd39f888a7aff7 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 1 Dec 2020 12:22:30 +0100 Subject: [PATCH 25/58] fbcsr complex sample class now uses correct index types --- core/components/fixed_block.hpp | 7 ++++--- core/test/matrix/fbcsr.cpp | 8 ++++++-- core/test/matrix/fbcsr_sample.cpp | 11 ++++++----- core/test/matrix/fbcsr_sample.hpp | 7 +++---- include/ginkgo/core/base/blockutils.hpp | 1 - include/ginkgo/core/matrix/fbcsr.hpp | 6 +++--- include/ginkgo/core/matrix/matrix_strategies.hpp | 6 +++--- reference/test/matrix/fbcsr_kernels.cpp | 6 ++++-- 8 files changed, 29 insertions(+), 23 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 555f880e10f..14c035fc965 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -34,11 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ +#include + + #include #include -#include - namespace gko { namespace blockutils { @@ -208,4 +209,4 @@ class DenseBlocksView final { } // namespace blockutils } // namespace gko -#endif +#endif // GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index d6257b49ddc..57b5b09e341 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -31,14 +31,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include -#include -#include #include #include +#include + + +#include + + #include "core/components/fixed_block.hpp" #include "core/test/matrix/fbcsr_sample.hpp" #include "core/test/utils.hpp" diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 17879742892..a5b55a5de3a 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -30,10 +30,11 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include + #include -#include #include "core/components/fixed_block.hpp" #include "core/test/matrix/fbcsr_sample.hpp" @@ -878,10 +879,10 @@ FbcsrSampleComplex::generate_conjtranspose_fbcsr() const return mtx; } -template class FbcsrSampleComplex, int>; -template class FbcsrSampleComplex, int>; -template class FbcsrSampleComplex, long>; -template class FbcsrSampleComplex, long>; +template class FbcsrSampleComplex, int32>; +template class FbcsrSampleComplex, int32>; +template class FbcsrSampleComplex, int64>; +template class FbcsrSampleComplex, int64>; } // namespace testing } // namespace gko diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 4d23eb5557b..12ec1fb310f 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -30,9 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - -#ifndef GKO_CORE_MATRIX_TEST_FBCSR_SAMPLE_HPP -#define GKO_CORE_MATRIX_TEST_FBCSR_SAMPLE_HPP +#ifndef GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ +#define GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ #include @@ -270,4 +269,4 @@ class FbcsrSampleComplex { } // namespace testing } // namespace gko -#endif +#endif // GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ diff --git a/include/ginkgo/core/base/blockutils.hpp b/include/ginkgo/core/base/blockutils.hpp index c057e1dec04..f3664e1d092 100644 --- a/include/ginkgo/core/base/blockutils.hpp +++ b/include/ginkgo/core/base/blockutils.hpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #ifndef GINKGO_CORE_BASE_BLOCKUTILS_HPP_ #define GINKGO_CORE_BASE_BLOCKUTILS_HPP_ diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 3c455d7ea5d..dcb3e0fcb6a 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#ifndef GKO_CORE_MATRIX_FBCSR_HPP_ -#define GKO_CORE_MATRIX_FBCSR_HPP_ +#ifndef GKO_PUBLIC_CORE_MATRIX_FBCSR_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_FBCSR_HPP_ #include @@ -468,4 +468,4 @@ class Fbcsr : public EnableLinOp>, } // namespace gko -#endif // GKO_CORE_MATRIX_FBCSR_HPP_ +#endif // GKO_PUBLIC_CORE_MATRIX_FBCSR_HPP_ diff --git a/include/ginkgo/core/matrix/matrix_strategies.hpp b/include/ginkgo/core/matrix/matrix_strategies.hpp index 47133b3610f..30dd715cf55 100644 --- a/include/ginkgo/core/matrix/matrix_strategies.hpp +++ b/include/ginkgo/core/matrix/matrix_strategies.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_MATRIX_MATRIX_STRATEGY_HPP_ -#define GKO_CORE_MATRIX_MATRIX_STRATEGY_HPP_ +#ifndef GKO_PUBLIC_CORE_MATRIX_MATRIX_STRATEGIES_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_MATRIX_STRATEGIES_HPP_ #include @@ -500,4 +500,4 @@ void strategy_rebuild_helper(MtxType *const result) } // namespace matrix } // namespace gko -#endif +#endif // GKO_PUBLIC_CORE_MATRIX_MATRIX_STRATEGIES_HPP_ diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 82fb882c518..cafce276ae1 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -139,13 +139,15 @@ TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); template -constexpr typename std::enable_if_t(), T> get_some_number() +constexpr typename std::enable_if_t::value, T> +get_some_number() { return static_cast(1.2); } template -constexpr typename std::enable_if_t(), T> get_some_number() +constexpr typename std::enable_if_t::value, T> +get_some_number() { using RT = gko::remove_complex; return {static_cast(1.2), static_cast(3.4)}; From 9794c6be678f9efb99c6a03359559e3ea154d903 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 2 Dec 2020 15:42:46 +0100 Subject: [PATCH 26/58] removed strategies from fbcsr --- core/matrix/fbcsr.cpp | 105 +--- core/matrix/fbcsr_builder.hpp | 5 +- core/test/matrix/fbcsr.cpp | 17 +- core/test/matrix/fbcsr_builder.cpp | 36 +- core/test/matrix/fbcsr_sample.cpp | 39 +- include/ginkgo/core/matrix/fbcsr.hpp | 113 +--- .../ginkgo/core/matrix/matrix_strategies.hpp | 503 ------------------ include/ginkgo/ginkgo.hpp | 1 - reference/test/matrix/fbcsr_kernels.cpp | 9 - 9 files changed, 36 insertions(+), 792 deletions(-) delete mode 100644 include/ginkgo/core/matrix/matrix_strategies.hpp diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 4308cb616e8..cb7b40702cb 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -82,19 +82,17 @@ GKO_REGISTER_OPERATION(outplace_absolute_array, template -Fbcsr::Fbcsr(std::shared_ptr exec, - const dim<2> &size, size_type num_nonzeros, - int block_size, - std::shared_ptr strategy) +Fbcsr::Fbcsr(const std::shared_ptr exec, + const dim<2> &size, + const size_type num_nonzeros, + const int block_size) : EnableLinOp(exec, size), bs_{block_size}, nbcols_{blockutils::getNumBlocks(block_size, size[1])}, values_(exec, num_nonzeros), col_idxs_(exec, blockutils::getNumBlocks(block_size * block_size, num_nonzeros)), - row_ptrs_(exec, blockutils::getNumBlocks(block_size, size[0]) + 1), - startrow_(exec, strategy->calc_size(num_nonzeros)), - strategy_(strategy->copy()) + row_ptrs_(exec, blockutils::getNumBlocks(block_size, size[0]) + 1) {} @@ -141,20 
+139,11 @@ template void Fbcsr::convert_to( Fbcsr *const result) const { - bool same_executor = this->get_executor() == result->get_executor(); - // NOTE: as soon as strategies are improved, this can be reverted result->values_ = this->values_; result->col_idxs_ = this->col_idxs_; result->row_ptrs_ = this->row_ptrs_; - result->startrow_ = this->startrow_; result->set_size(this->get_size()); result->bs_ = this->bs_; - if (!same_executor) { - convert_strategy_helper(result); - } else { - result->set_strategy(std::move(this->get_strategy()->copy())); - } - // END NOTE } @@ -162,11 +151,7 @@ template void Fbcsr::move_to( Fbcsr *const result) { - bool same_executor = this->get_executor() == result->get_executor(); EnableLinOp::move_to(result); - if (!same_executor) { - matrix_strategy::strategy_rebuild_helper(result); - } } @@ -179,7 +164,6 @@ void Fbcsr::convert_to( result->row_ptrs_ = this->row_ptrs_; result->set_size(this->get_size()); result->bs_ = this->bs_; - convert_strategy_helper(result); } @@ -321,7 +305,7 @@ void Fbcsr::read(const mat_data &data) const std::map blocks = create_block_map(data); auto tmp = Fbcsr::create(this->get_executor()->get_master(), data.size, - blocks.size() * bs * bs, bs, this->get_strategy()); + blocks.size() * bs * bs, bs); tmp->row_ptrs_.get_data()[0] = 0; index_type cur_brow = 0, cur_bnz = 0, @@ -356,7 +340,6 @@ void Fbcsr::read(const mat_data &data) assert(cur_brow == tmp->get_size()[0] / bs); - tmp->make_srow(); tmp->move_to(this); } @@ -402,11 +385,9 @@ std::unique_ptr Fbcsr::transpose() const { auto exec = this->get_executor(); auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), - this->get_num_stored_elements(), bs_, - this->get_strategy()); + this->get_num_stored_elements(), bs_); exec->run(fbcsr::make_transpose(this, trans_cpy.get())); - trans_cpy->make_srow(); return std::move(trans_cpy); } @@ -416,11 +397,9 @@ std::unique_ptr Fbcsr::conj_transpose() const { auto exec = this->get_executor(); auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), - this->get_num_stored_elements(), bs_, - this->get_strategy()); + this->get_num_stored_elements(), bs_); exec->run(fbcsr::make_conj_transpose(this, trans_cpy.get())); - trans_cpy->make_srow(); return std::move(trans_cpy); } @@ -480,78 +459,10 @@ Fbcsr::compute_absolute() const this->get_const_values(), this->get_num_stored_elements(), abs_fbcsr->get_values())); - convert_strategy_helper(abs_fbcsr.get()); return abs_fbcsr; } -// TODO clean this up as soon as we improve strategy_type -template -template -void Fbcsr::convert_strategy_helper( - FbcsrType *const result) const -{ - auto strat = this->get_strategy().get(); - std::shared_ptr> - new_strat; - using classical = matrix_strategy::classical; - using load_balance = matrix_strategy::load_balance; - using automatic = matrix_strategy::automatic; - - if (dynamic_cast(strat)) { - new_strat = std::make_shared(); - } else { - auto rexec = result->get_executor(); - auto cuda_exec = std::dynamic_pointer_cast(rexec); - auto hip_exec = std::dynamic_pointer_cast(rexec); - auto lb = dynamic_cast(strat); - if (cuda_exec) { - if (lb) { - new_strat = std::make_shared(cuda_exec); - } else { - new_strat = std::make_shared(cuda_exec); - } - } else if (hip_exec) { - if (lb) { - new_strat = std::make_shared(hip_exec); - } else { - new_strat = std::make_shared(hip_exec); - } - } else { - // Try to preserve this executor's configuration - auto this_cuda_exec = std::dynamic_pointer_cast( - this->get_executor()); - auto this_hip_exec = 
std::dynamic_pointer_cast( - this->get_executor()); - if (this_cuda_exec) { - if (lb) { - new_strat = std::make_shared(this_cuda_exec); - } else { - new_strat = std::make_shared(this_cuda_exec); - } - } else if (this_hip_exec) { - if (lb) { - new_strat = std::make_shared(this_hip_exec); - } else { - new_strat = std::make_shared(this_hip_exec); - } - } else { - // We had a load balance or automatic strategy from a non - // HIP or Cuda executor and are moving to a non HIP or Cuda - // executor. - // FIXME this creates a long delay - if (lb) { - new_strat = std::make_shared(); - } else { - new_strat = std::make_shared(); - } - } - } - } - result->set_strategy(new_strat); -} - - #define GKO_DECLARE_FBCSR_MATRIX(ValueType, IndexType) \ class Fbcsr GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_MATRIX); diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp index 62096ae27fc..b99aebe824b 100644 --- a/core/matrix/fbcsr_builder.hpp +++ b/core/matrix/fbcsr_builder.hpp @@ -75,10 +75,7 @@ class FbcsrBuilder { : matrix_{matrix} {} - /** - * Updates the internal matrix data structures at destruction. - */ - ~FbcsrBuilder() { matrix_->make_srow(); } + ~FbcsrBuilder() {} // make this type non-movable FbcsrBuilder(const FbcsrBuilder &) = delete; diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 57b5b09e341..b32dbb76367 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -40,9 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include - - #include "core/components/fixed_block.hpp" #include "core/test/matrix/fbcsr_sample.hpp" #include "core/test/utils.hpp" @@ -51,9 +48,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -namespace matstr = gko::matrix::matrix_strategy; - - template class Fbcsr : public ::testing::Test { protected: @@ -97,7 +91,6 @@ class Fbcsr : public ::testing::Test { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); auto r = m->get_const_row_ptrs(); - auto s = m->get_const_srow(); const int bs = 3; @@ -119,8 +112,6 @@ class Fbcsr : public ::testing::Test { } } } - - ASSERT_EQ(m->get_num_srow_elements(), 0); } void assert_empty(const Mtx *m) @@ -131,7 +122,6 @@ class Fbcsr : public ::testing::Test { ASSERT_EQ(m->get_const_values(), nullptr); ASSERT_EQ(m->get_const_col_idxs(), nullptr); ASSERT_NE(m->get_const_row_ptrs(), nullptr); - ASSERT_EQ(m->get_const_srow(), nullptr); } }; @@ -235,10 +225,8 @@ TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) this->exec, gko::dim<2>{nbrows * bs, nbcols * bs}, bs, gko::Array::view(this->exec, bnnz * bs * bs, values), gko::Array::view(this->exec, bnnz, col_idxs), - gko::Array::view(this->exec, nbrows + 1, row_ptrs), - std::make_shared>()); + gko::Array::view(this->exec, nbrows + 1, row_ptrs)); - ASSERT_EQ(mtx->get_num_srow_elements(), 0); ASSERT_EQ(mtx->get_const_values(), values); ASSERT_EQ(mtx->get_const_col_idxs(), col_idxs); ASSERT_EQ(mtx->get_const_row_ptrs(), row_ptrs); @@ -291,8 +279,7 @@ TYPED_TEST(Fbcsr, CanBeCleared) TYPED_TEST(Fbcsr, CanBeReadFromMatrixData) { using Mtx = typename TestFixture::Mtx; - auto m = - Mtx::create(this->exec, std::make_shared>()); + auto m = Mtx::create(this->exec); m->set_block_size(this->fbsample.bs); m->read(this->fbsample.generate_matrix_data()); diff --git a/core/test/matrix/fbcsr_builder.cpp b/core/test/matrix/fbcsr_builder.cpp index 60e6d93d6cd..684aa2e324e 100644 --- a/core/test/matrix/fbcsr_builder.cpp +++ 
b/core/test/matrix/fbcsr_builder.cpp @@ -57,7 +57,7 @@ class FbcsrBuilder : public ::testing::Test { protected: FbcsrBuilder() : exec(gko::ReferenceExecutor::create()), - mtx(Mtx::create(exec, gko::dim<2>{2, 3}, 4)) + mtx(Mtx::create(exec, gko::dim<2>{4, 6}, 8, 2)) {} std::shared_ptr exec; @@ -83,38 +83,4 @@ TYPED_TEST(FbcsrBuilder, ReturnsCorrectArrays) } -TYPED_TEST(FbcsrBuilder, UpdatesSrowOnDestruction) -{ - using Mtx = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - struct mock_strategy - : public gko::matrix::matrix_strategy::strategy_type { - virtual void process(const gko::Array &, - gko::Array *) override - { - *was_called = true; - } - - virtual int64_t calc_size(const int64_t nnz) override { return 0; } - - virtual std::shared_ptr copy() override - { - return std::make_shared(*was_called); - } - - mock_strategy(bool &flag) : Mtx::strategy_type(""), was_called(&flag) {} - - bool *was_called; - }; - bool was_called{}; - this->mtx->set_strategy(std::make_shared(was_called)); - was_called = false; - - gko::matrix::FbcsrBuilder{this->mtx.get()}; - - ASSERT_TRUE(was_called); -} - - } // namespace diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index a5b55a5de3a..555f2db8627 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -33,9 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include - - #include "core/components/fixed_block.hpp" #include "core/test/matrix/fbcsr_sample.hpp" @@ -49,8 +46,6 @@ namespace gko { namespace testing { -namespace matstr = gko::matrix::matrix_strategy; - /** Generates a copy of the given matrix with a different scalar type * * \tparam AbsValueType The scalar type of the output matrix @@ -63,14 +58,13 @@ generate_acopy_impl(const FbcsrType *const mat) using index_type = typename FbcsrType::index_type; using value_type = typename FbcsrType::value_type; using AbsFbcsr = gko::matrix::Fbcsr; - using classical = matstr::classical; std::shared_ptr exec = std::dynamic_pointer_cast(mat->get_executor()); std::unique_ptr amat = AbsFbcsr::create(exec, mat->get_size(), mat->get_num_stored_elements(), - mat->get_block_size(), std::make_shared()); + mat->get_block_size()); const index_type *const colidxs = mat->get_col_idxs(); const index_type *const rowptrs = mat->get_row_ptrs(); @@ -112,12 +106,11 @@ FbcsrSample::generate_fbcsr() const Fbcsr::create(exec, gko::dim<2>{static_cast(nrows), static_cast(ncols)}, - nnz, bs, std::make_shared>()); + nnz, bs); value_type *const v = mtx->get_values(); index_type *const c = mtx->get_col_idxs(); index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); r[0] = 0; r[1] = 2; r[2] = 4; @@ -154,8 +147,6 @@ FbcsrSample::generate_fbcsr() const v[34] += FBCSR_TEST_IMAGINARY; v[35] += FBCSR_TEST_IMAGINARY; - for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; - return mtx; } @@ -520,12 +511,11 @@ FbcsrSample2::generate_fbcsr() const Fbcsr::create(exec, gko::dim<2>{static_cast(nrows), static_cast(ncols)}, - nnz, bs, std::make_shared>()); + nnz, bs); value_type *const v = mtx->get_values(); index_type *const c = mtx->get_col_idxs(); index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); r[0] = 0; r[1] = 1; r[2] = 3; @@ -548,8 +538,6 @@ FbcsrSample2::generate_fbcsr() const v[14] = -2; v[15] = -11; - for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) 
s[is] = 0; - return mtx; } @@ -614,12 +602,11 @@ FbcsrSample2::generate_transpose_fbcsr() const Fbcsr::create(exec, gko::dim<2>{static_cast(ncols), static_cast(nrows)}, - nnz, bs, std::make_shared>()); + nnz, bs); value_type *const v = mtx->get_values(); index_type *const c = mtx->get_col_idxs(); index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); r[0] = 0; r[1] = 2; r[2] = 2; @@ -643,8 +630,6 @@ FbcsrSample2::generate_transpose_fbcsr() const v[13] = 0; v[15] = 0; - for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; - return mtx; } @@ -724,12 +709,11 @@ FbcsrSampleSquare::generate_fbcsr() const Fbcsr::create(exec, gko::dim<2>{static_cast(nrows), static_cast(ncols)}, - nnz, bs, std::make_shared>()); + nnz, bs); value_type *const v = mtx->get_values(); index_type *const c = mtx->get_col_idxs(); index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); r[0] = 0; r[1] = 1; r[2] = 2; @@ -749,12 +733,11 @@ FbcsrSampleSquare::generate_transpose_fbcsr() const Fbcsr::create(exec, gko::dim<2>{static_cast(nrows), static_cast(ncols)}, - nnz, bs, std::make_shared>()); + nnz, bs); value_type *const v = mtx->get_values(); index_type *const c = mtx->get_col_idxs(); index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); r[0] = 0; r[1] = 0; r[2] = 2; @@ -802,12 +785,11 @@ FbcsrSampleComplex::generate_fbcsr() const Fbcsr::create(exec, gko::dim<2>{static_cast(nrows), static_cast(ncols)}, - nnz, bs, std::make_shared>()); + nnz, bs); value_type *const v = mtx->get_values(); index_type *const c = mtx->get_col_idxs(); index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); r[0] = 0; r[1] = 1; r[2] = 3; @@ -831,8 +813,6 @@ FbcsrSampleComplex::generate_fbcsr() const v[14] = -2.0 - 2.15i; v[15] = -11.0 - 11.15i; - for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; - return mtx; } @@ -844,12 +824,11 @@ FbcsrSampleComplex::generate_conjtranspose_fbcsr() const Fbcsr::create(exec, gko::dim<2>{static_cast(ncols), static_cast(nrows)}, - nnz, bs, std::make_shared>()); + nnz, bs); value_type *const v = mtx->get_values(); index_type *const c = mtx->get_col_idxs(); index_type *const r = mtx->get_row_ptrs(); - index_type *const s = mtx->get_srow(); r[0] = 0; r[1] = 2; r[2] = 2; @@ -874,8 +853,6 @@ FbcsrSampleComplex::generate_conjtranspose_fbcsr() const v[13] = 0; v[15] = 0; - for (index_type is = 0; is < mtx->get_num_srow_elements(); is++) s[is] = 0; - return mtx; } diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index dcb3e0fcb6a..745bd45dbfe 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include -#include namespace gko { @@ -137,9 +136,6 @@ class Fbcsr : public EnableLinOp>, using mat_data = matrix_data; using absolute_type = remove_complex; - using strategy_type = - matrix_strategy::strategy_type>; - void convert_to(Fbcsr *result) const override; @@ -276,39 +272,6 @@ class Fbcsr : public EnableLinOp>, return row_ptrs_.get_const_data(); } - /** - * @return The starting row for each 'team' of threads - */ - index_type *get_srow() noexcept { return startrow_.get_data(); } - - /** - * @see get_const_srow - */ - const index_type *get_srow() const noexcept - { - return startrow_.get_const_data(); - } - - /** - * @copydoc Fbcsr::get_srow() - * - * @note This is the constant version of the function, which can be - * significantly more memory efficient than the non-constant version, - * so always prefer this version. - */ - const index_type *get_const_srow() const noexcept - { - return startrow_.get_const_data(); - } - - /** - * @return The number of the srow stored elements (involved warps) - */ - size_type get_num_srow_elements() const noexcept - { - return startrow_.get_num_elems(); - } - /** * Returns the number of elements explicitly stored in the matrix. * @@ -319,25 +282,6 @@ class Fbcsr : public EnableLinOp>, return values_.get_num_elems(); } - /** - * @return The strategy - */ - std::shared_ptr get_strategy() const noexcept - { - return strategy_; - } - - /** - * Set the strategy - * - * @param strategy the fbcsr strategy - */ - void set_strategy(std::shared_ptr strategy) - { - strategy_ = std::move(strategy->copy()); - this->make_srow(); - } - /** * @return The fixed block size for this matrix */ @@ -364,17 +308,13 @@ class Fbcsr : public EnableLinOp>, index_type get_num_block_cols() const { return nbcols_; } protected: - using classical = matrix_strategy::classical>; - /** * Creates an uninitialized FBCSR matrix with a block size of 1. 
* * @param exec Executor associated to the matrix - * @param strategy the strategy of FBCSR */ - Fbcsr(std::shared_ptr exec, - std::shared_ptr strategy) - : Fbcsr(std::move(exec), dim<2>{}, {}, 1, std::move(strategy)) + Fbcsr(std::shared_ptr exec) + : Fbcsr(std::move(exec), dim<2>{}, {}, 1) {} /** @@ -384,12 +324,12 @@ class Fbcsr : public EnableLinOp>, * @param size size of the matrix * @param num_nonzeros number of nonzeros * @param block_size Size of the small dense square blocks - * @param strategy the strategy of FBCSR */ - Fbcsr(std::shared_ptr exec, const dim<2> &size = dim<2>{}, - size_type num_nonzeros = {}, int block_size = 1, - std::shared_ptr strategy = - std::make_shared()); + // Fbcsr(std::shared_ptr exec, const dim<2> &size = + // dim<2>{}, + // size_type num_nonzeros = {}, int block_size = 1); + Fbcsr(std::shared_ptr exec, const dim<2> &size, + size_type num_nonzeros, int block_size); /** * Creates a FBCSR matrix from already allocated (and initialized) row @@ -414,24 +354,19 @@ class Fbcsr : public EnableLinOp>, */ template - Fbcsr( - std::shared_ptr exec, const dim<2> &size, - int block_size, ValuesArray &&values, ColIdxsArray &&col_idxs, - RowPtrsArray &&row_ptrs, - std::shared_ptr strategy = std::make_shared()) + Fbcsr(std::shared_ptr exec, const dim<2> &size, + int block_size, ValuesArray &&values, ColIdxsArray &&col_idxs, + RowPtrsArray &&row_ptrs) : EnableLinOp(exec, size), bs_{block_size}, nbcols_{blockutils::getNumBlocks(block_size, size[1])}, values_{exec, std::forward(values)}, col_idxs_{exec, std::forward(col_idxs)}, - row_ptrs_{exec, std::forward(row_ptrs)}, - startrow_(exec), - strategy_(strategy->copy()) + row_ptrs_{exec, std::forward(row_ptrs)} { GKO_ASSERT_EQ(values_.get_num_elems(), col_idxs_.get_num_elems() * bs_ * bs_); GKO_ASSERT_EQ(this->get_size()[0] / bs_ + 1, row_ptrs_.get_num_elems()); - this->make_srow(); } void apply_impl(const LinOp *b, LinOp *x) const override; @@ -439,28 +374,12 @@ class Fbcsr : public EnableLinOp>, void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const override; - // TODO clean this up as soon as we improve strategy_type - template - void convert_strategy_helper(FbcsrType *result) const; - - /** - * Computes srow. It should be run after changing any row pointer. - */ - void make_srow() - { - startrow_.resize_and_reset( - strategy_->calc_size(col_idxs_.get_num_elems())); - strategy_->process(row_ptrs_, &startrow_); - } - private: - int bs_; ///< Block size - size_type nbcols_; ///< Number of block-columns - Array values_; - Array col_idxs_; - Array row_ptrs_; - Array startrow_; - std::shared_ptr strategy_; + int bs_; ///< Block size + size_type nbcols_; ///< Number of block-columns + Array values_; ///< Non-zero values of all blocks + Array col_idxs_; ///< Block-column indices of all blocks + Array row_ptrs_; ///< Block-row pointers into @ref col_idxs_ }; diff --git a/include/ginkgo/core/matrix/matrix_strategies.hpp b/include/ginkgo/core/matrix/matrix_strategies.hpp deleted file mode 100644 index 30dd715cf55..00000000000 --- a/include/ginkgo/core/matrix/matrix_strategies.hpp +++ /dev/null @@ -1,503 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. 
Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_PUBLIC_CORE_MATRIX_MATRIX_STRATEGIES_HPP_ -#define GKO_PUBLIC_CORE_MATRIX_MATRIX_STRATEGIES_HPP_ - - -#include -#include - - -namespace gko { -namespace matrix { - - -namespace matrix_strategy { - - -template -class automatic; - -/** - * strategy_type is to decide how map the work-items to execution units - * - * The practical strategy method should inherit strategy_type and implement - * its `process`, `calc_size` function and the corresponding device kernel. - */ -template -class strategy_type { - friend class automatic; - -public: - using index_type = typename MtxType::index_type; - - /** - * Creates a strategy_type. - * - * @param name the name of strategy - */ - strategy_type(std::string name) : name_(name) {} - - /** - * Returns the name of strategy - * - * @return the name of strategy - */ - std::string get_name() { return name_; } - - /** - * Computes srow according to row pointers. - * - * @param mtx_row_ptrs the row pointers of the matrix - * @param mtx_srow the srow of the matrix - */ - virtual void process(const Array &mtx_row_ptrs, - Array *mtx_srow) = 0; - - /** - * Computes the srow size according to the number of nonzeros. - * - * @param nnz the number of nonzeros - * - * @return the size of srow - */ - virtual int64_t calc_size(const int64_t nnz) = 0; - - /** - * Copy a strategy. This is a workaround until strategies are revamped, - * since strategies like `automatic` do not work when actually shared. - */ - virtual std::shared_ptr copy() = 0; - -protected: - void set_name(std::string name) { name_ = name; } - -private: - std::string name_; -}; - -/** - * classical is a strategy_type which uses the same number of threads on - * each block-row. Classical strategy uses multithreads to calculate on parts of - * rows and then do a reduction of these threads results. The number of - * threads per row depends on the max number of stored elements per row. - */ -template -class classical : public strategy_type { -public: - using index_type = typename strategy_type::index_type; - - /** - * Creates a classical strategy. 
- */ - classical() : strategy_type("classical"), max_length_per_row_(0) {} - - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override - { - auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); - Array row_ptrs_host(host_mtx_exec); - const bool is_mtx_on_host{host_mtx_exec == mtx_row_ptrs.get_executor()}; - const index_type *row_ptrs{}; - if (is_mtx_on_host) { - row_ptrs = mtx_row_ptrs.get_const_data(); - } else { - row_ptrs_host = mtx_row_ptrs; - row_ptrs = row_ptrs_host.get_const_data(); - } - auto num_rows = mtx_row_ptrs.get_num_elems() - 1; - max_length_per_row_ = 0; - for (index_type i = 1; i < num_rows + 1; i++) { - max_length_per_row_ = - std::max(max_length_per_row_, row_ptrs[i] - row_ptrs[i - 1]); - } - } - - int64_t calc_size(const int64_t nnz) override { return 0; } - - index_type get_max_length_per_row() const noexcept - { - return max_length_per_row_; - } - - std::shared_ptr> copy() override - { - return std::make_shared>(); - } - -private: - index_type max_length_per_row_; -}; - -/** - * load_balance is a strategy_type which uses the load balance algorithm. - */ -template -class load_balance : public strategy_type { -public: - using index_type = typename strategy_type::index_type; - - /** - * Creates a load_balance strategy. - */ - load_balance() - : load_balance(std::move( - gko::CudaExecutor::create(0, gko::OmpExecutor::create()))) - {} - - /** - * Creates a load_balance strategy with CUDA executor. - * - * @param exec the CUDA executor - */ - load_balance(std::shared_ptr exec) - : load_balance(exec->get_num_warps(), exec->get_warp_size()) - {} - - /** - * Creates a load_balance strategy with HIP executor. - * - * @param exec the HIP executor - */ - load_balance(std::shared_ptr exec) - : load_balance(exec->get_num_warps(), exec->get_warp_size(), false) - {} - - /** - * Creates a load_balance strategy with specified parameters - * - * @param nwarps The number of warps in the executor - * @param warp_size The warp size of the executor - * @param cuda_params Whether Nvidia-based warp parameters should be used. - * - * @note The warp_size must be the size of full warp. When using this - * constructor, set_strategy needs to be called with correct - * parameters which is replaced during the conversion. 
- */ - load_balance(int64_t nwarps, int warp_size = 32, bool cuda_params = true) - : strategy_type("load_balance"), - nwarps_(nwarps), - warp_size_(warp_size), - cuda_params_(cuda_params) - {} - - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override - { - auto nwarps = mtx_srow->get_num_elems(); - - if (nwarps > 0) { - auto host_srow_exec = mtx_srow->get_executor()->get_master(); - auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); - const bool is_srow_on_host{host_srow_exec == - mtx_srow->get_executor()}; - const bool is_mtx_on_host{host_mtx_exec == - mtx_row_ptrs.get_executor()}; - Array row_ptrs_host(host_mtx_exec); - Array srow_host(host_srow_exec); - const index_type *row_ptrs{}; - index_type *srow{}; - if (is_srow_on_host) { - srow = mtx_srow->get_data(); - } else { - srow_host = *mtx_srow; - srow = srow_host.get_data(); - } - if (is_mtx_on_host) { - row_ptrs = mtx_row_ptrs.get_const_data(); - } else { - row_ptrs_host = mtx_row_ptrs; - row_ptrs = row_ptrs_host.get_const_data(); - } - for (size_type i = 0; i < nwarps; i++) { - srow[i] = 0; - } - const size_type num_rows = mtx_row_ptrs.get_num_elems() - 1; - const index_type num_elems = row_ptrs[num_rows]; - for (size_type i = 0; i < num_rows; i++) { - const auto num = - (ceildiv(row_ptrs[i + 1], warp_size_) * nwarps); - const auto den = ceildiv(num_elems, warp_size_); - auto bucket = ceildiv(num, den); - if (bucket < nwarps) { - srow[bucket]++; - } - } - // find starting row for thread i - for (size_type i = 1; i < nwarps; i++) { - srow[i] += srow[i - 1]; - } - if (!is_srow_on_host) { - *mtx_srow = srow_host; - } - } - } - - int64_t calc_size(const int64_t nnz) override - { - if (warp_size_ > 0) { - int multiple = 8; - if (nnz >= 2e8) { - multiple = 2048; - } else if (nnz >= 2e7) { - multiple = 512; - } else if (nnz >= 2e6) { - multiple = 128; - } else if (nnz >= 2e5) { - multiple = 32; - } - -#if GINKGO_HIP_PLATFORM_HCC - if (!cuda_params_) { - multiple = 8; - if (nnz >= 1e7) { - multiple = 64; - } else if (nnz >= 1e6) { - multiple = 16; - } - } -#endif // GINKGO_HIP_PLATFORM_HCC - - auto nwarps = nwarps_ * multiple; - return min(ceildiv(nnz, warp_size_), int64_t(nwarps)); - } else { - return 0; - } - } - - std::shared_ptr> copy() override - { - return std::make_shared>(nwarps_, warp_size_, - cuda_params_); - } - -private: - int64_t nwarps_; - int warp_size_; - bool cuda_params_; -}; - -template -class automatic : public strategy_type { -public: - using index_type = typename strategy_type::index_type; - - /* Use imbalance strategy when the maximum number of nonzero per row is - * more than 1024 on NVIDIA hardware */ - const index_type nvidia_row_len_limit = 1024; - /* Use imbalance strategy when the matrix has more more than 1e6 on - * NVIDIA hardware */ - const index_type nvidia_nnz_limit = 1e6; - /* Use imbalance strategy when the maximum number of nonzero per row is - * more than 768 on AMD hardware */ - const index_type amd_row_len_limit = 768; - /* Use imbalance strategy when the matrix has more more than 1e8 on AMD - * hardware */ - const index_type amd_nnz_limit = 1e8; - - /** - * Creates an automatic strategy. - */ - automatic() - : automatic(std::move( - gko::CudaExecutor::create(0, gko::OmpExecutor::create()))) - {} - - /** - * Creates an automatic strategy with CUDA executor. - * - * @param exec the CUDA executor - */ - automatic(std::shared_ptr exec) - : automatic(exec->get_num_warps(), exec->get_warp_size()) - {} - - /** - * Creates an automatic strategy with HIP executor. 
- * - * @param exec the HIP executor - */ - automatic(std::shared_ptr exec) - : automatic(exec->get_num_warps(), exec->get_warp_size(), false) - {} - - /** - * Creates an automatic strategy with specified parameters - * - * @param nwarps the number of warps in the executor - * @param warp_size the warp size of the executor - * @param cuda_strategy whether the `cuda_strategy` needs to be used. - * - * @note The warp_size must be the size of full warp. When using this - * constructor, set_strategy needs to be called with correct - * parameters which is replaced during the conversion. - */ - automatic(int64_t nwarps, int warp_size = 32, bool cuda_strategy = true) - : strategy_type("automatic"), - nwarps_(nwarps), - warp_size_(warp_size), - cuda_strategy_(cuda_strategy), - max_length_per_row_(0) - {} - - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override - { - // if the number of stored elements is larger than or - // the maximum number of stored elements per row is larger than - // , use load_balance otherwise use classical - index_type nnz_limit = nvidia_nnz_limit; - index_type row_len_limit = nvidia_row_len_limit; -#if GINKGO_HIP_PLATFORM_HCC - if (!cuda_strategy_) { - nnz_limit = amd_nnz_limit; - row_len_limit = amd_row_len_limit; - } -#endif // GINKGO_HIP_PLATFORM_HCC - auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); - const bool is_mtx_on_host{host_mtx_exec == mtx_row_ptrs.get_executor()}; - Array row_ptrs_host(host_mtx_exec); - const index_type *row_ptrs{}; - if (is_mtx_on_host) { - row_ptrs = mtx_row_ptrs.get_const_data(); - } else { - row_ptrs_host = mtx_row_ptrs; - row_ptrs = row_ptrs_host.get_const_data(); - } - const auto num_rows = mtx_row_ptrs.get_num_elems() - 1; - if (row_ptrs[num_rows] > nnz_limit) { - load_balance actual_strategy(nwarps_, warp_size_, - cuda_strategy_); - if (is_mtx_on_host) { - actual_strategy.process(mtx_row_ptrs, mtx_srow); - } else { - actual_strategy.process(row_ptrs_host, mtx_srow); - } - this->set_name(actual_strategy.get_name()); - } else { - index_type maxnum = 0; - for (index_type i = 1; i < num_rows + 1; i++) { - maxnum = std::max(maxnum, row_ptrs[i] - row_ptrs[i - 1]); - } - if (maxnum > row_len_limit) { - load_balance actual_strategy(nwarps_, warp_size_, - cuda_strategy_); - if (is_mtx_on_host) { - actual_strategy.process(mtx_row_ptrs, mtx_srow); - } else { - actual_strategy.process(row_ptrs_host, mtx_srow); - } - this->set_name(actual_strategy.get_name()); - } else { - classical actual_strategy; - if (is_mtx_on_host) { - actual_strategy.process(mtx_row_ptrs, mtx_srow); - max_length_per_row_ = - actual_strategy.get_max_length_per_row(); - } else { - actual_strategy.process(row_ptrs_host, mtx_srow); - max_length_per_row_ = - actual_strategy.get_max_length_per_row(); - } - this->set_name(actual_strategy.get_name()); - } - } - } - - int64_t calc_size(const int64_t nnz) override - { - return std::make_shared>(nwarps_, warp_size_, - cuda_strategy_) - ->calc_size(nnz); - } - - index_type get_max_length_per_row() const noexcept - { - return max_length_per_row_; - } - - std::shared_ptr> copy() override - { - return std::make_shared>(nwarps_, warp_size_, - cuda_strategy_); - } - -private: - int64_t nwarps_; - int warp_size_; - bool cuda_strategy_; - index_type max_length_per_row_; -}; - - -/** - * When strategy is load_balance or automatic, rebuild the strategy - * according to executor's property. - * - * @param result the matrix. 
- */ -template -void strategy_rebuild_helper(MtxType *const result) -{ - // TODO (script:fbcsr): change the code imported from matrix/csr if needed - // using load_balance = typename Fbcsr::load_balance; - // using automatic = typename Fbcsr::automatic; - auto strategy = result->get_strategy(); - auto executor = result->get_executor(); - if (std::dynamic_pointer_cast>(strategy)) { - if (auto exec = - std::dynamic_pointer_cast(executor)) { - result->set_strategy(std::make_shared>(exec)); - } else if (auto exec = std::dynamic_pointer_cast( - executor)) { - result->set_strategy(std::make_shared>(exec)); - } - } else if (std::dynamic_pointer_cast>(strategy)) { - if (auto exec = - std::dynamic_pointer_cast(executor)) { - result->set_strategy(std::make_shared>(exec)); - } else if (auto exec = std::dynamic_pointer_cast( - executor)) { - result->set_strategy(std::make_shared>(exec)); - } - } -} - - -} // namespace matrix_strategy - - -} // namespace matrix -} // namespace gko - -#endif // GKO_PUBLIC_CORE_MATRIX_MATRIX_STRATEGIES_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 541980beaa3..38ac3fd861c 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -85,7 +85,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index cafce276ae1..368ada19b45 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -47,7 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include @@ -59,9 +58,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { -namespace matstr = gko::matrix::matrix_strategy; - - template class Fbcsr : public ::testing::Test { protected: @@ -344,8 +340,6 @@ TYPED_TEST(Fbcsr, ConvertsToPrecision) tmp->convert_to(res.get()); GKO_ASSERT_MTX_NEAR(this->mtx, res, residual); - // ASSERT_EQ(typeid(*this->mtx->get_strategy()), - // typeid(*res->get_strategy())); } @@ -367,8 +361,6 @@ TYPED_TEST(Fbcsr, MovesToPrecision) tmp->move_to(res.get()); GKO_ASSERT_MTX_NEAR(this->mtx, res, residual); - // ASSERT_EQ(typeid(*this->mtx->get_strategy()), - // typeid(*res->get_strategy())); } @@ -595,7 +587,6 @@ TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) using Fbcsr = typename TestFixture::Mtx; using index_type = typename TestFixture::index_type; - // auto cpmat = Fbcsr::create(this->exec, this->mtx->get_strategy()); auto cpmat = this->mtx->clone(); index_type *const colinds = cpmat->get_col_idxs(); std::swap(colinds[0], colinds[1]); From 2d3f5837a44a0dae00091817338d5df58965ff49 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 2 Dec 2020 16:26:50 +0100 Subject: [PATCH 27/58] tolerance for absolute tests is the respective epsilon --- reference/test/matrix/fbcsr_kernels.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 368ada19b45..4feaf5d1e5d 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -620,12 +620,15 @@ TYPED_TEST(Fbcsr, InplaceAbsolute) mtx->compute_absolute_inplace(); - GKO_ASSERT_MTX_NEAR(mtx, refabs, 0.0); + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(mtx, refabs, tolerance); } TYPED_TEST(Fbcsr, OutplaceAbsolute) { + using value_type = typename TestFixture::value_type; using Mtx = typename TestFixture::Mtx; using AbsMtx = typename gko::remove_complex; @@ -635,7 +638,9 @@ TYPED_TEST(Fbcsr, OutplaceAbsolute) auto abs_mtx = mtx->compute_absolute(); - GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, 0.0); + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, tolerance); } @@ -691,7 +696,9 @@ TYPED_TEST(FbcsrComplex, InplaceAbsolute) mtx->compute_absolute_inplace(); - GKO_ASSERT_MTX_NEAR(mtx, refabs, 0.0); + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(mtx, refabs, tolerance); } @@ -711,7 +718,9 @@ TYPED_TEST(FbcsrComplex, OutplaceAbsolute) auto abs_mtx = mtx->compute_absolute(); - GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, 0.0); + const gko::remove_complex tolerance = + std::numeric_limits>::epsilon(); + GKO_ASSERT_MTX_NEAR(abs_mtx, refabs, tolerance); } From af9fb1fd3e8aeb8185c60556ba1a37f3a3de7b70 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Fri, 4 Dec 2020 19:11:42 +0100 Subject: [PATCH 28/58] incorporated Terry's comments about allocation and docstrings --- core/components/fixed_block.hpp | 51 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 14c035fc965..c3d21b8a171 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -34,26 +34,31 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ +#include #include +#include #include #include + namespace gko { namespace blockutils { -/// A dense block of values with compile-time constant dimensions -/** The blocks are stored row-major. 
However, in future, +/** + * @brief A dense block of values with compile-time constant dimensions + * + * The blocks are interpreted as row-major. However, in the future, * a layout template parameter can be added if needed. * * The primary use is to reinterpret subsets of entries in a big array as * small dense blocks. * - * @tparam ValueType The numeric type of entries of the block - * @tparam nrows Number of rows - * @tparam ncols Number of columns + * @tparam ValueType The numeric type of entries of the block + * @tparam nrows Number of rows + * @tparam ncols Number of columns */ template class FixedBlock final { @@ -89,7 +94,7 @@ class FixedBlock final { /** - * A lightweight dynamic block type for the host space + * A lightweight dynamic block type on the host * * @tparam ValueType The numeric type of entries of the block */ @@ -98,16 +103,16 @@ class DenseBlock final { public: using value_type = ValueType; - DenseBlock() : nrows_{0}, ncols_{0}, vals_{nullptr} {} + /** + * If this default construtor is used, @ref set_executor must be called + * prior to any other member function. + */ + DenseBlock() : nrows_{0}, ncols_{0} {} DenseBlock(const int num_rows, const int num_cols) - : nrows_{num_rows}, - ncols_{num_cols}, - vals_{new value_type[num_rows * num_cols]} + : nrows_{num_rows}, ncols_{num_cols}, vals_(num_rows * num_cols) {} - ~DenseBlock() { delete[] vals_; } - value_type &at(const int row, const int col) { return vals_[row * ncols_ + col]; @@ -132,33 +137,32 @@ class DenseBlock final { void resize(const int nrows, const int ncols) { - if (nrows * ncols != nrows_ * ncols_) { - delete[] vals_; - vals_ = new value_type[nrows * ncols]; - } + vals_.resize(nrows * ncols); nrows_ = nrows; ncols_ = ncols; } void zero() { - for (int i = 0; i < nrows_ * ncols_; i++) - vals_[i] = static_cast(0); + std::fill(vals_.begin(), vals_.end(), static_cast(0)); } private: int nrows_; int ncols_; - value_type *vals_; + std::vector vals_; }; -/// A view into a an array of dense blocks of some runtime-defined size -/** Note that accessing BSR values using this type of view abstracts away the + +/** + * @brief A view into a an array of dense blocks of some runtime-defined size + * + * Accessing BSR values using this type of view abstracts away the * storage layout within the individual blocks, as long as all blocks use the * same layout. For now, row-major blocks are assumed. 
* - * @tparam ValueType The numeric type of entries of the block - * @tparam IndexType The type of integer used to identify the different blocks + * @tparam ValueType The numeric type of entries of the block + * @tparam IndexType The type of integer used to identify the different blocks */ template class DenseBlocksView final { @@ -209,4 +213,5 @@ class DenseBlocksView final { } // namespace blockutils } // namespace gko + #endif // GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ From bac1d9e07f9391112db77c04d2ed0129f505faf5 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 7 Dec 2020 17:06:28 +0100 Subject: [PATCH 29/58] addressed Pratik's comments and some SonarCloud suggestions --- core/components/fixed_block.hpp | 10 +--- core/matrix/fbcsr.cpp | 21 +++---- core/matrix/fbcsr_builder.hpp | 2 +- include/ginkgo/core/matrix/fbcsr.hpp | 3 - reference/matrix/fbcsr_kernels.cpp | 80 ++++++++++++------------- reference/test/matrix/fbcsr_kernels.cpp | 2 - 6 files changed, 53 insertions(+), 65 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index c3d21b8a171..c41386b9964 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -103,11 +103,7 @@ class DenseBlock final { public: using value_type = ValueType; - /** - * If this default construtor is used, @ref set_executor must be called - * prior to any other member function. - */ - DenseBlock() : nrows_{0}, ncols_{0} {} + DenseBlock() {} DenseBlock(const int num_rows, const int num_cols) : nrows_{num_rows}, ncols_{num_cols}, vals_(num_rows * num_cols) @@ -148,8 +144,8 @@ class DenseBlock final { } private: - int nrows_; - int ncols_; + int nrows_ = 0; + int ncols_ = 0; std::vector vals_; }; diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index cb7b40702cb..51950dc96fe 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -254,7 +254,7 @@ void Fbcsr::read(const mat_data &data) std::to_string(__LINE__) + ": List of nonzeros is too big!"); - const index_type nnz = static_cast(data.nonzeros.size()); + const auto nnz = static_cast(data.nonzeros.size()); const int bs = this->bs_; @@ -275,15 +275,15 @@ void Fbcsr::read(const mat_data &data) } }; - auto create_block_map = [nnz, bs](const mat_data &data) { + auto create_block_map = [nnz, bs](const mat_data &mdata) { std::map blocks; for (index_type inz = 0; inz < nnz; inz++) { - const index_type row = data.nonzeros[inz].row; - const index_type col = data.nonzeros[inz].column; - const value_type val = data.nonzeros[inz].value; + const index_type row = mdata.nonzeros[inz].row; + const index_type col = mdata.nonzeros[inz].column; + const value_type val = mdata.nonzeros[inz].value; - const int localrow = static_cast(row % bs); - const int localcol = static_cast(col % bs); + const auto localrow = static_cast(row % bs); + const auto localcol = static_cast(col % bs); const index_type blockrow = row / bs; const index_type blockcol = col / bs; @@ -308,8 +308,9 @@ void Fbcsr::read(const mat_data &data) blocks.size() * bs * bs, bs); tmp->row_ptrs_.get_data()[0] = 0; - index_type cur_brow = 0, cur_bnz = 0, - cur_bcol = blocks.begin()->first.block_column; + index_type cur_brow = 0; + index_type cur_bnz = 0; + index_type cur_bcol = blocks.begin()->first.block_column; const index_type num_brows = data.size[0] / bs; blockutils::DenseBlocksView values( @@ -361,7 +362,7 @@ void Fbcsr::write(mat_data &data) const const blockutils::DenseBlocksView vblocks( tmp->values_.get_const_data(), bs_, bs_); - for (size_type brow = 0; brow < 
tmp->get_size()[0] / bs_; ++brow) { + for (size_type brow = 0; brow < tmp->get_num_block_rows(); ++brow) { const auto start = tmp->row_ptrs_.get_const_data()[brow]; const auto end = tmp->row_ptrs_.get_const_data()[brow + 1]; diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp index b99aebe824b..e80d76b3eeb 100644 --- a/core/matrix/fbcsr_builder.hpp +++ b/core/matrix/fbcsr_builder.hpp @@ -75,7 +75,7 @@ class FbcsrBuilder { : matrix_{matrix} {} - ~FbcsrBuilder() {} + ~FbcsrBuilder() = default; // make this type non-movable FbcsrBuilder(const FbcsrBuilder &) = delete; diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 745bd45dbfe..409a6fc5130 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -325,9 +325,6 @@ class Fbcsr : public EnableLinOp>, * @param num_nonzeros number of nonzeros * @param block_size Size of the small dense square blocks */ - // Fbcsr(std::shared_ptr exec, const dim<2> &size = - // dim<2>{}, - // size_type num_nonzeros = {}, int block_size = 1); Fbcsr(std::shared_ptr exec, const dim<2> &size, size_type num_nonzeros, int block_size); diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index c59f8a17e52..c3532238db6 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -66,13 +66,13 @@ namespace fbcsr { template -void spmv(const std::shared_ptr exec, +void spmv(const std::shared_ptr, const matrix::Fbcsr *const a, const matrix::Dense *const b, matrix::Dense *const c) { const int bs = a->get_block_size(); - const IndexType nvecs = static_cast(b->get_size()[1]); + const auto nvecs = static_cast(b->get_size()[1]); const IndexType nbrows = a->get_num_block_rows(); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -80,11 +80,12 @@ void spmv(const std::shared_ptr exec, const blockutils::DenseBlocksView avalues( vals, bs, bs); - ValueType *const cvals = c->get_values(); + auto *const cvals = c->get_values(); for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { - const IndexType crowblkend = (ibrow + 1) * bs * nvecs; - for (IndexType i = ibrow * bs * nvecs; i < crowblkend; i++) + // const IndexType crowblkend = (ibrow + 1) * bs * nvecs; + for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; + ++i) cvals[i] = zero(); for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; @@ -106,7 +107,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template -void advanced_spmv(const std::shared_ptr exec, +void advanced_spmv(const std::shared_ptr, const matrix::Dense *const alpha, const matrix::Fbcsr *const a, const matrix::Dense *const b, @@ -114,7 +115,7 @@ void advanced_spmv(const std::shared_ptr exec, matrix::Dense *const c) { const int bs = a->get_block_size(); - const IndexType nvecs = static_cast(b->get_size()[1]); + const auto nvecs = static_cast(b->get_size()[1]); const IndexType nbrows = a->get_num_block_rows(); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -124,11 +125,11 @@ void advanced_spmv(const std::shared_ptr exec, const blockutils::DenseBlocksView avalues( vals, bs, bs); - ValueType *const cvals = c->get_values(); + auto *const cvals = c->get_values(); for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { - const IndexType crowblkend = (ibrow + 1) * bs * nvecs; - for (IndexType i = ibrow * bs * nvecs; i < crowblkend; i++) + for (IndexType i = ibrow * bs * nvecs; i < (ibrow 
+ 1) * bs * nvecs; + ++i) cvals[i] *= vbeta; for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; @@ -151,13 +152,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_dense(const std::shared_ptr exec, +void convert_to_dense(const std::shared_ptr, const matrix::Fbcsr *const source, matrix::Dense *const result) { const int bs = source->get_block_size(); - const size_type nbrows = source->get_num_block_rows(); - const size_type nbcols = source->get_num_block_cols(); + const IndexType nbrows = source->get_num_block_rows(); + const IndexType nbcols = source->get_num_block_cols(); const IndexType *const row_ptrs = source->get_const_row_ptrs(); const IndexType *const col_idxs = source->get_const_col_idxs(); const ValueType *const vals = source->get_const_values(); @@ -189,13 +190,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_csr(const std::shared_ptr exec, +void convert_to_csr(const std::shared_ptr, const matrix::Fbcsr *const source, matrix::Csr *const result) { const int bs = source->get_block_size(); - const size_type nbrows = source->get_num_block_rows(); - const size_type nbcols = source->get_num_block_cols(); + const IndexType nbrows = source->get_num_block_rows(); + const IndexType nbcols = source->get_num_block_cols(); const IndexType *const browptrs = source->get_const_row_ptrs(); const IndexType *const bcolinds = source->get_const_col_idxs(); const ValueType *const bvals = source->get_const_values(); @@ -236,7 +237,8 @@ void convert_to_csr(const std::shared_ptr exec, } } - row_ptrs[source->get_size()[0]] = source->get_num_stored_elements(); + row_ptrs[source->get_size()[0]] = + static_cast(source->get_num_stored_elements()); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -245,7 +247,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_fbcsr_to_fbcsc(const size_type num_blk_rows, const int blksz, +void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz, const IndexType *const row_ptrs, const IndexType *const col_idxs, const ValueType *const fbcsr_vals, @@ -257,13 +259,13 @@ void convert_fbcsr_to_fbcsc(const size_type num_blk_rows, const int blksz, fbcsr_vals, blksz, blksz); gko::blockutils::DenseBlocksView cvalues( csc_vals, blksz, blksz); - for (size_type brow = 0; brow < num_blk_rows; ++brow) { + for (IndexType brow = 0; brow < num_blk_rows; ++brow) { for (auto i = row_ptrs[brow]; i < row_ptrs[brow + 1]; ++i) { - const auto dest_idx = col_ptrs[col_idxs[i]]++; + const auto dest_idx = col_ptrs[col_idxs[i]]; + col_ptrs[col_idxs[i]]++; row_idxs[dest_idx] = brow; for (int ib = 0; ib < blksz; ib++) for (int jb = 0; jb < blksz; jb++) - // csc_vals[dest_idx] = op(fbcsr_vals[i]); cvalues(dest_idx, ib, jb) = op(transpose_blocks ? 
rvalues(i, jb, ib) : rvalues(i, ib, jb)); @@ -274,7 +276,6 @@ void convert_fbcsr_to_fbcsc(const size_type num_blk_rows, const int blksz, template void transpose_and_transform( - const std::shared_ptr exec, matrix::Fbcsr *const trans, const matrix::Fbcsr *const orig, UnaryOperator op) { @@ -286,10 +287,8 @@ void transpose_and_transform( auto trans_vals = trans->get_values(); auto orig_vals = orig->get_const_values(); - auto orig_num_cols = orig->get_size()[1]; - const size_type nbcols = orig->get_num_block_cols(); - auto orig_num_rows = orig->get_size()[0]; - const size_type nbrows = orig->get_num_block_rows(); + const IndexType nbcols = orig->get_num_block_cols(); + const IndexType nbrows = orig->get_num_block_rows(); auto orig_nbnz = orig_row_ptrs[nbrows]; trans_row_ptrs[0] = 0; @@ -302,12 +301,11 @@ void transpose_and_transform( template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr, const matrix::Fbcsr *const orig, matrix::Fbcsr *const trans) { - transpose_and_transform(exec, trans, orig, - [](const ValueType x) { return x; }); + transpose_and_transform(trans, orig, [](const ValueType x) { return x; }); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -315,11 +313,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr, const matrix::Fbcsr *const orig, matrix::Fbcsr *const trans) { - transpose_and_transform(exec, trans, orig, + transpose_and_transform(trans, orig, [](const ValueType x) { return conj(x); }); } @@ -329,7 +327,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( - std::shared_ptr exec, + std::shared_ptr, const matrix::Fbcsr *const source, size_type *const result) { @@ -353,7 +351,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( - std::shared_ptr exec, + std::shared_ptr, const matrix::Fbcsr *source, Array *result) { const auto row_ptrs = source->get_const_row_ptrs(); @@ -373,14 +371,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( - std::shared_ptr exec, + std::shared_ptr, const matrix::Fbcsr *const to_check, bool *const is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); - const auto size = to_check->get_size(); - const int bs = to_check->get_block_size(); const size_type nbrows = to_check->get_num_block_rows(); for (size_type i = 0; i < nbrows; ++i) { @@ -400,7 +396,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void extract_diagonal(std::shared_ptr exec, +void extract_diagonal(std::shared_ptr, const matrix::Fbcsr *const orig, matrix::Diagonal *const diag) { @@ -408,16 +404,16 @@ void extract_diagonal(std::shared_ptr exec, const auto col_idxs = orig->get_const_col_idxs(); const auto values = orig->get_const_values(); const int bs = orig->get_block_size(); - const size_type diag_size = diag->get_size()[0]; - const size_type nbrows = orig->get_num_block_rows(); + const IndexType nbrows = orig->get_num_block_rows(); auto diag_values = diag->get_values(); - assert(diag_size == orig->get_size()[0]); + + assert(diag->get_size()[0] == orig->get_size()[0]); const gko::blockutils::DenseBlocksView vblocks( values, bs, bs); - for (size_type ibrow = 0; ibrow < nbrows; ++ibrow) { - for (size_type idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 1]; + for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { + for (IndexType idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 
1]; ++idx) { if (col_idxs[idx] == ibrow) { for (int ib = 0; ib < bs; ib++) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 4feaf5d1e5d..807c8e967bd 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -426,9 +426,7 @@ TYPED_TEST(Fbcsr, MovesToSparsityCsr) using SparsityCsr = typename TestFixture::SparCsr; using Fbcsr = typename TestFixture::Mtx; auto sparsity_mtx = SparsityCsr::create(this->mtx->get_executor()); - // auto fbcsr_ref = Fbcsr::create(this->mtx->get_executor()); - // fbcsr_ref->copy_from(this->mtx.get()); this->mtx->move_to(sparsity_mtx.get()); this->assert_equal_to_mtx(sparsity_mtx.get()); From 186e948dd3f1a76e630c5c708d3608ca89d9095d Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 8 Dec 2020 11:38:39 +0100 Subject: [PATCH 30/58] refactored DenseBlocksView a little and wrote test --- core/components/fixed_block.hpp | 75 ++++--------------------------- core/matrix/fbcsr.cpp | 67 ++++++++++++++++++++++++++- core/test/matrix/fbcsr.cpp | 23 ++++++++++ core/test/matrix/fbcsr_sample.cpp | 20 +++++++++ core/test/matrix/fbcsr_sample.hpp | 19 ++++++-- 5 files changed, 133 insertions(+), 71 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index c41386b9964..2e9595cdc04 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -34,13 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ -#include #include -#include -#include -#include +#include namespace gko { @@ -93,63 +90,6 @@ class FixedBlock final { }; -/** - * A lightweight dynamic block type on the host - * - * @tparam ValueType The numeric type of entries of the block - */ -template -class DenseBlock final { -public: - using value_type = ValueType; - - DenseBlock() {} - - DenseBlock(const int num_rows, const int num_cols) - : nrows_{num_rows}, ncols_{num_cols}, vals_(num_rows * num_cols) - {} - - value_type &at(const int row, const int col) - { - return vals_[row * ncols_ + col]; - } - - const value_type &at(const int row, const int col) const - { - return vals_[row * ncols_ + col]; - } - - value_type &operator()(const int row, const int col) - { - return at(row, col); - } - - const value_type &operator()(const int row, const int col) const - { - return at(row, col); - } - - int size() const { return nrows_ * ncols_; } - - void resize(const int nrows, const int ncols) - { - vals_.resize(nrows * ncols); - nrows_ = nrows; - ncols_ = ncols; - } - - void zero() - { - std::fill(vals_.begin(), vals_.end(), static_cast(0)); - } - -private: - int nrows_ = 0; - int ncols_ = 0; - std::vector vals_; -}; - - /** * @brief A view into a an array of dense blocks of some runtime-defined size * @@ -174,18 +114,18 @@ class DenseBlocksView final { */ DenseBlocksView(ValueType *const buffer, const int num_rows, const int num_cols) - : nrows_{num_rows}, ncols_{num_cols}, vals_{buffer} + : nrows{num_rows}, ncols{num_cols}, vals_{buffer} {} value_type &at(const index_type block, const int row, const int col) { - return vals_[block * nrows_ * ncols_ + row * ncols_ + col]; + return vals_[block * nrows * ncols + row * ncols + col]; } const typename std::remove_const::type &at( const index_type block, const int row, const int col) const { - return vals_[block * nrows_ * ncols_ + row * ncols_ + col]; + return vals_[block * nrows * ncols + row * ncols + col]; } value_type &operator()(const index_type block, const 
int row, const int col) @@ -199,10 +139,11 @@ class DenseBlocksView final { return at(block, row, col); } + const int nrows; ///< Number of rows in each block + const int ncols; ///< Number of columns in each block + private: - int nrows_; ///< Number of rows in each block - int ncols_; ///< Number of columns in each block - value_type *vals_; + value_type *const vals_; }; diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 51950dc96fe..88b17928d06 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -81,6 +81,71 @@ GKO_REGISTER_OPERATION(outplace_absolute_array, } // namespace fbcsr +namespace detail { + + +/** + * A lightweight dynamic block type on the host + * + * @internal Currently used only while reading a FBCSR matrix from matrix_data. + * + * @tparam ValueType The numeric type of entries of the block + */ +template +class DenseBlock final { +public: + using value_type = ValueType; + + DenseBlock() {} + + DenseBlock(const int num_rows, const int num_cols) + : nrows_{num_rows}, ncols_{num_cols}, vals_(num_rows * num_cols) + {} + + value_type &at(const int row, const int col) + { + return vals_[row * ncols_ + col]; + } + + const value_type &at(const int row, const int col) const + { + return vals_[row * ncols_ + col]; + } + + value_type &operator()(const int row, const int col) + { + return at(row, col); + } + + const value_type &operator()(const int row, const int col) const + { + return at(row, col); + } + + int size() const { return nrows_ * ncols_; } + + void resize(const int nrows, const int ncols) + { + vals_.resize(nrows * ncols); + nrows_ = nrows; + ncols_ = ncols; + } + + void zero() + { + std::fill(vals_.begin(), vals_.end(), static_cast(0)); + } + +private: + int nrows_ = 0; + int ncols_ = 0; + std::vector vals_; +}; + + +} // namespace detail + + template Fbcsr::Fbcsr(const std::shared_ptr exec, const dim<2> &size, @@ -258,7 +323,7 @@ void Fbcsr::read(const mat_data &data) const int bs = this->bs_; - using Blk_t = blockutils::DenseBlock; + using Blk_t = detail::DenseBlock; struct FbEntry { index_type block_row; diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index b32dbb76367..fe0b48ef4b0 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -309,4 +309,27 @@ TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) } +TYPED_TEST(Fbcsr, DenseBlocksViewWorksCorrectly) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Dbv = gko::blockutils::DenseBlocksView; + + const gko::testing::FbcsrSample2 fbsample( + std::static_pointer_cast(this->exec)); + + auto refmtx = fbsample.generate_fbcsr(); + const Dbv testdbv(refmtx->get_values(), fbsample.bs, fbsample.bs); + + std::vector ref_dbv_array(fbsample.nnz); + Dbv refdbv(ref_dbv_array.data(), fbsample.bs, fbsample.bs); + fbsample.fill_value_blocks_view(refdbv); + + for (index_type ibz = 0; ibz < fbsample.nbnz; ibz++) + for (int i = 0; i < fbsample.bs; ++i) + for (int j = 0; j < fbsample.bs; ++j) + ASSERT_EQ(testdbv(ibz, i, j), refdbv(ibz, i, j)); +} + + } // namespace diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 555f2db8627..22d8a3c26da 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -541,6 +541,26 @@ FbcsrSample2::generate_fbcsr() const return mtx; } +template +void FbcsrSample2::fill_value_blocks_view( + DenseBlocksView &dbv) const +{ + dbv(0, 0, 0) = 1.0; + dbv(0, 0, 1) = 2.0; + dbv(0, 1, 0) = 3.0; + dbv(0, 1, 1) = 0.0; 
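+    // The remaining blocks mirror generate_fbcsr(): block 1 is filled with the
+    // default value 0.15 + FBCSR_TEST_OFFSET, block 2 has that value in its
+    // first row and zeros in its second row, and block 3 holds the distinct
+    // entries -12, -1, -2 and -11.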
+ for (int i = 0; i < 2; ++i) + for (int j = 0; j < 2; ++j) dbv(1, i, j) = 0.15 + FBCSR_TEST_OFFSET; + dbv(2, 0, 0) = 0.15 + FBCSR_TEST_OFFSET; + dbv(2, 0, 1) = 0.15 + FBCSR_TEST_OFFSET; + dbv(2, 1, 0) = 0.0; + dbv(2, 1, 1) = 0.0; + dbv(3, 0, 0) = -12.0; + dbv(3, 0, 1) = -1.0; + dbv(3, 1, 0) = -2.0; + dbv(3, 1, 1) = -11.0; +} + template std::unique_ptr, IndexType>> FbcsrSample2::generate_abs_fbcsr_abstype() const diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 12ec1fb310f..9b04d9c30cc 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -43,6 +43,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/fixed_block.hpp" + + namespace gko { namespace testing { @@ -174,6 +177,8 @@ class FbcsrSample2 { using Fbcsr = gko::matrix::Fbcsr; using Dense = gko::matrix::Dense; using Diagonal = gko::matrix::Diagonal; + using DenseBlocksView = + gko::blockutils::DenseBlocksView; FbcsrSample2(std::shared_ptr exec); @@ -192,6 +197,11 @@ class FbcsrSample2 { std::unique_ptr, index_type>> generate_abs_fbcsr_abstype() const; + /** + * Fills a view into a FBCSR values array using the sample matrix's data + */ + void fill_value_blocks_view(DenseBlocksView &dbv) const; + /// Enables use of literals to instantiate value data template constexpr ValueType sct(U u) const @@ -210,8 +220,10 @@ class FbcsrSample2 { const std::shared_ptr exec; }; -/// Generates the a sample block CSR square matrix in different formats -/** This currently a 4 x 4 matrix with 2x2 blocks. +/** + * @brief Generates the a sample block CSR square matrix and its transpose + * + * This currently a 4 x 4 matrix with 2x2 blocks. */ template class FbcsrSampleSquare { @@ -237,7 +249,8 @@ class FbcsrSampleSquare { }; /** - * Generates the a sample block CSR matrix with complex values + * @brief Generates a sample block CSR matrix with complex values + * * This is a 6 x 8 matrix with 2x2 blocks. 
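+ * The sample also provides its conjugate transpose (see
+ * generate_conjtranspose_fbcsr) for checking conj_transpose.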
*/ template From 2a0737d61801547f20ff08e5cd64d63613e75486 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 8 Dec 2020 16:21:46 +0100 Subject: [PATCH 31/58] removed couple of unnecessary functions from fbcsr sample generators --- core/test/matrix/fbcsr_sample.cpp | 41 ------------------------- core/test/matrix/fbcsr_sample.hpp | 14 --------- include/ginkgo/core/matrix/fbcsr.hpp | 8 ++++- reference/test/matrix/fbcsr_kernels.cpp | 8 +++-- 4 files changed, 13 insertions(+), 58 deletions(-) diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp index 22d8a3c26da..d7fc2a8a237 100644 --- a/core/test/matrix/fbcsr_sample.cpp +++ b/core/test/matrix/fbcsr_sample.cpp @@ -430,40 +430,6 @@ gko::matrix_data FbcsrSample< {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); } -template -std::unique_ptr> -FbcsrSample::generate_coo() const -{ - gko::matrix_data mdata = - generate_matrix_data_with_explicit_zeros(); - - using nztype = - typename gko::matrix_data::nonzero_type; - std::sort(mdata.nonzeros.begin(), mdata.nonzeros.end(), - [](const nztype &a, const nztype &b) { - if (a.row < b.row) - return true; - else if (a.row > b.row) - return false; - else if (a.column < b.column) - return true; - else - return false; - }); - - gko::Array rowidx(exec, nnz); - gko::Array colidx(exec, nnz); - gko::Array values(exec, nnz); - for (size_t i = 0; i < mdata.nonzeros.size(); i++) { - rowidx.get_data()[i] = mdata.nonzeros[i].row; - colidx.get_data()[i] = mdata.nonzeros[i].column; - values.get_data()[i] = mdata.nonzeros[i].value; - } - auto mat = - Coo::create(exec, gko::dim<2>{nrows, ncols}, values, colidx, rowidx); - return mat; -} - template std::unique_ptr> FbcsrSample::generate_sparsity_csr() const @@ -478,13 +444,6 @@ FbcsrSample::generate_sparsity_csr() const return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, rowptrs); } -template -gko::Array FbcsrSample::getNonzerosPerRow() - const -{ - return gko::Array(exec, {6, 6, 6, 6, 6, 6}); -} - #define GKO_DECLARE_FBCSR_TEST_SAMPLE(ValueType, IndexType) \ class FbcsrSample GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE); diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 9b04d9c30cc..5985e192a5b 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include #include @@ -64,7 +63,6 @@ class FbcsrSample { using absvalue_type = remove_complex; using Fbcsr = gko::matrix::Fbcsr; using Csr = gko::matrix::Csr; - using Coo = gko::matrix::Coo; using Dense = gko::matrix::Dense; using MatData = gko::matrix_data; using SparCsr = gko::matrix::SparsityCsr; @@ -88,13 +86,6 @@ class FbcsrSample { */ std::unique_ptr generate_dense() const; - /** - * @return The matrix in COO format keeping explicit nonzeros - * - * The nonzeros are sorted by row and column. 
- */ - std::unique_ptr generate_coo() const; - /** * @return Sparsity structure of the matrix */ @@ -115,11 +106,6 @@ class FbcsrSample { */ MatData generate_matrix_data_with_explicit_zeros() const; - /** - * @return An array containing number of stored values in each row - */ - gko::Array getNonzerosPerRow() const; - /** * @return FBCSR matrix with absolute values of respective entries */ diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 409a6fc5130..925e7268749 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -152,6 +152,12 @@ class Fbcsr : public EnableLinOp>, void move_to(Dense *other) override; + /** + * Converts the matrix to CSR format + * + * @note Any explicit zeros in the original matrix are retained + * in the converted result. + */ void convert_to(Csr *result) const override; void move_to(Csr *result) override; @@ -159,7 +165,7 @@ class Fbcsr : public EnableLinOp>, /** * Get the block sparsity pattern in CSR-like format * - * Note that the actual non-zero values are never copied; + * @note The actual non-zero values are never copied; * the result always has a value array of size 1 with the value 1. */ void convert_to(SparsityCsr *result) const override; diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 807c8e967bd..1a7e00d3f40 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -161,14 +161,18 @@ TYPED_TEST(Fbcsr, AppliesToDenseVector) auto x = Vec::create(this->exec, gko::dim<2>{(gko::size_type)ncols, 1}); T *const xvals = x->get_values(); for (index_type i = 0; i < ncols; i++) - // xvals[i] = std::log(static_cast(static_cast((i+1)^2))); - xvals[i] = (i + 1.0) * (i + 1.0); + xvals[i] = std::sin(static_cast(static_cast((i + 1) ^ 2))); auto y = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); auto yref = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); this->mtx2->apply(x.get(), y.get()); this->fbsample2.apply(x.get(), yref.get()); + // using Csr = typename TestFixture::Csr; + // auto csr_mtx = Csr::create(this->mtx->get_executor(), + // std::make_shared()); + // this->mtx2->convert_to(csr_mtx.get()); + // csr_mtx->apply(x.get(), yref.get()); const double tolerance = std::numeric_limits>::epsilon(); From fe122b833b335aadc77676af0f935c23d8f8bcb1 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 9 Dec 2020 18:17:52 +0100 Subject: [PATCH 32/58] FBCSR reference kernel testing now depends mostly on comparison with CSR kernels This enabled reduction in FBCSR sample matrix code and removal of the cpp file, so the test_fbcsr_sample library is no longer needed. 
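
The comparison works by converting the FBCSR matrix under test to CSR (the
conversion keeps any explicit zeros stored inside the dense blocks) and
checking that both formats produce the same result for the same input. The
following is a minimal sketch of the pattern for SpMV; it assumes the fixture
aliases (Csr, T) and the vectors x, y and yref set up as in the
AppliesToDenseVector test above, and is an illustration rather than the exact
test code:

    // sketch only: fixture aliases (Csr, T) and vectors x, y, yref are assumed
    using Csr = typename TestFixture::Csr;
    auto csr_mtx = Csr::create(this->exec);
    this->mtx2->convert_to(csr_mtx.get());  // FBCSR -> CSR, explicit zeros kept

    this->mtx2->apply(x.get(), y.get());    // kernel under test
    csr_mtx->apply(x.get(), yref.get());    // trusted CSR reference

    const double tol =
        std::numeric_limits<gko::remove_complex<T>>::epsilon();
    GKO_ASSERT_MTX_NEAR(y, yref, tol);

As with the absolute-value tests, the tolerance is the epsilon of the value
type, since the two kernels should agree up to rounding.
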
--- core/test/matrix/CMakeLists.txt | 12 +- core/test/matrix/fbcsr.cpp | 3 - core/test/matrix/fbcsr_sample.cpp | 844 ------------------------ core/test/matrix/fbcsr_sample.hpp | 619 ++++++++++++++--- reference/test/matrix/CMakeLists.txt | 2 +- reference/test/matrix/fbcsr_kernels.cpp | 139 ++-- 6 files changed, 623 insertions(+), 996 deletions(-) delete mode 100644 core/test/matrix/fbcsr_sample.cpp diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 9376b970552..64b3b3ed593 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -5,17 +5,7 @@ ginkgo_create_test(csr_builder) ginkgo_create_test(dense) ginkgo_create_test(diagonal) ginkgo_create_test(ell) -add_library(test_fbcsr_sample STATIC fbcsr_sample.cpp) -target_include_directories(test_fbcsr_sample - PRIVATE - "$" - "$" - "$" - ) -set_property(TARGET test_fbcsr_sample PROPERTY CXX_STANDARD 14) -set_property(TARGET test_fbcsr_sample PROPERTY CXX_EXTENSIONS OFF) -target_link_libraries(test_fbcsr_sample PRIVATE ginkgo) -ginkgo_create_test(fbcsr test_fbcsr_sample) +ginkgo_create_test(fbcsr) ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) ginkgo_create_test(identity) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index fe0b48ef4b0..619315e5c98 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -134,11 +134,9 @@ TYPED_TEST(Fbcsr, SampleGeneratorIsCorrect) using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; using Csr = gko::matrix::Csr; - using Dense = gko::matrix::Dense; std::unique_ptr fbmtx = this->fbsample.generate_fbcsr(); std::unique_ptr csmtx = this->fbsample.generate_csr(); - std::unique_ptr dmtx = this->fbsample.generate_dense(); const int bs = this->fbsample.bs; ASSERT_EQ(bs, fbmtx->get_block_size()); @@ -174,7 +172,6 @@ TYPED_TEST(Fbcsr, SampleGeneratorIsCorrect) ASSERT_EQ(col, csmtx->get_const_col_idxs()[inz]); ASSERT_EQ(fbvals(ibnz, ib, jb), csmtx->get_const_values()[inz]); - ASSERT_EQ(fbvals(ibnz, ib, jb), dmtx->at(row, col)); } } } diff --git a/core/test/matrix/fbcsr_sample.cpp b/core/test/matrix/fbcsr_sample.cpp deleted file mode 100644 index d7fc2a8a237..00000000000 --- a/core/test/matrix/fbcsr_sample.cpp +++ /dev/null @@ -1,844 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include "core/components/fixed_block.hpp" -#include "core/test/matrix/fbcsr_sample.hpp" - -#define FBCSR_TEST_OFFSET 0.000011118888 - -#define FBCSR_TEST_C_MAG 0.1 + FBCSR_TEST_OFFSET -#define FBCSR_TEST_IMAGINARY \ - sct(std::complex>(0, FBCSR_TEST_C_MAG)) - -namespace gko { -namespace testing { - - -/** Generates a copy of the given matrix with a different scalar type - * - * \tparam AbsValueType The scalar type of the output matrix - */ -template -static std::unique_ptr< - gko::matrix::Fbcsr> -generate_acopy_impl(const FbcsrType *const mat) -{ - using index_type = typename FbcsrType::index_type; - using value_type = typename FbcsrType::value_type; - using AbsFbcsr = gko::matrix::Fbcsr; - - std::shared_ptr exec = - std::dynamic_pointer_cast(mat->get_executor()); - - std::unique_ptr amat = - AbsFbcsr::create(exec, mat->get_size(), mat->get_num_stored_elements(), - mat->get_block_size()); - - const index_type *const colidxs = mat->get_col_idxs(); - const index_type *const rowptrs = mat->get_row_ptrs(); - index_type *const acolidxs = amat->get_col_idxs(); - index_type *const arowptrs = amat->get_row_ptrs(); - - for (index_type i = 0; - i < mat->get_num_stored_elements() / - (mat->get_block_size() * mat->get_block_size()); - i++) - acolidxs[i] = colidxs[i]; - - for (index_type i = 0; i < mat->get_size()[0] / mat->get_block_size() + 1; - i++) - arowptrs[i] = rowptrs[i]; - - return amat; -} - - -template -FbcsrSample::FbcsrSample( - const std::shared_ptr rexec) - : nrows{6}, - ncols{12}, - nnz{36}, - nbrows{2}, - nbcols{4}, - nbnz{4}, - bs{3}, - exec(rexec) -{} - -template -std::unique_ptr> -FbcsrSample::generate_fbcsr() const -{ - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 2; - r[2] = 4; - c[0] = 1; - c[1] = 3; - c[2] = 0; - c[3] = 2; - - gko::blockutils::DenseBlocksView vals(v, bs, bs); - - if (mtx->get_size()[0] % bs != 0) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", - mtx->get_size()[0], mtx->get_size()[1], - "block size does not divide the size!"); - - for (index_type ibrow = 0; ibrow < mtx->get_num_block_rows(); ibrow++) { - const index_type *const browptr = mtx->get_row_ptrs(); - for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; inz++) { - const index_type bcolind = mtx->get_col_idxs()[inz]; - const value_type base = (ibrow + 1) * (bcolind + 1); - for (int ival = 0; ival < bs; ival++) - for (int jval = 0; jval < bs; jval++) - vals(inz, ival, jval) = - base + static_cast>( - ival * bs + jval); - } - } - - // Some of the entries are set to zero - vals(0, 2, 0) = gko::zero(); - vals(0, 2, 2) = gko::zero(); - vals(3, 0, 0) = gko::zero(); - - v[34] += FBCSR_TEST_IMAGINARY; - v[35] += FBCSR_TEST_IMAGINARY; - - return mtx; -} - -template 
-template -void FbcsrSample::correct_abs_for_complex_values( - FbcsrType *const amat) const -{ - using out_value_type = typename FbcsrType::value_type; - using outreal_type = remove_complex; - out_value_type *const avals = amat->get_values(); - if (is_complex()) { - auto mo = static_cast(FBCSR_TEST_C_MAG); - avals[34] = sqrt(pow(static_cast(13.0), 2) + - pow(static_cast(mo), 2)); - avals[35] = sqrt(pow(static_cast(14.0), 2) + - pow(static_cast(mo), 2)); - } -} - -template -std::unique_ptr, IndexType>> -FbcsrSample::generate_abs_fbcsr_abstype() const -{ - using AbsValueType = typename gko::remove_complex; - using AbsFbcsr = gko::matrix::Fbcsr; - - const std::unique_ptr mat = generate_fbcsr(); - std::unique_ptr amat = - generate_acopy_impl(mat.get()); - - AbsValueType *const avals = amat->get_values(); - const ValueType *const vals = mat->get_values(); - for (IndexType i = 0; i < amat->get_num_stored_elements(); i++) - avals[i] = abs(vals[i]); - - correct_abs_for_complex_values(amat.get()); - - return amat; -} - -template -std::unique_ptr> -FbcsrSample::generate_abs_fbcsr() const -{ - const std::unique_ptr mat = generate_fbcsr(); - std::unique_ptr amat = - generate_acopy_impl(mat.get()); - - ValueType *const avals = amat->get_values(); - const ValueType *const vals = mat->get_values(); - for (IndexType i = 0; i < amat->get_num_stored_elements(); i++) - avals[i] = abs(vals[i]); - - correct_abs_for_complex_values(amat.get()); - - return amat; -} - -template -std::unique_ptr> -FbcsrSample::generate_csr() const -{ - std::unique_ptr csrm = - Csr::create(exec, gko::dim<2>{nrows, ncols}, nnz, - std::make_shared()); - index_type *const csrrow = csrm->get_row_ptrs(); - index_type *const csrcols = csrm->get_col_idxs(); - value_type *const csrvals = csrm->get_values(); - - csrrow[0] = 0; - csrrow[1] = 6; - csrrow[2] = 12; - csrrow[3] = 18; - csrrow[4] = 24; - csrrow[5] = 30; - csrrow[6] = 36; - - csrcols[0] = 3; - csrcols[1] = 4; - csrcols[2] = 5; - csrcols[6] = 3; - csrcols[7] = 4; - csrcols[8] = 5; - csrcols[12] = 3; - csrcols[13] = 4; - csrcols[14] = 5; - - csrcols[3] = 9; - csrcols[4] = 10; - csrcols[5] = 11; - csrcols[9] = 9; - csrcols[10] = 10; - csrcols[11] = 11; - csrcols[15] = 9; - csrcols[16] = 10; - csrcols[17] = 11; - - csrcols[18] = 0; - csrcols[19] = 1; - csrcols[20] = 2; - csrcols[24] = 0; - csrcols[25] = 1; - csrcols[26] = 2; - csrcols[30] = 0; - csrcols[31] = 1; - csrcols[32] = 2; - - csrcols[21] = 6; - csrcols[22] = 7; - csrcols[23] = 8; - csrcols[27] = 6; - csrcols[28] = 7; - csrcols[29] = 8; - csrcols[33] = 6; - csrcols[34] = 7; - csrcols[35] = 8; - - // values - csrvals[0] = 2; - csrvals[1] = 3; - csrvals[2] = 4; - csrvals[6] = 5; - csrvals[7] = 6; - csrvals[8] = 7; - csrvals[12] = 0; - csrvals[13] = 9; - csrvals[14] = 0; - - csrvals[3] = 4; - csrvals[4] = 5; - csrvals[5] = 6; - csrvals[9] = 7; - csrvals[10] = 8; - csrvals[11] = 9; - csrvals[15] = 10; - csrvals[16] = 11; - csrvals[17] = 12; - - csrvals[18] = 2; - csrvals[19] = 3; - csrvals[20] = 4; - csrvals[24] = 5; - csrvals[25] = 6; - csrvals[26] = 7; - csrvals[30] = 8; - csrvals[31] = 9; - csrvals[32] = 10; - - csrvals[21] = 0; - csrvals[22] = 7; - csrvals[23] = 8; - csrvals[27] = 9; - csrvals[28] = 10; - csrvals[29] = 11; - csrvals[33] = 12; - csrvals[34] = 13; - csrvals[35] = 14; - - csrvals[34] += FBCSR_TEST_IMAGINARY; - csrvals[35] += FBCSR_TEST_IMAGINARY; - - return csrm; -} - -template -std::unique_ptr> -FbcsrSample::generate_dense() const -{ - std::unique_ptr densem = - Dense::create(exec, gko::dim<2>(nrows, ncols)); - - 
for (size_type irow = 0; irow < densem->get_size()[0]; irow++) - for (size_type jcol = 0; jcol < densem->get_size()[1]; jcol++) { - densem->at(irow, jcol) = 0; - if (irow < 3 && jcol >= 3 && jcol < 6) - densem->at(irow, jcol) = 2.0 + irow * bs + jcol - 3; - if (irow < 3 && jcol >= 9) - densem->at(irow, jcol) = 4.0 + irow * bs + jcol - 9; - if (irow >= 3 && jcol < 3) - densem->at(irow, jcol) = 2.0 + (irow - 3) * bs + jcol; - if (irow >= 3 && jcol >= 6 && jcol < 9) - densem->at(irow, jcol) = 6.0 + (irow - 3) * bs + jcol - 6; - } - - densem->at(2, 3) = densem->at(2, 5) = densem->at(3, 6) = 0.0; - densem->at(5, 7) += FBCSR_TEST_IMAGINARY; - densem->at(5, 8) += FBCSR_TEST_IMAGINARY; - - return densem; -} - -// Assuming row-major blocks -template -gko::matrix_data -FbcsrSample::generate_matrix_data() const -{ - return MatData({{6, 12}, - {{0, 3, 2.0}, - {0, 4, 3.0}, - {0, 5, 4.0}, - {1, 3, 5.0}, - {1, 4, 6.0}, - {1, 5, 7.0}, - {2, 4, 9.0}, - - {0, 9, 4.0}, - {0, 10, 5.0}, - {0, 11, 6.0}, - {1, 9, 7.0}, - {1, 10, 8.0}, - {1, 11, 9.0}, - {2, 9, 10.0}, - {2, 10, 11.0}, - {2, 11, 12.0}, - - {3, 0, 2.0}, - {3, 1, 3.0}, - {3, 2, 4.0}, - {4, 0, 5.0}, - {4, 1, 6.0}, - {4, 2, 7.0}, - {5, 0, 8.0}, - {5, 1, 9.0}, - {5, 2, 10.0}, - - {3, 7, 7.0}, - {3, 8, 8.0}, - {4, 6, 9.0}, - {4, 7, 10.0}, - {4, 8, 11.0}, - {5, 6, 12.0}, - {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, - {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); -} - -// Assuming row-major blocks -template -gko::matrix_data FbcsrSample< - ValueType, IndexType>::generate_matrix_data_with_explicit_zeros() const -{ - return MatData({{6, 12}, - {{0, 3, 2.0}, - {0, 4, 3.0}, - {0, 5, 4.0}, - {1, 3, 5.0}, - {1, 4, 6.0}, - {1, 5, 7.0}, - {2, 3, 0.0}, - {2, 4, 9.0}, - {2, 5, 0.0}, - - {0, 9, 4.0}, - {0, 10, 5.0}, - {0, 11, 6.0}, - {1, 9, 7.0}, - {1, 10, 8.0}, - {1, 11, 9.0}, - {2, 9, 10.0}, - {2, 10, 11.0}, - {2, 11, 12.0}, - - {3, 0, 2.0}, - {3, 1, 3.0}, - {3, 2, 4.0}, - {4, 0, 5.0}, - {4, 1, 6.0}, - {4, 2, 7.0}, - {5, 0, 8.0}, - {5, 1, 9.0}, - {5, 2, 10.0}, - - {3, 6, 0.0}, - {3, 7, 7.0}, - {3, 8, 8.0}, - {4, 6, 9.0}, - {4, 7, 10.0}, - {4, 8, 11.0}, - {5, 6, 12.0}, - {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, - {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); -} - -template -std::unique_ptr> -FbcsrSample::generate_sparsity_csr() const -{ - gko::Array colids(exec, nbnz); - gko::Array rowptrs(exec, nbrows + 1); - const std::unique_ptr fbmat = generate_fbcsr(); - for (index_type i = 0; i < nbrows + 1; i++) - rowptrs.get_data()[i] = fbmat->get_row_ptrs()[i]; - for (index_type i = 0; i < nbnz; i++) - colids.get_data()[i] = fbmat->get_col_idxs()[i]; - return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, rowptrs); -} - -#define GKO_DECLARE_FBCSR_TEST_SAMPLE(ValueType, IndexType) \ - class FbcsrSample -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE); - - -template -FbcsrSample2::FbcsrSample2( - const std::shared_ptr rexec) - : nrows{6}, - ncols{8}, - nnz{16}, - nbrows{3}, - nbcols{4}, - nbnz{4}, - bs{2}, - exec(rexec) -{} - -template -std::unique_ptr> -FbcsrSample2::generate_fbcsr() const -{ - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 1; - r[2] = 3; - r[3] = 4; - c[0] = 0; - c[1] = 0; - c[2] = 3; - c[3] = 2; - - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; - - v[0] = 1; - v[1] = 2; - v[2] 
= 3; - v[3] = 0; - v[10] = 0; - v[11] = 0; - v[12] = -12; - v[13] = -1; - v[14] = -2; - v[15] = -11; - - return mtx; -} - -template -void FbcsrSample2::fill_value_blocks_view( - DenseBlocksView &dbv) const -{ - dbv(0, 0, 0) = 1.0; - dbv(0, 0, 1) = 2.0; - dbv(0, 1, 0) = 3.0; - dbv(0, 1, 1) = 0.0; - for (int i = 0; i < 2; ++i) - for (int j = 0; j < 2; ++j) dbv(1, i, j) = 0.15 + FBCSR_TEST_OFFSET; - dbv(2, 0, 0) = 0.15 + FBCSR_TEST_OFFSET; - dbv(2, 0, 1) = 0.15 + FBCSR_TEST_OFFSET; - dbv(2, 1, 0) = 0.0; - dbv(2, 1, 1) = 0.0; - dbv(3, 0, 0) = -12.0; - dbv(3, 0, 1) = -1.0; - dbv(3, 1, 0) = -2.0; - dbv(3, 1, 1) = -11.0; -} - -template -std::unique_ptr, IndexType>> -FbcsrSample2::generate_abs_fbcsr_abstype() const -{ - using AbsValueType = typename gko::remove_complex; - using AbsFbcsr = gko::matrix::Fbcsr; - - const std::unique_ptr mat = generate_fbcsr(); - std::unique_ptr amat = - generate_acopy_impl(mat.get()); - - AbsValueType *const v = amat->get_values(); - - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; - v[0] = 1; - v[1] = 2; - v[2] = 3; - v[3] = 0; - v[10] = 0; - v[11] = 0; - v[12] = 12; - v[13] = 1; - v[14] = 2; - v[15] = 11; - - return amat; -} - -template -std::unique_ptr> -FbcsrSample2::generate_abs_fbcsr() const -{ - const std::unique_ptr mat = generate_fbcsr(); - std::unique_ptr amat = - generate_acopy_impl(mat.get()); - - ValueType *const v = amat->get_values(); - - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; - v[0] = 1.0; - v[1] = 2.0; - v[2] = 3.0; - v[3] = 0.0; - v[10] = 0.0; - v[11] = 0.0; - v[12] = 12.0; - v[13] = 1.0; - v[14] = 2.0; - v[15] = 11.0; - - return amat; -} - -template -std::unique_ptr> -FbcsrSample2::generate_transpose_fbcsr() const -{ - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(ncols), - static_cast(nrows)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 2; - r[2] = 2; - r[3] = 3; - r[4] = 4; - c[0] = 0; - c[1] = 1; - c[2] = 2; - c[3] = 1; - - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; - - v[0] = 1; - v[1] = 3; - v[2] = 2; - v[3] = 0; - v[8] = -12; - v[9] = -2; - v[10] = -1; - v[11] = -11; - v[13] = 0; - v[15] = 0; - - return mtx; -} - -template -std::unique_ptr> -FbcsrSample2::extract_diagonal() const -{ - gko::Array dvals(exec, nrows); - ValueType *const dv = dvals.get_data(); - dv[0] = 1; - dv[1] = 0; - dv[2] = 0; - dv[3] = 0; - dv[4] = -12; - dv[5] = -11; - return Diagonal::create(exec, nrows, dvals); -} - -template -gko::Array FbcsrSample2::getNonzerosPerRow() - const -{ - return gko::Array(exec, {2, 2, 4, 4, 2, 2}); -} - -template -void FbcsrSample2::apply( - const gko::matrix::Dense *const x, - gko::matrix::Dense *const y) const -{ - if (x->get_size()[0] != ncols) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "spmv", nrows, - ncols, ""); - if (y->get_size()[0] != nrows) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "spmv", nrows, - ncols, ""); - if (x->get_size()[1] != y->get_size()[1]) - throw gko::BadDimension(__FILE__, __LINE__, __func__, "spmv", nrows, - ncols, ""); - - const ValueType defv = sct(0.15 + FBCSR_TEST_OFFSET); - - for (index_type k = 0; k < x->get_size()[1]; k++) { - y->at(0, k) = sct(1.0) * x->at(0, k) + sct(2.0) * x->at(1, k); - y->at(1, k) = sct(3.0) * x->at(0, k); - y->at(2, k) = - defv * (x->at(0, k) + x->at(1, k) + x->at(6, k) + x->at(7, k)); - y->at(3, k) = defv * (x->at(0, k) + x->at(1, k)); 
- y->at(4, k) = sct(-12.0) * x->at(4, k) - x->at(5, k); - y->at(5, k) = sct(-2.0) * x->at(4, k) + sct(-11.0) * x->at(5, k); - } -} - -#define GKO_DECLARE_FBCSR_TEST_SAMPLE_2(ValueType, IndexType) \ - class FbcsrSample2 -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_TEST_SAMPLE_2); - - -template -FbcsrSampleSquare::FbcsrSampleSquare( - const std::shared_ptr rexec) - : nrows{4}, - ncols{4}, - nnz{8}, - nbrows{2}, - nbcols{2}, - nbnz{2}, - bs{2}, - exec(rexec) -{} - -template -std::unique_ptr> -FbcsrSampleSquare::generate_fbcsr() const -{ - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 1; - r[2] = 2; - c[0] = 1; - c[1] = 1; - - for (IndexType i = 0; i < nnz; i++) v[i] = i; - - return mtx; -} - -template -std::unique_ptr> -FbcsrSampleSquare::generate_transpose_fbcsr() const -{ - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 0; - r[2] = 2; - c[0] = 0; - c[1] = 1; - - gko::blockutils::DenseBlocksView vals(v, bs, bs); - vals(0, 0, 0) = 0; - vals(0, 0, 1) = 2; - vals(0, 1, 0) = 1; - vals(0, 1, 1) = 3; - vals(1, 0, 0) = 4; - vals(1, 0, 1) = 6; - vals(1, 1, 0) = 5; - vals(1, 1, 1) = 7; - - return mtx; -} - -#define GKO_DECLARE_FBCSR_TEST_SAMPLE_SQUARE(ValueType, IndexType) \ - class FbcsrSampleSquare - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_TEST_SAMPLE_SQUARE); - - -template -FbcsrSampleComplex::FbcsrSampleComplex( - const std::shared_ptr rexec) - : nrows{6}, - ncols{8}, - nnz{16}, - nbrows{3}, - nbcols{4}, - nbnz{4}, - bs{2}, - exec(rexec) -{} - -template -std::unique_ptr> -FbcsrSampleComplex::generate_fbcsr() const -{ - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 1; - r[2] = 3; - r[3] = 4; - c[0] = 0; - c[1] = 0; - c[2] = 3; - c[3] = 2; - - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; - - using namespace std::complex_literals; - v[0] = 1.0 + 1.15i; - v[1] = 2.0 + 2.15i; - v[2] = 3.0 - 3.15i; - v[3] = 0.0 - 0.15i; - v[10] = 0.0; - v[11] = 0.0; - v[12] = -12.0 + 12.15i; - v[13] = -1.0 + 1.15i; - v[14] = -2.0 - 2.15i; - v[15] = -11.0 - 11.15i; - - return mtx; -} - -template -std::unique_ptr> -FbcsrSampleComplex::generate_conjtranspose_fbcsr() const -{ - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(ncols), - static_cast(nrows)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 2; - r[2] = 2; - r[3] = 3; - r[4] = 4; - c[0] = 0; - c[1] = 1; - c[2] = 2; - c[3] = 1; - - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; - - using namespace std::complex_literals; - v[0] = 1.0 - 1.15i; - v[1] = 3.0 + 3.15i; - v[2] = 2.0 - 2.15i; - v[3] = 0.0 + 0.15i; - v[8] = -12.0 - 12.15i; - v[9] = -2.0 + 2.15i; - v[10] = -1.0 - 1.15i; - v[11] = -11.0 + 11.15i; - v[13] = 0; - v[15] = 0; - - return mtx; -} - -template class 
FbcsrSampleComplex, int32>; -template class FbcsrSampleComplex, int32>; -template class FbcsrSampleComplex, int64>; -template class FbcsrSampleComplex, int64>; - -} // namespace testing -} // namespace gko diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 5985e192a5b..082fb5f0342 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -45,6 +45,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/fixed_block.hpp" +#define FBCSR_TEST_OFFSET 0.000011118888 + +#define FBCSR_TEST_C_MAG 0.1 + FBCSR_TEST_OFFSET + +#define FBCSR_TEST_IMAGINARY \ + sct(std::complex>(0, FBCSR_TEST_C_MAG)) + + namespace gko { namespace testing { @@ -60,43 +68,246 @@ class FbcsrSample { public: using value_type = ValueType; using index_type = IndexType; - using absvalue_type = remove_complex; using Fbcsr = gko::matrix::Fbcsr; using Csr = gko::matrix::Csr; - using Dense = gko::matrix::Dense; using MatData = gko::matrix_data; using SparCsr = gko::matrix::SparsityCsr; - FbcsrSample(std::shared_ptr exec); + + const size_type nrows = 6; + const size_type ncols = 12; + const size_type nnz = 36; + const size_type nbrows = 2; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 3; + const std::shared_ptr exec; + + + FbcsrSample(std::shared_ptr rexec) + : exec(rexec) + {} /** * @return The sample matrix in FBCSR format */ - std::unique_ptr generate_fbcsr() const; + std::unique_ptr generate_fbcsr() const + { + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 2; + r[2] = 4; + c[0] = 1; + c[1] = 3; + c[2] = 0; + c[3] = 2; + + gko::blockutils::DenseBlocksView vals(v, bs, + bs); + + if (mtx->get_size()[0] % bs != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", + mtx->get_size()[0], mtx->get_size()[1], + "block size does not divide the size!"); + + for (index_type ibrow = 0; ibrow < mtx->get_num_block_rows(); ibrow++) { + const index_type *const browptr = mtx->get_row_ptrs(); + for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; + inz++) { + const index_type bcolind = mtx->get_col_idxs()[inz]; + const value_type base = (ibrow + 1) * (bcolind + 1); + for (int ival = 0; ival < bs; ival++) + for (int jval = 0; jval < bs; jval++) + vals(inz, ival, jval) = + base + static_cast>( + ival * bs + jval); + } + } + + // Some of the entries are set to zero + vals(0, 2, 0) = gko::zero(); + vals(0, 2, 2) = gko::zero(); + vals(3, 0, 0) = gko::zero(); + + v[34] += FBCSR_TEST_IMAGINARY; + v[35] += FBCSR_TEST_IMAGINARY; + + return mtx; + } /** * @return Sample matrix in CSR format * * Keeps explicit zeros. 
*/ - std::unique_ptr generate_csr() const; - - /** - * @return Sample matrix as dense - */ - std::unique_ptr generate_dense() const; + std::unique_ptr generate_csr() const + { + std::unique_ptr csrm = + Csr::create(exec, gko::dim<2>{nrows, ncols}, nnz, + std::make_shared()); + index_type *const csrrow = csrm->get_row_ptrs(); + index_type *const csrcols = csrm->get_col_idxs(); + value_type *const csrvals = csrm->get_values(); + + csrrow[0] = 0; + csrrow[1] = 6; + csrrow[2] = 12; + csrrow[3] = 18; + csrrow[4] = 24; + csrrow[5] = 30; + csrrow[6] = 36; + + csrcols[0] = 3; + csrvals[0] = 2; + csrcols[1] = 4; + csrvals[1] = 3; + csrcols[2] = 5; + csrvals[2] = 4; + csrcols[6] = 3; + csrvals[6] = 5; + csrcols[7] = 4; + csrvals[7] = 6; + csrcols[8] = 5; + csrvals[8] = 7; + csrcols[12] = 3; + csrvals[12] = 0; + csrcols[13] = 4; + csrvals[13] = 9; + csrcols[14] = 5; + csrvals[14] = 0; + + csrcols[3] = 9; + csrvals[3] = 4; + csrcols[4] = 10; + csrvals[4] = 5; + csrcols[5] = 11; + csrvals[5] = 6; + csrcols[9] = 9; + csrvals[9] = 7; + csrcols[10] = 10; + csrvals[10] = 8; + csrcols[11] = 11; + csrvals[11] = 9; + csrcols[15] = 9; + csrvals[15] = 10; + csrcols[16] = 10; + csrvals[16] = 11; + csrcols[17] = 11; + csrvals[17] = 12; + + csrcols[18] = 0; + csrvals[18] = 2; + csrcols[19] = 1; + csrvals[19] = 3; + csrcols[20] = 2; + csrvals[20] = 4; + csrcols[24] = 0; + csrvals[24] = 5; + csrcols[25] = 1; + csrvals[25] = 6; + csrcols[26] = 2; + csrvals[26] = 7; + csrcols[30] = 0; + csrvals[30] = 8; + csrcols[31] = 1; + csrvals[31] = 9; + csrcols[32] = 2; + csrvals[32] = 10; + + csrcols[21] = 6; + csrvals[21] = 0; + csrcols[22] = 7; + csrvals[22] = 7; + csrcols[23] = 8; + csrvals[23] = 8; + csrcols[27] = 6; + csrvals[27] = 9; + csrcols[28] = 7; + csrvals[28] = 10; + csrcols[29] = 8; + csrvals[29] = 11; + csrcols[33] = 6; + csrvals[33] = 12; + csrcols[34] = 7; + csrvals[34] = 13; + csrcols[35] = 8; + csrvals[35] = 14; + + csrvals[34] += FBCSR_TEST_IMAGINARY; + csrvals[35] += FBCSR_TEST_IMAGINARY; + + return csrm; + } /** * @return Sparsity structure of the matrix */ - std::unique_ptr generate_sparsity_csr() const; + std::unique_ptr generate_sparsity_csr() const + { + gko::Array colids(exec, nbnz); + gko::Array rowptrs(exec, nbrows + 1); + const std::unique_ptr fbmat = generate_fbcsr(); + for (index_type i = 0; i < nbrows + 1; i++) + rowptrs.get_data()[i] = fbmat->get_row_ptrs()[i]; + for (index_type i = 0; i < nbnz; i++) + colids.get_data()[i] = fbmat->get_col_idxs()[i]; + return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, + rowptrs); + } /** * @return Array of COO triplets that represent the matrix * * @note The order of the triplets assumes the blocks are stored row-major */ - MatData generate_matrix_data() const; + MatData generate_matrix_data() const + { + return MatData({{6, 12}, + {{0, 3, 2.0}, + {0, 4, 3.0}, + {0, 5, 4.0}, + {1, 3, 5.0}, + {1, 4, 6.0}, + {1, 5, 7.0}, + {2, 4, 9.0}, + + {0, 9, 4.0}, + {0, 10, 5.0}, + {0, 11, 6.0}, + {1, 9, 7.0}, + {1, 10, 8.0}, + {1, 11, 9.0}, + {2, 9, 10.0}, + {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, + {3, 1, 3.0}, + {3, 2, 4.0}, + {4, 0, 5.0}, + {4, 1, 6.0}, + {4, 2, 7.0}, + {5, 0, 8.0}, + {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 7, 7.0}, + {3, 8, 8.0}, + {4, 6, 9.0}, + {4, 7, 10.0}, + {4, 8, 11.0}, + {5, 6, 12.0}, + {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, + {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); + } /** * @return Array of COO triplets that represent the matrix; includes @@ -104,34 +315,51 @@ class FbcsrSample { * * @note The order of the triplets 
assumes the blocks are stored row-major */ - MatData generate_matrix_data_with_explicit_zeros() const; - - /** - * @return FBCSR matrix with absolute values of respective entries - */ - std::unique_ptr generate_abs_fbcsr() const; - - /** - * @return FBCSR matrix with real scalar type, - * with absolute values of respective entries - */ - std::unique_ptr, index_type>> - generate_abs_fbcsr_abstype() const; - - - const size_type nrows; - const size_type ncols; - const size_type nnz; - const size_type nbrows; - const size_type nbcols; - const size_type nbnz; - const int bs; - const std::shared_ptr exec; + MatData generate_matrix_data_with_explicit_zeros() const + { + return MatData({{6, 12}, + {{0, 3, 2.0}, + {0, 4, 3.0}, + {0, 5, 4.0}, + {1, 3, 5.0}, + {1, 4, 6.0}, + {1, 5, 7.0}, + {2, 3, 0.0}, + {2, 4, 9.0}, + {2, 5, 0.0}, + + {0, 9, 4.0}, + {0, 10, 5.0}, + {0, 11, 6.0}, + {1, 9, 7.0}, + {1, 10, 8.0}, + {1, 11, 9.0}, + {2, 9, 10.0}, + {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, + {3, 1, 3.0}, + {3, 2, 4.0}, + {4, 0, 5.0}, + {4, 1, 6.0}, + {4, 2, 7.0}, + {5, 0, 8.0}, + {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 6, 0.0}, + {3, 7, 7.0}, + {3, 8, 8.0}, + {4, 6, 9.0}, + {4, 7, 10.0}, + {4, 8, 11.0}, + {5, 6, 12.0}, + {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, + {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); + } private: - template - void correct_abs_for_complex_values(FbcsrType *const mat) const; - /// Enables complex data to be used for complex instantiations... template constexpr std::enable_if_t() || is_complex(), @@ -161,49 +389,159 @@ class FbcsrSample2 { using value_type = ValueType; using index_type = IndexType; using Fbcsr = gko::matrix::Fbcsr; - using Dense = gko::matrix::Dense; + using Csr = gko::matrix::Csr; using Diagonal = gko::matrix::Diagonal; using DenseBlocksView = gko::blockutils::DenseBlocksView; - FbcsrSample2(std::shared_ptr exec); - std::unique_ptr generate_fbcsr() const; + const size_type nrows = 6; + const size_type ncols = 8; + const size_type nnz = 16; + const size_type nbrows = 3; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 2; + const std::shared_ptr exec; + - std::unique_ptr generate_transpose_fbcsr() const; + FbcsrSample2(std::shared_ptr rexec) + : exec(rexec) + {} - std::unique_ptr extract_diagonal() const; + std::unique_ptr generate_fbcsr() const + { + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 1; + r[2] = 3; + r[3] = 4; + c[0] = 0; + c[1] = 0; + c[2] = 3; + c[3] = 2; + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + v[0] = 1; + v[1] = 2; + v[2] = 3; + v[3] = 0; + v[10] = 0; + v[11] = 0; + v[12] = -12; + v[13] = -1; + v[14] = -2; + v[15] = -11; + + return mtx; + } - void apply(const Dense *x, Dense *y) const; + std::unique_ptr generate_csr() const + { + std::unique_ptr mtx = + Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, std::make_shared()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 2; + r[2] = 4; + r[3] = 8; + r[4] = 12; + r[5] = 14; + r[6] = 16; + + c[0] = 0; + c[1] = 1; + c[2] = 0; + c[3] = 1; + c[4] = 0; + c[5] = 1; + c[6] = 6; + c[7] = 7; + c[8] = 0; + c[9] = 1; + c[10] = 6; + c[11] = 7; + c[12] = 4; + c[13] = 5; + c[14] = 4; + c[15] = 5; 
+ + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + v[0] = 1; + v[1] = 2; + v[2] = 3; + v[3] = 0; + v[10] = 0; + v[11] = 0; + v[12] = -12; + v[13] = -1; + v[14] = -2; + v[15] = -11; + + return mtx; + } - gko::Array getNonzerosPerRow() const; + std::unique_ptr extract_diagonal() const + { + gko::Array dvals(exec, nrows); + ValueType *const dv = dvals.get_data(); + dv[0] = 1; + dv[1] = 0; + dv[2] = 0; + dv[3] = 0; + dv[4] = -12; + dv[5] = -11; + return Diagonal::create(exec, nrows, dvals); + } - std::unique_ptr generate_abs_fbcsr() const; + gko::Array getNonzerosPerRow() const + { + return gko::Array(exec, {2, 2, 4, 4, 2, 2}); + } - std::unique_ptr, index_type>> - generate_abs_fbcsr_abstype() const; + /// Fills a view into a FBCSR values array using the sample matrix's data + void fill_value_blocks_view(DenseBlocksView &dbv) const + { + dbv(0, 0, 0) = 1.0; + dbv(0, 0, 1) = 2.0; + dbv(0, 1, 0) = 3.0; + dbv(0, 1, 1) = 0.0; + for (int i = 0; i < 2; ++i) + for (int j = 0; j < 2; ++j) dbv(1, i, j) = 0.15 + FBCSR_TEST_OFFSET; + dbv(2, 0, 0) = 0.15 + FBCSR_TEST_OFFSET; + dbv(2, 0, 1) = 0.15 + FBCSR_TEST_OFFSET; + dbv(2, 1, 0) = 0.0; + dbv(2, 1, 1) = 0.0; + dbv(3, 0, 0) = -12.0; + dbv(3, 0, 1) = -1.0; + dbv(3, 1, 0) = -2.0; + dbv(3, 1, 1) = -11.0; + } - /** - * Fills a view into a FBCSR values array using the sample matrix's data - */ - void fill_value_blocks_view(DenseBlocksView &dbv) const; +private: /// Enables use of literals to instantiate value data template constexpr ValueType sct(U u) const { return static_cast(u); } - - - const size_type nrows; - const size_type ncols; - const size_type nnz; - const size_type nbrows; - const size_type nbcols; - const size_type nbnz; - const int bs; - const std::shared_ptr exec; }; /** @@ -218,20 +556,42 @@ class FbcsrSampleSquare { using index_type = IndexType; using Fbcsr = gko::matrix::Fbcsr; - FbcsrSampleSquare(std::shared_ptr exec); - std::unique_ptr generate_fbcsr() const; + const size_type nrows = 4; + const size_type ncols = 4; + const size_type nnz = 8; + const size_type nbrows = 2; + const size_type nbcols = 2; + const size_type nbnz = 2; + const int bs = 2; + const std::shared_ptr exec; - std::unique_ptr generate_transpose_fbcsr() const; - const size_type nrows; - const size_type ncols; - const size_type nnz; - const size_type nbrows; - const size_type nbcols; - const size_type nbnz; - const int bs; - const std::shared_ptr exec; + FbcsrSampleSquare(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 1; + r[2] = 2; + c[0] = 1; + c[1] = 1; + + for (IndexType i = 0; i < nnz; i++) v[i] = i; + + return mtx; + } }; /** @@ -244,25 +604,116 @@ class FbcsrSampleComplex { public: using value_type = ValueType; using index_type = IndexType; + using Csr = gko::matrix::Csr; using Fbcsr = gko::matrix::Fbcsr; + static_assert(is_complex(), "Only for complex types!"); - FbcsrSampleComplex(std::shared_ptr exec); - std::unique_ptr generate_fbcsr() const; + const size_type nrows = 6; + const size_type ncols = 8; + const size_type nnz = 16; + const size_type nbrows = 3; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 2; + const std::shared_ptr exec; - std::unique_ptr generate_conjtranspose_fbcsr() const; + 
FbcsrSampleComplex(std::shared_ptr rexec) + : exec(rexec) + {} - const size_type nrows; - const size_type ncols; - const size_type nnz; - const size_type nbrows; - const size_type nbcols; - const size_type nbnz; - const int bs; - const std::shared_ptr exec; + std::unique_ptr generate_fbcsr() const + { + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 1; + r[2] = 3; + r[3] = 4; + c[0] = 0; + c[1] = 0; + c[2] = 3; + c[3] = 2; + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + using namespace std::complex_literals; + v[0] = 1.0 + 1.15i; + v[1] = 2.0 + 2.15i; + v[2] = 3.0 - 3.15i; + v[3] = 0.0 - 0.15i; + v[10] = 0.0; + v[11] = 0.0; + v[12] = -12.0 + 12.15i; + v[13] = -1.0 + 1.15i; + v[14] = -2.0 - 2.15i; + v[15] = -11.0 - 11.15i; + + return mtx; + } + + std::unique_ptr generate_csr() const + { + std::unique_ptr mtx = + Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, std::make_shared()); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 2; + r[2] = 4; + r[3] = 8; + r[4] = 12; + r[5] = 14; + r[6] = 16; + + c[0] = 0; + c[1] = 1; + c[2] = 0; + c[3] = 1; + c[4] = 0; + c[5] = 1; + c[6] = 6; + c[7] = 7; + c[8] = 0; + c[9] = 1; + c[10] = 6; + c[11] = 7; + c[12] = 4; + c[13] = 5; + c[14] = 4; + c[15] = 5; + + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + + using namespace std::complex_literals; + v[0] = 1.0 + 1.15i; + v[1] = 2.0 + 2.15i; + v[2] = 3.0 - 3.15i; + v[3] = 0.0 - 0.15i; + v[10] = 0.0; + v[11] = 0.0; + v[12] = -12.0 + 12.15i; + v[13] = -1.0 + 1.15i; + v[14] = -2.0 - 2.15i; + v[15] = -11.0 - 11.15i; + + return mtx; + } }; } // namespace testing diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index cb4d2b0217a..c826a9eaca7 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -3,7 +3,7 @@ ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) -ginkgo_create_test(fbcsr_kernels test_fbcsr_sample) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(identity) ginkgo_create_test(permutation) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 1a7e00d3f40..92549484030 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -50,6 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/matrix/csr_kernels.hpp" #include "core/matrix/fbcsr_kernels.hpp" #include "core/test/matrix/fbcsr_sample.hpp" #include "core/test/utils.hpp" @@ -79,8 +80,9 @@ class Fbcsr : public ::testing::Test { fbsamplesquare(exec), mtx(fbsample.generate_fbcsr()), refmtx(fbsample.generate_fbcsr()), + ref2mtx(fbsample2.generate_fbcsr()), refcsrmtx(fbsample.generate_csr()), - refdenmtx(fbsample.generate_dense()), + ref2csrmtx(fbsample2.generate_csr()), refspcmtx(fbsample.generate_sparsity_csr()), mtx2(fbsample2.generate_fbcsr()), m2diag(fbsample2.extract_diagonal()), @@ -123,7 +125,9 @@ class Fbcsr : public ::testing::Test { fbsamplesquare; std::unique_ptr mtx; const std::unique_ptr refmtx; + const std::unique_ptr ref2mtx; const std::unique_ptr refcsrmtx; + const std::unique_ptr ref2csrmtx; const std::unique_ptr refdenmtx; const std::unique_ptr refspcmtx; const std::unique_ptr mtx2; @@ -167,12 +171,11 @@ TYPED_TEST(Fbcsr, AppliesToDenseVector) this->mtx2->apply(x.get(), y.get()); - this->fbsample2.apply(x.get(), yref.get()); - // using Csr = typename TestFixture::Csr; - // auto csr_mtx = Csr::create(this->mtx->get_executor(), - // std::make_shared()); - // this->mtx2->convert_to(csr_mtx.get()); - // csr_mtx->apply(x.get(), yref.get()); + using Csr = typename TestFixture::Csr; + auto csr_mtx = Csr::create(this->mtx->get_executor(), + std::make_shared()); + this->mtx2->convert_to(csr_mtx.get()); + csr_mtx->apply(x.get(), yref.get()); const double tolerance = std::numeric_limits>::epsilon(); @@ -200,7 +203,7 @@ TYPED_TEST(Fbcsr, AppliesToDenseMatrix) this->mtx2->apply(x.get(), y.get()); - this->fbsample2.apply(x.get(), yref.get()); + this->ref2csrmtx->apply(x.get(), yref.get()); const double tolerance = std::numeric_limits>::epsilon(); @@ -225,7 +228,6 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) auto y = Vec::create(this->exec, gko::dim<2>{nrows, 1}); for (index_type i = 0; i < ncols; i++) { - // xvals[i] = std::log(static_cast(static_cast((i+1)^2))); x->at(i, 0) = (i + 1.0) * (i + 1.0); } for (index_type i = 0; i < nrows; i++) { @@ -233,16 +235,11 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) get_some_number(); } - auto yref = Vec::create(this->exec, gko::dim<2>{nrows, 1}); - yref = y->clone(); + auto yref = y->clone(); this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); - auto prod = Vec::create(this->exec, gko::dim<2>{nrows, 1}); - this->fbsample2.apply(x.get(), prod.get()); - - yref->scale(beta.get()); - yref->add_scaled(alpha.get(), prod.get()); + this->ref2csrmtx->apply(alpha.get(), x.get(), beta.get(), yref.get()); const double tolerance = std::numeric_limits>::epsilon(); @@ -269,8 +266,8 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) for (index_type i = 0; i < ncols; i++) for (index_type j = 0; j < nvecs; j++) { - // xvals[i] = std::log(static_cast(static_cast((i+1)^2))); - x->at(i, j) = (i + 1.0) / (j + 1.0); + x->at(i, j) = + std::log(static_cast(0.1 + static_cast((i + 1) ^ 2))); } for (index_type i = 0; i < nrows; i++) for (index_type j = 0; j < nvecs; j++) { @@ -279,16 +276,11 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) get_some_number(); } - auto yref = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); - yref = y->clone(); + auto yref = y->clone(); this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); - auto prod = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); - this->fbsample2.apply(x.get(), prod.get()); - - yref->scale(beta.get()); - yref->add_scaled(alpha.get(), prod.get()); + 
this->ref2csrmtx->apply(alpha.get(), x.get(), beta.get(), yref.get()); const double tolerance = std::numeric_limits>::epsilon(); @@ -375,7 +367,9 @@ TYPED_TEST(Fbcsr, ConvertsToDense) this->mtx->convert_to(dense_mtx.get()); - GKO_ASSERT_MTX_NEAR(dense_mtx, this->refdenmtx, 0.0); + auto refdenmtx = Dense::create(this->mtx->get_executor()); + this->refcsrmtx->convert_to(refdenmtx.get()); + GKO_ASSERT_MTX_NEAR(dense_mtx, refdenmtx, 0.0); } @@ -386,7 +380,9 @@ TYPED_TEST(Fbcsr, MovesToDense) this->mtx->move_to(dense_mtx.get()); - GKO_ASSERT_MTX_NEAR(dense_mtx, this->refdenmtx, 0.0); + auto refdenmtx = Dense::create(this->mtx->get_executor()); + this->refcsrmtx->convert_to(refdenmtx.get()); + GKO_ASSERT_MTX_NEAR(dense_mtx, refdenmtx, 0.0); } @@ -395,10 +391,13 @@ TYPED_TEST(Fbcsr, ConvertsToCsr) using Csr = typename TestFixture::Csr; auto csr_mtx = Csr::create(this->mtx->get_executor(), std::make_shared()); - this->mtx->convert_to(csr_mtx.get()); - this->assert_equal_to_mtx(csr_mtx.get()); + + auto csr_mtx_2 = Csr::create(this->mtx->get_executor(), + std::make_shared()); + this->ref2mtx->convert_to(csr_mtx_2.get()); + GKO_ASSERT_MTX_NEAR(csr_mtx_2, this->ref2csrmtx, 0.0); } @@ -546,9 +545,14 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( this->exec, this->mtx2.get(), &row_nnz); - auto row_nnz_val = row_nnz.get_data(); - const gko::Array refrnnz = this->fbsample2.getNonzerosPerRow(); + // const gko::Array refrnnz = + // this->fbsample2.getNonzerosPerRow(); + gko::Array refrnnz(this->exec, this->mtx2->get_size()[0]); + gko::kernels::reference::csr ::calculate_nonzeros_per_row( + this->exec, this->ref2csrmtx.get(), &refrnnz); + ASSERT_EQ(row_nnz.get_num_elems(), refrnnz.get_num_elems()); + auto row_nnz_val = row_nnz.get_data(); for (gko::size_type i = 0; i < this->mtx2->get_size()[0]; i++) ASSERT_EQ(row_nnz_val[i], refrnnz.get_const_data()[i]); } @@ -557,24 +561,36 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) TYPED_TEST(Fbcsr, SquareMtxIsTransposable) { using Fbcsr = typename TestFixture::Mtx; - auto reftmtx = this->fbsamplesquare.generate_transpose_fbcsr(); + using Csr = typename TestFixture::Csr; + + auto csrmtxsq = + Csr::create(this->exec, std::make_shared()); + this->mtxsq->convert_to(csrmtxsq.get()); + std::unique_ptr reftmtx = csrmtxsq->transpose(); + auto reftmtx_as_csr = static_cast(reftmtx.get()); auto trans = this->mtxsq->transpose(); auto trans_as_fbcsr = static_cast(trans.get()); - GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx, 0.0); + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx_as_csr, 0.0); } TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) { using Fbcsr = typename TestFixture::Mtx; - auto reftmtx = this->fbsample2.generate_transpose_fbcsr(); + using Csr = typename TestFixture::Csr; + + auto csrmtx = + Csr::create(this->exec, std::make_shared()); + this->mtx2->convert_to(csrmtx.get()); + std::unique_ptr reftmtx = csrmtx->transpose(); + auto reftmtx_as_csr = static_cast(reftmtx.get()); auto trans = this->mtx2->transpose(); auto trans_as_fbcsr = static_cast(trans.get()); - GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx, 0.0); + GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx_as_csr, 0.0); } @@ -612,13 +628,13 @@ TYPED_TEST(Fbcsr, ExtractsDiagonal) TYPED_TEST(Fbcsr, InplaceAbsolute) { using Mtx = typename TestFixture::Mtx; + using Csr = typename TestFixture::Csr; auto mtx = this->fbsample2.generate_fbcsr(); - const std::unique_ptr refabs = - this->fbsample2.generate_abs_fbcsr(); + const std::unique_ptr refabs = 
this->ref2csrmtx->clone(); + refabs->compute_absolute_inplace(); using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - const value_type *const refvals = refabs->get_const_values(); mtx->compute_absolute_inplace(); @@ -632,11 +648,12 @@ TYPED_TEST(Fbcsr, OutplaceAbsolute) { using value_type = typename TestFixture::value_type; using Mtx = typename TestFixture::Mtx; + using AbsCsr = typename gko::remove_complex; using AbsMtx = typename gko::remove_complex; auto mtx = this->fbsample2.generate_fbcsr(); - const std::unique_ptr refabs = - this->fbsample2.generate_abs_fbcsr_abstype(); + const std::unique_ptr refabs = + this->ref2csrmtx->compute_absolute(); auto abs_mtx = mtx->compute_absolute(); @@ -654,13 +671,15 @@ class FbcsrComplex : public ::testing::Test { using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; }; TYPED_TEST_SUITE(FbcsrComplex, gko::test::ComplexValueIndexTypes); -TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) +TYPED_TEST(FbcsrComplex, ConvertsComplexToCsr) { + using Csr = typename TestFixture::Csr; using Fbcsr = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; @@ -668,9 +687,27 @@ TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) auto exec = gko::ReferenceExecutor::create(); gko::testing::FbcsrSampleComplex csample(exec); std::unique_ptr mtx = csample.generate_fbcsr(); - std::unique_ptr reftrans = - csample.generate_conjtranspose_fbcsr(); + auto csr_mtx = + Csr::create(exec, std::make_shared()); + mtx->convert_to(csr_mtx.get()); + GKO_ASSERT_MTX_NEAR(csr_mtx, mtx, 0.0); +} + + +TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) +{ + using Csr = typename TestFixture::Csr; + using Fbcsr = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = gko::ReferenceExecutor::create(); + gko::testing::FbcsrSampleComplex csample(exec); + auto csrmtx = csample.generate_csr(); + auto reftranslinop = csrmtx->conj_transpose(); + const Csr *const reftrans = static_cast(reftranslinop.get()); + + std::unique_ptr mtx = csample.generate_fbcsr(); auto trans = mtx->conj_transpose(); auto trans_as_fbcsr = static_cast(trans.get()); @@ -680,27 +717,23 @@ TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) TYPED_TEST(FbcsrComplex, InplaceAbsolute) { + using Csr = typename TestFixture::Csr; using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; gko::testing::FbcsrSample fbsample( gko::ReferenceExecutor::create()); auto mtx = fbsample.generate_fbcsr(); - - std::cout << " Generated fbcsr: " << mtx->get_values()[34] << ", " - << mtx->get_values()[35] << std::endl; - - const std::unique_ptr refabs = fbsample.generate_abs_fbcsr(); + auto csrmtx = fbsample.generate_csr(); using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - const value_type *const refvals = refabs->get_const_values(); mtx->compute_absolute_inplace(); + csrmtx->compute_absolute_inplace(); const gko::remove_complex tolerance = std::numeric_limits>::epsilon(); - GKO_ASSERT_MTX_NEAR(mtx, refabs, tolerance); + GKO_ASSERT_MTX_NEAR(mtx, csrmtx, tolerance); } @@ -715,10 +748,10 @@ TYPED_TEST(FbcsrComplex, OutplaceAbsolute) gko::ReferenceExecutor::create()); auto mtx = 
fbsample.generate_fbcsr(); - const std::unique_ptr refabs = - fbsample.generate_abs_fbcsr_abstype(); + auto csrmtx = fbsample.generate_csr(); auto abs_mtx = mtx->compute_absolute(); + auto refabs = mtx->compute_absolute(); const gko::remove_complex tolerance = std::numeric_limits>::epsilon(); From 4f48c2d3b0c0ec479972e344f25b2e5e1f23a9a9 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Sat, 19 Dec 2020 00:04:33 +0100 Subject: [PATCH 33/58] moved fbcsr constructor to header file to fix MSVC linker errors Why do we bother with Windoze builds again? --- core/matrix/fbcsr.cpp | 15 --------------- include/ginkgo/core/matrix/fbcsr.hpp | 10 +++++++++- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 88b17928d06..1075a5396a1 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -146,21 +146,6 @@ class DenseBlock final { } // namespace detail -template -Fbcsr::Fbcsr(const std::shared_ptr exec, - const dim<2> &size, - const size_type num_nonzeros, - const int block_size) - : EnableLinOp(exec, size), - bs_{block_size}, - nbcols_{blockutils::getNumBlocks(block_size, size[1])}, - values_(exec, num_nonzeros), - col_idxs_(exec, blockutils::getNumBlocks(block_size * block_size, - num_nonzeros)), - row_ptrs_(exec, blockutils::getNumBlocks(block_size, size[0]) + 1) -{} - - template void Fbcsr::apply_impl(const LinOp *const b, LinOp *const x) const diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 925e7268749..a0b0d5e6525 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -332,7 +332,15 @@ class Fbcsr : public EnableLinOp>, * @param block_size Size of the small dense square blocks */ Fbcsr(std::shared_ptr exec, const dim<2> &size, - size_type num_nonzeros, int block_size); + size_type num_nonzeros, int block_size) + : EnableLinOp(exec, size), + bs_{block_size}, + nbcols_{blockutils::getNumBlocks(block_size, size[1])}, + values_(exec, num_nonzeros), + col_idxs_(exec, blockutils::getNumBlocks(block_size * block_size, + num_nonzeros)), + row_ptrs_(exec, blockutils::getNumBlocks(block_size, size[0]) + 1) + {} /** * Creates a FBCSR matrix from already allocated (and initialized) row From 26566e693564f9ad1e913d6b2eca6b2065df6957 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 4 Jan 2021 13:07:28 +0100 Subject: [PATCH 34/58] fixed a bug in fbcsr copy, improved tests --- core/matrix/fbcsr.cpp | 2 ++ core/test/matrix/fbcsr.cpp | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 1075a5396a1..455cc88fee8 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -194,6 +194,7 @@ void Fbcsr::convert_to( result->row_ptrs_ = this->row_ptrs_; result->set_size(this->get_size()); result->bs_ = this->bs_; + result->nbcols_ = this->nbcols_; } @@ -214,6 +215,7 @@ void Fbcsr::convert_to( result->row_ptrs_ = this->row_ptrs_; result->set_size(this->get_size()); result->bs_ = this->bs_; + result->nbcols_ = this->nbcols_; } diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 619315e5c98..236105cfb48 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -97,6 +97,8 @@ class Fbcsr : public ::testing::Test { ASSERT_EQ(m->get_size(), orig_size); ASSERT_EQ(m->get_num_stored_elements(), orig_vals.size()); ASSERT_EQ(m->get_block_size(), bs); + ASSERT_EQ(m->get_num_block_rows(), m->get_size()[0] / bs); + ASSERT_EQ(m->get_num_block_cols(), m->get_size()[1] / bs); 
for (index_type irow = 0; irow < orig_size[0] / bs; irow++) { @@ -184,6 +186,8 @@ TYPED_TEST(Fbcsr, KnowsItsSize) ASSERT_EQ(this->mtx->get_size(), gko::dim<2>(6, 12)); ASSERT_EQ(this->mtx->get_block_size(), 3); ASSERT_EQ(this->mtx->get_num_stored_elements(), 36); + ASSERT_EQ(this->mtx->get_num_block_rows(), 2); + ASSERT_EQ(this->mtx->get_num_block_cols(), 4); } @@ -278,6 +282,7 @@ TYPED_TEST(Fbcsr, CanBeReadFromMatrixData) using Mtx = typename TestFixture::Mtx; auto m = Mtx::create(this->exec); m->set_block_size(this->fbsample.bs); + ASSERT_EQ(m->get_block_size(), this->fbsample.bs); m->read(this->fbsample.generate_matrix_data()); From cdc9508932b8566b494d883510a7003f3c1b9fc5 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 13 Jan 2021 11:06:13 +0100 Subject: [PATCH 35/58] updated year in headers --- core/components/fixed_block.hpp | 2 +- core/matrix/fbcsr.cpp | 2 +- core/matrix/fbcsr_builder.hpp | 2 +- core/matrix/fbcsr_kernels.hpp | 2 +- core/test/matrix/fbcsr.cpp | 2 +- core/test/matrix/fbcsr_builder.cpp | 2 +- core/test/matrix/fbcsr_sample.hpp | 2 +- cuda/matrix/fbcsr_kernels.cu | 2 +- dpcpp/matrix/fbcsr_kernels.dp.cpp | 2 +- hip/matrix/fbcsr_kernels.hip.cpp | 2 +- include/ginkgo/core/base/blockutils.hpp | 2 +- include/ginkgo/core/matrix/fbcsr.hpp | 2 +- omp/matrix/fbcsr_kernels.cpp | 2 +- reference/matrix/fbcsr_kernels.cpp | 2 +- reference/test/matrix/fbcsr_kernels.cpp | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 2e9595cdc04..3059ee11bbb 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 455cc88fee8..a0298f35cf7 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp index e80d76b3eeb..22d0bcfacb9 100644 --- a/core/matrix/fbcsr_builder.hpp +++ b/core/matrix/fbcsr_builder.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index af7e527a891..332292eb314 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 236105cfb48..34dce1af828 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/matrix/fbcsr_builder.cpp b/core/test/matrix/fbcsr_builder.cpp index 684aa2e324e..3a4bf358d51 100644 --- a/core/test/matrix/fbcsr_builder.cpp +++ b/core/test/matrix/fbcsr_builder.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 082fb5f0342..52f8e66acd5 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index b8d397ef5fc..b4da9345e48 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index 72a708bc5ff..cebba0ed528 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index ed2eb2667c1..fd30554a469 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/include/ginkgo/core/base/blockutils.hpp b/include/ginkgo/core/base/blockutils.hpp index f3664e1d092..0638390ad1a 100644 --- a/include/ginkgo/core/base/blockutils.hpp +++ b/include/ginkgo/core/base/blockutils.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index a0b0d5e6525..7b3cc390769 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index 442d6f7c107..15f63ee9794 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index c3532238db6..108697537df 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 92549484030..11c5d1bfa7a 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without From 0e325c02f90d5c9d4f298411f293dd948628a671 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 13 Jan 2021 11:23:50 +0100 Subject: [PATCH 36/58] review comments: removed FixedBlock for now, renamed getNumBlocks to get_num_blocks, fixed fbcsr doc block --- core/components/fixed_block.hpp | 46 ------------------------- core/matrix/fbcsr.cpp | 8 ++--- include/ginkgo/core/base/blockutils.hpp | 2 +- include/ginkgo/core/matrix/fbcsr.hpp | 23 ++++--------- 4 files changed, 12 insertions(+), 67 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 3059ee11bbb..32e53510f74 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -44,52 +44,6 @@ namespace gko { namespace blockutils { -/** - * @brief A dense block of values with compile-time constant dimensions - * - * The blocks are interpreted as row-major. However, in the future, - * a layout template parameter can be added if needed. - * - * The primary use is to reinterpret subsets of entries in a big array as - * small dense blocks. 
- * - * @tparam ValueType The numeric type of entries of the block - * @tparam nrows Number of rows - * @tparam ncols Number of columns - */ -template -class FixedBlock final { - static_assert(nrows > 0, "Requires positive number of rows!"); - static_assert(ncols > 0, "Requires positive number of columns!"); - -public: - using value_type = ValueType; - - value_type &at(const int row, const int col) - { - return vals[row * ncols + col]; - } - - const value_type &at(const int row, const int col) const - { - return vals[row * ncols + col]; - } - - value_type &operator()(const int row, const int col) - { - return at(row, col); - } - - const value_type &operator()(const int row, const int col) const - { - return at(row, col); - } - -private: - ValueType vals[nrows * ncols]; -}; - - /** * @brief A view into a an array of dense blocks of some runtime-defined size * diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index a0298f35cf7..d709639a4da 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -270,13 +270,13 @@ template void Fbcsr::convert_to( SparsityCsr *const result) const { - using blockutils::getNumBlocks; + using blockutils::get_num_blocks; auto exec = this->get_executor(); auto tmp = SparsityCsr::create( exec, - gko::dim<2>{getNumBlocks(bs_, this->get_size()[0]), - getNumBlocks(bs_, this->get_size()[1])}, - getNumBlocks(bs_ * bs_, this->get_num_stored_elements())); + gko::dim<2>{get_num_blocks(bs_, this->get_size()[0]), + get_num_blocks(bs_, this->get_size()[1])}, + get_num_blocks(bs_ * bs_, this->get_num_stored_elements())); tmp->col_idxs_ = this->col_idxs_; tmp->row_ptrs_ = this->row_ptrs_; diff --git a/include/ginkgo/core/base/blockutils.hpp b/include/ginkgo/core/base/blockutils.hpp index 0638390ad1a..c0b16ef6ac1 100644 --- a/include/ginkgo/core/base/blockutils.hpp +++ b/include/ginkgo/core/base/blockutils.hpp @@ -63,7 +63,7 @@ class BlockSizeError : public Error { * but throws when they don't divide */ template -IndexType getNumBlocks(const int block_size, const IndexType size) +IndexType get_num_blocks(const int block_size, const IndexType size) { if (size % block_size != 0) throw BlockSizeError(__FILE__, __LINE__, block_size, size); diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 7b3cc390769..6eb4b061724 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -61,6 +61,8 @@ class FbcsrBuilder; /** + * @brief Fixed-block compressed sparse row storage matrix format + * * FBCSR is a matrix format meant for matrices having a natural block structure * made up of small, dense, disjoint blocks. It is similar to CSR \sa Csr. 
* However, unlike Csr, each non-zero location stores a small dense block of @@ -85,22 +87,11 @@ class FbcsrBuilder; * matrix::Fbcsr *A, *B, *C; // matrices * matrix::Dense *b, *x; // vectors tall-and-skinny matrices * matrix::Dense *alpha, *beta; // scalars of dimension 1x1 - * matrix::Identity *I; // identity matrix * * // Applying to Dense matrices computes an SpMV/SpMM product * A->apply(b, x) // x = A*b * A->apply(alpha, b, beta, x) // x = alpha*A*b + beta*x - * - * // Applying to Fbcsr matrices computes a SpGEMM product of two sparse - * matrices A->apply(B, C) // C = A*B A->apply(alpha, B, beta, C) - * // C = alpha*A*B + beta*C - * - * // Applying to an Identity matrix computes a SpGEAM sparse matrix addition - * A->apply(alpha, I, beta, B) // B = alpha*A + beta*B * ``` - * Both the SpGEMM and SpGEAM operation require the input matrices to be sorted - * by block-column index, otherwise the algorithms will produce incorrect - * results. * * @tparam ValueType precision of matrix elements * @tparam IndexType precision of matrix indexes @@ -335,11 +326,11 @@ class Fbcsr : public EnableLinOp>, size_type num_nonzeros, int block_size) : EnableLinOp(exec, size), bs_{block_size}, - nbcols_{blockutils::getNumBlocks(block_size, size[1])}, + nbcols_{blockutils::get_num_blocks(block_size, size[1])}, values_(exec, num_nonzeros), - col_idxs_(exec, blockutils::getNumBlocks(block_size * block_size, - num_nonzeros)), - row_ptrs_(exec, blockutils::getNumBlocks(block_size, size[0]) + 1) + col_idxs_(exec, blockutils::get_num_blocks(block_size * block_size, + num_nonzeros)), + row_ptrs_(exec, blockutils::get_num_blocks(block_size, size[0]) + 1) {} /** @@ -370,7 +361,7 @@ class Fbcsr : public EnableLinOp>, RowPtrsArray &&row_ptrs) : EnableLinOp(exec, size), bs_{block_size}, - nbcols_{blockutils::getNumBlocks(block_size, size[1])}, + nbcols_{blockutils::get_num_blocks(block_size, size[1])}, values_{exec, std::forward(values)}, col_idxs_{exec, std::forward(col_idxs)}, row_ptrs_{exec, std::forward(row_ptrs)} From aeefc603e04124b454453718c323b26e8e72d237 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 13 Jan 2021 11:43:39 +0100 Subject: [PATCH 37/58] first fbcsr constructor now takes optional block size --- include/ginkgo/core/matrix/fbcsr.hpp | 8 +++++--- reference/test/matrix/fbcsr_kernels.cpp | 2 -- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 6eb4b061724..e783d50e078 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -306,12 +306,14 @@ class Fbcsr : public EnableLinOp>, protected: /** - * Creates an uninitialized FBCSR matrix with a block size of 1. + * Creates an uninitialized FBCSR matrix with the given block size. * * @param exec Executor associated to the matrix + * @param block_size The desired size of the dense square nonzero blocks; + * defaults to 1. 
*/ - Fbcsr(std::shared_ptr exec) - : Fbcsr(std::move(exec), dim<2>{}, {}, 1) + Fbcsr(std::shared_ptr exec, int block_size = 1) + : Fbcsr(std::move(exec), dim<2>{}, {}, block_size) {} /** diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 11c5d1bfa7a..8299f36465e 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -545,8 +545,6 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( this->exec, this->mtx2.get(), &row_nnz); - // const gko::Array refrnnz = - // this->fbsample2.getNonzerosPerRow(); gko::Array refrnnz(this->exec, this->mtx2->get_size()[0]); gko::kernels::reference::csr ::calculate_nonzeros_per_row( this->exec, this->ref2csrmtx.get(), &refrnnz); From 0980e93faccaec8e299bba4bcd13dd43f395d1bc Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 13 Jan 2021 20:31:57 +0100 Subject: [PATCH 38/58] Incorporated suggestions from review Co-authored-by: Terry Cojean Co-authored-by: Pratik Nayak --- core/components/fixed_block.hpp | 8 +- core/matrix/fbcsr.cpp | 32 +-- core/matrix/fbcsr_builder.hpp | 4 +- core/test/matrix/fbcsr.cpp | 29 +- core/test/matrix/fbcsr_sample.hpp | 336 +++++------------------- hip/matrix/fbcsr_kernels.hip.cpp | 2 +- include/ginkgo/core/base/blockutils.hpp | 21 +- include/ginkgo/core/matrix/fbcsr.hpp | 16 +- omp/matrix/fbcsr_kernels.cpp | 2 +- reference/matrix/fbcsr_kernels.cpp | 5 +- reference/test/matrix/fbcsr_kernels.cpp | 28 +- 11 files changed, 141 insertions(+), 342 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 32e53510f74..9bc8facc041 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -61,10 +61,10 @@ class DenseBlocksView final { using index_type = IndexType; /** - * @param buffer Segment of memory to be interpreted as an array of 2D - * blocks - * @param num_rows Number of rows in each block - * @param num_cols Number of columns in each block + * @param buffer Pointer to the segment of memory to be interpreted as + * an array of 2D blocks + * @param num_rows Number of rows in each block + * @param num_cols Number of columns in each block */ DenseBlocksView(ValueType *const buffer, const int num_rows, const int num_cols) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index d709639a4da..dbd21047da3 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -89,7 +89,7 @@ namespace detail { * * @internal Currently used only while reading a FBCSR matrix from matrix_data. * - * @tparam ValueType The numeric type of entries of the block + * @tparam ValueType The numeric type of entries of the block */ template class DenseBlock final { @@ -153,8 +153,7 @@ void Fbcsr::apply_impl(const LinOp *const b, using Dense = Dense; if (auto b_fbcsr = dynamic_cast *>(b)) { // if b is a FBCSR matrix, we compute a SpGeMM - throw /*::gko::*/ NotImplemented(__FILE__, __LINE__, - "SpGeMM for Fbcsr"); + throw NotImplemented(__FILE__, __LINE__, "SpGeMM for Fbcsr"); } else { // otherwise we assume that b is dense and compute a SpMV/SpMM this->get_executor()->run( @@ -293,24 +292,22 @@ void Fbcsr::move_to( } -/* Within blocks, the storage order is row-major. - * Currently, this implementation is sequential and has complexity O(n log n) - * assuming nnz = O(n). - * Can this be changed to a parallel O(n) implementation? +/* + * Currently, this implementation is sequential and has complexity + * O(nnz log(nnz)). 
+ * @note Can this be changed to a parallel O(nnz) implementation? */ template void Fbcsr::read(const mat_data &data) { - if (data.nonzeros.size() > std::numeric_limits::max()) - throw std::range_error(std::string("file: ") + __FILE__ + ":" + - std::to_string(__LINE__) + - ": List of nonzeros is too big!"); + GKO_ENSURE_IN_BOUNDS(data.nonzeros.size(), + std::numeric_limits::max()); const auto nnz = static_cast(data.nonzeros.size()); const int bs = this->bs_; - using Blk_t = detail::DenseBlock; + using Block_t = detail::DenseBlock; struct FbEntry { index_type block_row; @@ -328,7 +325,7 @@ void Fbcsr::read(const mat_data &data) }; auto create_block_map = [nnz, bs](const mat_data &mdata) { - std::map blocks; + std::map blocks; for (index_type inz = 0; inz < nnz; inz++) { const index_type row = mdata.nonzeros[inz].row; const index_type col = mdata.nonzeros[inz].column; @@ -339,22 +336,21 @@ void Fbcsr::read(const mat_data &data) const index_type blockrow = row / bs; const index_type blockcol = col / bs; - Blk_t &nnzblk = blocks[{blockrow, blockcol}]; + Block_t &nnzblk = blocks[{blockrow, blockcol}]; if (nnzblk.size() == 0) { nnzblk.resize(bs, bs); nnzblk.zero(); nnzblk(localrow, localcol) = val; } else { - if (nnzblk(localrow, localcol) != gko::zero()) - throw Error(__FILE__, __LINE__, - "Error: re-visited the same non-zero!"); + // If this does not happen, we re-visited a nonzero + assert(nnzblk(localrow, localcol) == gko::zero()); nnzblk(localrow, localcol) = val; } } return blocks; }; - const std::map blocks = create_block_map(data); + const std::map blocks = create_block_map(data); auto tmp = Fbcsr::create(this->get_executor()->get_master(), data.size, blocks.size() * bs * bs, bs); diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp index 22d0bcfacb9..df10c2a3a57 100644 --- a/core/matrix/fbcsr_builder.hpp +++ b/core/matrix/fbcsr_builder.hpp @@ -68,8 +68,8 @@ class FbcsrBuilder { int get_block_size() const { return matrix_->bs_; } /** - * @param matrix An existing FBCSR matrix - * for which intrusive access is needed + * @param matrix An existing FBCSR matrix + * for which intrusive access is needed */ explicit FbcsrBuilder(Fbcsr *const matrix) : matrix_{matrix} diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 34dce1af828..a38e1b32695 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include +#include #include @@ -71,10 +71,9 @@ class Fbcsr : public ::testing::Test { orig_rowptrs.resize(fbsample.nbrows + 1); orig_colinds.resize(fbsample.nbnz); orig_vals.resize(fbsample.nnz); - for (index_type i = 0; i < fbsample.nbrows + 1; i++) - orig_rowptrs[i] = r[i]; - for (index_type i = 0; i < fbsample.nbnz; i++) orig_colinds[i] = c[i]; - for (index_type i = 0; i < fbsample.nnz; i++) orig_vals[i] = v[i]; + std::copy(r, r + fbsample.nbrows + 1, orig_rowptrs.data()); + std::copy(c, c + fbsample.nbnz, orig_colinds.data()); + std::copy(v, v + fbsample.nnz, orig_vals.data()); } std::shared_ptr exec; @@ -197,6 +196,15 @@ TYPED_TEST(Fbcsr, ContainsCorrectData) } +TYPED_TEST(Fbcsr, BlockSizeIsSetCorrectly) +{ + using Mtx = typename TestFixture::Mtx; + auto m = Mtx::create(this->exec); + m->set_block_size(6); + ASSERT_EQ(m->get_block_size(), 6); +} + + TYPED_TEST(Fbcsr, CanBeEmpty) { using Mtx = typename TestFixture::Mtx; @@ -282,7 +290,6 @@ TYPED_TEST(Fbcsr, CanBeReadFromMatrixData) using Mtx = typename TestFixture::Mtx; auto m = Mtx::create(this->exec); m->set_block_size(this->fbsample.bs); - ASSERT_EQ(m->get_block_size(), this->fbsample.bs); m->read(this->fbsample.generate_matrix_data()); @@ -294,16 +301,14 @@ TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using tpl = typename gko::matrix_data::nonzero_type; - gko::matrix_data data; - - this->mtx->write(data); - data.ensure_row_major_order(); - gko::matrix_data refdata = this->fbsample.generate_matrix_data_with_explicit_zeros(); refdata.ensure_row_major_order(); + gko::matrix_data data; + this->mtx->write(data); + data.ensure_row_major_order(); + ASSERT_EQ(data.size, refdata.size); ASSERT_EQ(data.nonzeros.size(), refdata.nonzeros.size()); for (size_t i = 0; i < data.nonzeros.size(); i++) diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 52f8e66acd5..098aef69aa1 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -43,12 +43,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/fixed_block.hpp" +#include "core/test/utils.hpp" #define FBCSR_TEST_OFFSET 0.000011118888 - #define FBCSR_TEST_C_MAG 0.1 + FBCSR_TEST_OFFSET - #define FBCSR_TEST_IMAGINARY \ sct(std::complex>(0, FBCSR_TEST_C_MAG)) @@ -150,101 +149,20 @@ class FbcsrSample { */ std::unique_ptr generate_csr() const { - std::unique_ptr csrm = - Csr::create(exec, gko::dim<2>{nrows, ncols}, nnz, - std::make_shared()); - index_type *const csrrow = csrm->get_row_ptrs(); - index_type *const csrcols = csrm->get_col_idxs(); - value_type *const csrvals = csrm->get_values(); - - csrrow[0] = 0; - csrrow[1] = 6; - csrrow[2] = 12; - csrrow[3] = 18; - csrrow[4] = 24; - csrrow[5] = 30; - csrrow[6] = 36; - - csrcols[0] = 3; - csrvals[0] = 2; - csrcols[1] = 4; - csrvals[1] = 3; - csrcols[2] = 5; - csrvals[2] = 4; - csrcols[6] = 3; - csrvals[6] = 5; - csrcols[7] = 4; - csrvals[7] = 6; - csrcols[8] = 5; - csrvals[8] = 7; - csrcols[12] = 3; - csrvals[12] = 0; - csrcols[13] = 4; - csrvals[13] = 9; - csrcols[14] = 5; - csrvals[14] = 0; - - csrcols[3] = 9; - csrvals[3] = 4; - csrcols[4] = 10; - csrvals[4] = 5; - csrcols[5] = 11; - csrvals[5] = 6; - csrcols[9] = 9; - csrvals[9] = 7; - csrcols[10] = 10; - csrvals[10] = 8; - csrcols[11] = 11; - csrvals[11] = 9; - csrcols[15] = 9; - csrvals[15] = 10; - csrcols[16] = 10; - csrvals[16] = 11; - csrcols[17] = 11; - csrvals[17] = 12; - - csrcols[18] = 0; - csrvals[18] = 2; - csrcols[19] = 1; - csrvals[19] = 3; - csrcols[20] = 2; - csrvals[20] = 4; - csrcols[24] = 0; - csrvals[24] = 5; - csrcols[25] = 1; - csrvals[25] = 6; - csrcols[26] = 2; - csrvals[26] = 7; - csrcols[30] = 0; - csrvals[30] = 8; - csrcols[31] = 1; - csrvals[31] = 9; - csrcols[32] = 2; - csrvals[32] = 10; - - csrcols[21] = 6; - csrvals[21] = 0; - csrcols[22] = 7; - csrvals[22] = 7; - csrcols[23] = 8; - csrvals[23] = 8; - csrcols[27] = 6; - csrvals[27] = 9; - csrcols[28] = 7; - csrvals[28] = 10; - csrcols[29] = 8; - csrvals[29] = 11; - csrcols[33] = 6; - csrvals[33] = 12; - csrcols[34] = 7; - csrvals[34] = 13; - csrcols[35] = 8; - csrvals[35] = 14; - - csrvals[34] += FBCSR_TEST_IMAGINARY; - csrvals[35] += FBCSR_TEST_IMAGINARY; - - return csrm; + gko::Array csrrow(exec, {0, 6, 12, 18, 24, 30, 36}); + gko::Array csrcols( + exec, {3, 4, 5, 9, 10, 11, 3, 4, 5, 9, 10, 11, 3, 4, 5, 9, 10, 11, + 0, 1, 2, 6, 7, 8, 0, 1, 2, 6, 7, 8, 0, 1, 2, 6, 7, 8}); + // clang-format off + gko::Array csrvals(exec, I + {2, 3, 4, 4, 5, 6, 5, 6, 7, 7, 8, 9, 0, 9, 0, + 10, 11, 12, 2, 3, 4, 0, 7, 8, 5, 6, 7, + 9, 10, 11, 8, 9, 10, 12, + sct(13.0) + FBCSR_TEST_IMAGINARY, + sct(14.0) + FBCSR_TEST_IMAGINARY}); + // clang-format on + return Csr::create(exec, gko::dim<2>{nrows, ncols}, csrvals, csrcols, + csrrow); } /** @@ -317,46 +235,12 @@ class FbcsrSample { */ MatData generate_matrix_data_with_explicit_zeros() const { - return MatData({{6, 12}, - {{0, 3, 2.0}, - {0, 4, 3.0}, - {0, 5, 4.0}, - {1, 3, 5.0}, - {1, 4, 6.0}, - {1, 5, 7.0}, - {2, 3, 0.0}, - {2, 4, 9.0}, - {2, 5, 0.0}, - - {0, 9, 4.0}, - {0, 10, 5.0}, - {0, 11, 6.0}, - {1, 9, 7.0}, - {1, 10, 8.0}, - {1, 11, 9.0}, - {2, 9, 10.0}, - {2, 10, 11.0}, - {2, 11, 12.0}, - - {3, 0, 2.0}, - {3, 1, 3.0}, - {3, 2, 4.0}, - {4, 0, 5.0}, - {4, 1, 6.0}, - {4, 2, 7.0}, - {5, 0, 8.0}, - {5, 1, 9.0}, - {5, 2, 10.0}, - - {3, 6, 0.0}, - {3, 7, 7.0}, - {3, 8, 8.0}, - {4, 6, 9.0}, - {4, 7, 10.0}, - {4, 8, 11.0}, - {5, 6, 12.0}, - {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, - {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); + auto mdata = generate_matrix_data(); + mdata.nonzeros.push_back({2, 3, 
0.0}); + mdata.nonzeros.push_back({2, 5, 0.0}); + mdata.nonzeros.push_back({3, 6, 0.0}); + mdata.ensure_row_major_order(); + return mdata; } private: @@ -411,24 +295,10 @@ class FbcsrSample2 { std::unique_ptr generate_fbcsr() const { - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 1; - r[2] = 3; - r[3] = 4; - c[0] = 0; - c[1] = 0; - c[2] = 3; - c[3] = 2; - + gko::Array r(exec, {0, 1, 3, 4}); + gko::Array c(exec, {0, 0, 3, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; v[0] = 1; @@ -442,47 +312,20 @@ class FbcsrSample2 { v[14] = -2; v[15] = -11; - return mtx; + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); } std::unique_ptr generate_csr() const { - std::unique_ptr mtx = - Csr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, std::make_shared()); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 2; - r[2] = 4; - r[3] = 8; - r[4] = 12; - r[5] = 14; - r[6] = 16; - - c[0] = 0; - c[1] = 1; - c[2] = 0; - c[3] = 1; - c[4] = 0; - c[5] = 1; - c[6] = 6; - c[7] = 7; - c[8] = 0; - c[9] = 1; - c[10] = 6; - c[11] = 7; - c[12] = 4; - c[13] = 5; - c[14] = 4; - c[15] = 5; - + gko::Array r(exec, {0, 2, 4, 8, 12, 14, 16}); + gko::Array c( + exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; - v[0] = 1; v[1] = 2; v[2] = 3; @@ -494,19 +337,16 @@ class FbcsrSample2 { v[14] = -2; v[15] = -11; - return mtx; + return Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + vals, c, r, + std::make_shared()); } std::unique_ptr extract_diagonal() const { - gko::Array dvals(exec, nrows); - ValueType *const dv = dvals.get_data(); - dv[0] = 1; - dv[1] = 0; - dv[2] = 0; - dv[3] = 0; - dv[4] = -12; - dv[5] = -11; + gko::Array dvals(exec, {1, 0, 0, 0, -12, -11}); return Diagonal::create(exec, nrows, dvals); } @@ -573,24 +413,16 @@ class FbcsrSampleSquare { std::unique_ptr generate_fbcsr() const { - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 1; - r[2] = 2; - c[0] = 1; - c[1] = 1; - + gko::Array c(exec, {1, 1}); + gko::Array r(exec, {0, 1, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = i; - return mtx; + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); } }; @@ -627,24 +459,10 @@ class FbcsrSampleComplex { std::unique_ptr generate_fbcsr() const { - std::unique_ptr mtx = - Fbcsr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, bs); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 1; - r[2] = 3; - r[3] = 4; - c[0] = 0; - c[1] = 0; - c[2] = 3; - c[3] = 2; - + gko::Array r(exec, {0, 1, 
3, 4}); + gko::Array c(exec, {0, 0, 3, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; using namespace std::complex_literals; @@ -659,45 +477,19 @@ class FbcsrSampleComplex { v[14] = -2.0 - 2.15i; v[15] = -11.0 - 11.15i; - return mtx; + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); } std::unique_ptr generate_csr() const { - std::unique_ptr mtx = - Csr::create(exec, - gko::dim<2>{static_cast(nrows), - static_cast(ncols)}, - nnz, std::make_shared()); - - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); - r[0] = 0; - r[1] = 2; - r[2] = 4; - r[3] = 8; - r[4] = 12; - r[5] = 14; - r[6] = 16; - - c[0] = 0; - c[1] = 1; - c[2] = 0; - c[3] = 1; - c[4] = 0; - c[5] = 1; - c[6] = 6; - c[7] = 7; - c[8] = 0; - c[9] = 1; - c[10] = 6; - c[11] = 7; - c[12] = 4; - c[13] = 5; - c[14] = 4; - c[15] = 5; - + gko::Array r(exec, {0, 2, 4, 8, 12, 14, 16}); + gko::Array c( + exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; using namespace std::complex_literals; @@ -712,7 +504,11 @@ class FbcsrSampleComplex { v[14] = -2.0 - 2.15i; v[15] = -11.0 - 11.15i; - return mtx; + return Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + vals, c, r, + std::make_shared()); } }; diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index fd30554a469..3bf18bc6515 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -53,7 +53,7 @@ namespace gko { namespace kernels { namespace hip { /** - * @brief The Compressed sparse row matrix format namespace. + * @brief The fixed-size block compressed sparse row matrix format namespace. * * @ingroup fbcsr */ diff --git a/include/ginkgo/core/base/blockutils.hpp b/include/ginkgo/core/base/blockutils.hpp index c0b16ef6ac1..20c6ac5162a 100644 --- a/include/ginkgo/core/base/blockutils.hpp +++ b/include/ginkgo/core/base/blockutils.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GINKGO_CORE_BASE_BLOCKUTILS_HPP_ -#define GINKGO_CORE_BASE_BLOCKUTILS_HPP_ +#ifndef GINKGO_PUBLIC_CORE_BASE_BLOCKUTILS_HPP_ +#define GINKGO_PUBLIC_CORE_BASE_BLOCKUTILS_HPP_ #include @@ -41,7 +41,12 @@ namespace gko { namespace blockutils { -/// Error that denotes issues between block sizes and matrix dimensions +/** + * Error that denotes issues between block sizes and matrix dimensions + * + * \tparam IntexType Type of index used by the linear algebra object that is + * incompatible with the requried block size. + */ template class BlockSizeError : public Error { public: @@ -57,10 +62,12 @@ class BlockSizeError : public Error { /** * Computes the number of blocks * - * @param block_size The size of each block - * @param size The total size of some array/vector - * @return The quotient of the size divided by the block size - * but throws when they don't divide + * @param block_size The size of each block + * @param size The total size of some array/vector + * @return The number of blocks, ie., + * quotient of the size divided by the block size. 
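The sample-generator hunks above replace manual filling of raw value/index pointers with gko::Array objects handed straight to the matrix constructors. A condensed sketch of that construction pattern for a single 2x2 block follows; the executor, sizes and values are illustrative assumptions, not code taken from the patch.

#include <memory>
#include <utility>

#include <ginkgo/ginkgo.hpp>

// Builds a 2x2 matrix holding one dense 2x2 block at block position (0, 0),
// mirroring the Array-based pattern the sample generators now use.
std::unique_ptr<gko::matrix::Fbcsr<double, gko::int32>> make_one_block_fbcsr(
    std::shared_ptr<const gko::Executor> exec)
{
    gko::Array<gko::int32> row_ptrs(exec, {0, 1});  // one block-row, one block
    gko::Array<gko::int32> col_idxs(exec, {0});     // block in block-column 0
    gko::Array<double> values(exec, {1.0, 2.0, 3.0, 4.0});  // row-major block
    return gko::matrix::Fbcsr<double, gko::int32>::create(
        exec, gko::dim<2>{2, 2}, /* block size */ 2, std::move(values),
        std::move(col_idxs), std::move(row_ptrs));
}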
+ * + * @throw BlockSizeError when block_size does not divide the total size. */ template IndexType get_num_blocks(const int block_size, const IndexType size) diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index e783d50e078..feda178c6a5 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -163,7 +163,9 @@ class Fbcsr : public EnableLinOp>, void move_to(SparsityCsr *result) override; - /** Convert COO data into block CSR + /** + * Reads a @ref matrix_data into Fbcsr format. + * Requires the block size to be set beforehand @sa set_block_size. * * @warning Unlike Csr::read, here explicit non-zeros are NOT dropped. */ @@ -186,7 +188,7 @@ class Fbcsr : public EnableLinOp>, */ void sort_by_column_index(); - /* + /** * Tests if all row entry pairs (value, col_idx) are sorted by column index * * @returns True if all row entry pairs (value, col_idx) are sorted by @@ -328,7 +330,8 @@ class Fbcsr : public EnableLinOp>, size_type num_nonzeros, int block_size) : EnableLinOp(exec, size), bs_{block_size}, - nbcols_{blockutils::get_num_blocks(block_size, size[1])}, + nbcols_{static_cast( + blockutils::get_num_blocks(block_size, size[1]))}, values_(exec, num_nonzeros), col_idxs_(exec, blockutils::get_num_blocks(block_size * block_size, num_nonzeros)), @@ -345,7 +348,7 @@ class Fbcsr : public EnableLinOp>, * * @param exec Executor associated to the matrix * @param size size of the matrix - * @param block_size + * @param block_size Size of the small square dense nonzero blocks * @param values array of matrix values * @param col_idxs array of column indexes * @param row_ptrs array of row pointers @@ -363,7 +366,8 @@ class Fbcsr : public EnableLinOp>, RowPtrsArray &&row_ptrs) : EnableLinOp(exec, size), bs_{block_size}, - nbcols_{blockutils::get_num_blocks(block_size, size[1])}, + nbcols_{static_cast( + blockutils::get_num_blocks(block_size, size[1]))}, values_{exec, std::forward(values)}, col_idxs_{exec, std::forward(col_idxs)}, row_ptrs_{exec, std::forward(row_ptrs)} @@ -380,7 +384,7 @@ class Fbcsr : public EnableLinOp>, private: int bs_; ///< Block size - size_type nbcols_; ///< Number of block-columns + index_type nbcols_; ///< Number of block-columns Array values_; ///< Non-zero values of all blocks Array col_idxs_; ///< Block-column indices of all blocks Array row_ptrs_; ///< Block-row pointers into @ref col_idxs_ diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index 15f63ee9794..9bc8a091906 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -51,7 +51,7 @@ namespace gko { namespace kernels { namespace omp { /** - * @brief The Compressed sparse row matrix format namespace. + * @brief The fixed-block block compressed sparse row matrix format namespace. * * @ingroup fbcsr */ diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 108697537df..6590c895376 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -58,8 +58,8 @@ namespace gko { namespace kernels { namespace reference { /** - * @brief The Compressed sparse row matrix format namespace. - * @ref Fbcsr + * @brief The fixed-block compressed sparse row matrix format namespace. 
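The read() documentation above states that the block size must be known before matrix_data is read in. A hedged usage sketch follows; the executor, the 4x4 size, the 2x2 block size and the data entries are illustrative assumptions, not code from the patch. The block size is fixed at construction here, but calling set_block_size() before read() would serve the same purpose.

#include <memory>

#include <ginkgo/ginkgo.hpp>

// Reads illustrative 4x4 matrix_data into an Fbcsr matrix with 2x2 blocks.
void read_into_fbcsr(std::shared_ptr<const gko::Executor> exec)
{
    using Fbcsr = gko::matrix::Fbcsr<double, gko::int32>;
    gko::matrix_data<double, gko::int32> data{
        gko::dim<2>{4, 4},
        {{0, 0, 1.0}, {0, 1, 2.0}, {1, 0, 3.0}, {1, 1, 4.0}, {2, 2, 5.0}}};
    auto mtx = Fbcsr::create(exec, gko::dim<2>{4, 4}, 0, /* block size */ 2);
    mtx->read(data);  // entries are grouped into 2x2 blocks; zero positions
                      // inside touched blocks are stored explicitly
}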
+ * * @ingroup fbcsr */ namespace fbcsr { @@ -83,7 +83,6 @@ void spmv(const std::shared_ptr, auto *const cvals = c->get_values(); for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { - // const IndexType crowblkend = (ibrow + 1) * bs * nvecs; for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; ++i) cvals[i] = zero(); diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 8299f36465e..e9fff20c1ec 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -539,7 +539,6 @@ TYPED_TEST(Fbcsr, MovesEmptyToSparsityCsr) TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) { using IndexType = typename TestFixture::index_type; - gko::Array row_nnz(this->exec, this->mtx2->get_size()[0]); gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( @@ -560,7 +559,6 @@ TYPED_TEST(Fbcsr, SquareMtxIsTransposable) { using Fbcsr = typename TestFixture::Mtx; using Csr = typename TestFixture::Csr; - auto csrmtxsq = Csr::create(this->exec, std::make_shared()); this->mtxsq->convert_to(csrmtxsq.get()); @@ -578,7 +576,6 @@ TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) { using Fbcsr = typename TestFixture::Mtx; using Csr = typename TestFixture::Csr; - auto csrmtx = Csr::create(this->exec, std::make_shared()); this->mtx2->convert_to(csrmtx.get()); @@ -602,10 +599,10 @@ TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) { using Fbcsr = typename TestFixture::Mtx; using index_type = typename TestFixture::index_type; - auto cpmat = this->mtx->clone(); index_type *const colinds = cpmat->get_col_idxs(); std::swap(colinds[0], colinds[1]); + ASSERT_FALSE(cpmat->is_sorted_by_column_index()); } @@ -614,6 +611,7 @@ TYPED_TEST(Fbcsr, ExtractsDiagonal) { using T = typename TestFixture::value_type; auto matrix = this->mtx2->clone(); + auto diag = matrix->extract_diagonal(); ASSERT_EQ(this->m2diag->get_size(), diag->get_size()); @@ -625,15 +623,14 @@ TYPED_TEST(Fbcsr, ExtractsDiagonal) TYPED_TEST(Fbcsr, InplaceAbsolute) { + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; using Mtx = typename TestFixture::Mtx; using Csr = typename TestFixture::Csr; auto mtx = this->fbsample2.generate_fbcsr(); const std::unique_ptr refabs = this->ref2csrmtx->clone(); refabs->compute_absolute_inplace(); - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - mtx->compute_absolute_inplace(); const gko::remove_complex tolerance = @@ -648,7 +645,6 @@ TYPED_TEST(Fbcsr, OutplaceAbsolute) using Mtx = typename TestFixture::Mtx; using AbsCsr = typename gko::remove_complex; using AbsMtx = typename gko::remove_complex; - auto mtx = this->fbsample2.generate_fbcsr(); const std::unique_ptr refabs = this->ref2csrmtx->compute_absolute(); @@ -681,13 +677,14 @@ TYPED_TEST(FbcsrComplex, ConvertsComplexToCsr) using Fbcsr = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = gko::ReferenceExecutor::create(); gko::testing::FbcsrSampleComplex csample(exec); std::unique_ptr mtx = csample.generate_fbcsr(); auto csr_mtx = Csr::create(exec, std::make_shared()); + mtx->convert_to(csr_mtx.get()); + GKO_ASSERT_MTX_NEAR(csr_mtx, mtx, 0.0); } @@ -698,16 +695,15 @@ TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) using Fbcsr = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = 
gko::ReferenceExecutor::create(); gko::testing::FbcsrSampleComplex csample(exec); auto csrmtx = csample.generate_csr(); - auto reftranslinop = csrmtx->conj_transpose(); - const Csr *const reftrans = static_cast(reftranslinop.get()); + auto mtx = csample.generate_fbcsr(); - std::unique_ptr mtx = csample.generate_fbcsr(); + auto reftranslinop = csrmtx->conj_transpose(); + auto reftrans = static_cast(reftranslinop.get()); auto trans = mtx->conj_transpose(); - auto trans_as_fbcsr = static_cast(trans.get()); + auto trans_as_fbcsr = static_cast(trans.get()); GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftrans, 0.0); } @@ -724,8 +720,6 @@ TYPED_TEST(FbcsrComplex, InplaceAbsolute) auto mtx = fbsample.generate_fbcsr(); auto csrmtx = fbsample.generate_csr(); - using value_type = typename TestFixture::value_type; - mtx->compute_absolute_inplace(); csrmtx->compute_absolute_inplace(); @@ -741,10 +735,8 @@ TYPED_TEST(FbcsrComplex, OutplaceAbsolute) using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; using AbsMtx = typename gko::remove_complex; - gko::testing::FbcsrSample fbsample( gko::ReferenceExecutor::create()); - auto mtx = fbsample.generate_fbcsr(); auto csrmtx = fbsample.generate_csr(); From d67d61af169d43b79a8335f922cb66f9ab88f01d Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 14 Jan 2021 16:06:29 +0100 Subject: [PATCH 39/58] removed extra getter overloads --- core/test/matrix/fbcsr.cpp | 4 ++-- core/test/matrix/fbcsr_sample.hpp | 4 ++-- include/ginkgo/core/matrix/fbcsr.hpp | 24 ------------------------ 3 files changed, 4 insertions(+), 28 deletions(-) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index a38e1b32695..e7244220d17 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -145,7 +145,7 @@ TYPED_TEST(Fbcsr, SampleGeneratorIsCorrect) fbmtx->get_const_values(), bs, bs); for (index_type ibrow = 0; ibrow < this->fbsample.nbrows; ibrow++) { - const index_type *const browptr = fbmtx->get_row_ptrs(); + const index_type *const browptr = fbmtx->get_const_row_ptrs(); const index_type numblocksbrow = browptr[ibrow + 1] - browptr[ibrow]; for (index_type irow = ibrow * bs; irow < ibrow * bs + bs; irow++) { const index_type rowstart = @@ -154,7 +154,7 @@ TYPED_TEST(Fbcsr, SampleGeneratorIsCorrect) ASSERT_EQ(csmtx->get_const_row_ptrs()[irow], rowstart); } - const index_type *const bcolinds = fbmtx->get_col_idxs(); + const index_type *const bcolinds = fbmtx->get_const_col_idxs(); for (index_type ibnz = browptr[ibrow]; ibnz < browptr[ibrow + 1]; ibnz++) { diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 098aef69aa1..ecd0aaff7f6 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -174,9 +174,9 @@ class FbcsrSample { gko::Array rowptrs(exec, nbrows + 1); const std::unique_ptr fbmat = generate_fbcsr(); for (index_type i = 0; i < nbrows + 1; i++) - rowptrs.get_data()[i] = fbmat->get_row_ptrs()[i]; + rowptrs.get_data()[i] = fbmat->get_const_row_ptrs()[i]; for (index_type i = 0; i < nbnz; i++) - colids.get_data()[i] = fbmat->get_col_idxs()[i]; + colids.get_data()[i] = fbmat->get_const_col_idxs()[i]; return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, rowptrs); } diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index feda178c6a5..facb8306a24 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -201,14 +201,6 @@ class Fbcsr : public 
EnableLinOp>, */ value_type *get_values() noexcept { return values_.get_data(); } - /** - * @see Fbcsr::get_const_values() - */ - const value_type *get_values() const noexcept - { - return values_.get_const_data(); - } - /** * @copydoc Fbcsr::get_values() * @@ -226,14 +218,6 @@ class Fbcsr : public EnableLinOp>, */ index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } - /** - * @see Fbcsr::get_const_col_idxs() - */ - const index_type *get_col_idxs() const noexcept - { - return col_idxs_.get_const_data(); - } - /** * @copydoc Fbcsr::get_col_idxs() * @@ -251,14 +235,6 @@ class Fbcsr : public EnableLinOp>, */ index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); } - /** - * @see Fbcsr::get_const_row_ptrs() - */ - const index_type *get_row_ptrs() const noexcept - { - return row_ptrs_.get_const_data(); - } - /** * @copydoc Fbcsr::get_row_ptrs() * From a06bbae85c92c999a235b5ba3e9ae41a5f4ffd87 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 14 Jan 2021 20:16:49 +0100 Subject: [PATCH 40/58] incorporated some suggestions from SonarCloud --- core/components/fixed_block.hpp | 11 ++--- core/matrix/fbcsr.cpp | 10 +--- dpcpp/matrix/fbcsr_kernels.dp.cpp | 2 +- include/ginkgo/core/matrix/fbcsr.hpp | 7 ++- omp/matrix/fbcsr_kernels.cpp | 69 +++++++++++++++------------- reference/matrix/fbcsr_kernels.cpp | 12 ++--- 6 files changed, 52 insertions(+), 59 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 9bc8facc041..889cb2145a2 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -34,9 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ -#include - - #include @@ -76,8 +73,8 @@ class DenseBlocksView final { return vals_[block * nrows * ncols + row * ncols + col]; } - const typename std::remove_const::type &at( - const index_type block, const int row, const int col) const + const value_type &at(const index_type block, const int row, + const int col) const { return vals_[block * nrows * ncols + row * ncols + col]; } @@ -87,8 +84,8 @@ class DenseBlocksView final { return at(block, row, col); } - const typename std::remove_const::type &operator()( - const index_type block, const int row, const int col) const + const value_type &operator()(const index_type block, const int row, + const int col) const { return at(block, row, col); } diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index dbd21047da3..f0f03224ada 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -96,7 +96,7 @@ class DenseBlock final { public: using value_type = ValueType; - DenseBlock() {} + DenseBlock() = default; DenseBlock(const int num_rows, const int num_cols) : nrows_{num_rows}, ncols_{num_cols}, vals_(num_rows * num_cols) @@ -197,14 +197,6 @@ void Fbcsr::convert_to( } -template -void Fbcsr::move_to( - Fbcsr *const result) -{ - EnableLinOp::move_to(result); -} - - template void Fbcsr::convert_to( Fbcsr, IndexType> *const result) const diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index cebba0ed528..b31bbc9a479 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -47,7 +47,7 @@ namespace gko { namespace kernels { namespace dpcpp { /** - * @brief The Compressed sparse row matrix format namespace. + * @brief The fixed-size block compressed sparse row matrix format namespace. 
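One of the SonarCloud-motivated cleanups above drops std::remove_const from the const accessors of the block view: a const member function can simply return const value_type &. A minimal standalone illustration of that accessor pair, independent of the actual DenseBlocksView class:

// Toy view type showing the overload pair: the non-const accessor yields a
// mutable reference, the const accessor a const reference, with no
// type-trait gymnastics required.
template <typename ValueType>
class example_view {
public:
    explicit example_view(ValueType *data) : data_{data} {}

    ValueType &at(int i) { return data_[i]; }

    const ValueType &at(int i) const { return data_[i]; }

private:
    ValueType *data_;
};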
* * @ingroup fbcsr */ diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index facb8306a24..9e5423395ba 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -130,7 +130,12 @@ class Fbcsr : public EnableLinOp>, void convert_to(Fbcsr *result) const override; - void move_to(Fbcsr *result) override; + /** + * For moving to another Fbcsr of the same type, use the default + * implementation provided by EnableLinOp via the + * EnablePolymorphicAssignment mixin. + */ + using EnableLinOp>::move_to; friend class Fbcsr, IndexType>; diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index 9bc8a091906..52e8caefb37 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -51,7 +51,7 @@ namespace gko { namespace kernels { namespace omp { /** - * @brief The fixed-block block compressed sparse row matrix format namespace. + * @brief The fixed-block compressed sparse row matrix format namespace. * * @ingroup fbcsr */ @@ -60,20 +60,20 @@ namespace fbcsr { template void spmv(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + matrix::Dense *const c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Dense *const alpha, + const matrix::Fbcsr *const a, + const matrix::Dense *const b, + const matrix::Dense *const beta, + matrix::Dense *const c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -81,8 +81,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr *const source, + matrix::Dense *const result) + GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); @@ -99,25 +100,26 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -inline void convert_fbcsr_to_csc(size_type num_rows, const IndexType *row_ptrs, - const IndexType *col_idxs, - const ValueType *fbcsr_vals, - IndexType *row_idxs, IndexType *col_ptrs, - ValueType *csc_vals, - UnaryOperator op) GKO_NOT_IMPLEMENTED; +inline void convert_fbcsr_to_csc( + size_type num_rows, const IndexType *const row_ptrs, + const IndexType *const col_idxs, const ValueType *const fbcsr_vals, + IndexType *const row_idxs, IndexType *const col_ptrs, + ValueType *const csc_vals, UnaryOperator op) GKO_NOT_IMPLEMENTED; template -void transpose_and_transform(std::shared_ptr exec, - matrix::Fbcsr *trans, - const matrix::Fbcsr *orig, - UnaryOperator op) GKO_NOT_IMPLEMENTED; +void transpose_and_transform( + std::shared_ptr exec, + matrix::Fbcsr *const trans, + const matrix::Fbcsr *const orig, + UnaryOperator op) GKO_NOT_IMPLEMENTED; template void transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) + GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); @@ -125,8 +127,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void 
conj_transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) + const matrix::Fbcsr *const orig, + matrix::Fbcsr *const trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -136,8 +138,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr *const source, + size_type *const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -146,8 +148,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - Array *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr *const source, + Array *const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -156,8 +158,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr *const to_check, + bool *const is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); @@ -165,8 +167,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr *const orig, + matrix::Diagonal *const diag) + GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 6590c895376..441c0b10d28 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -80,12 +80,10 @@ void spmv(const std::shared_ptr, const blockutils::DenseBlocksView avalues( vals, bs, bs); - auto *const cvals = c->get_values(); - for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; ++i) - cvals[i] = zero(); + c->get_values()[i] = zero(); for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; ++inz) { @@ -124,12 +122,10 @@ void advanced_spmv(const std::shared_ptr, const blockutils::DenseBlocksView avalues( vals, bs, bs); - auto *const cvals = c->get_values(); - for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; ++i) - cvals[i] *= vbeta; + c->get_values()[i] *= vbeta; for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; ++inz) { @@ -165,7 +161,7 @@ void convert_to_dense(const std::shared_ptr, const gko::blockutils::DenseBlocksView values( vals, bs, bs); - for (size_type brow = 0; brow < nbrows; ++brow) { + for (IndexType brow = 0; brow < nbrows; ++brow) { for (size_type bcol = 0; bcol < nbcols; ++bcol) { for (int ib = 0; ib < bs; ib++) for (int jb = 0; jb < bs; jb++) @@ -212,7 +208,7 @@ void convert_to_csr(const std::shared_ptr, const gko::blockutils::DenseBlocksView bvalues( bvals, bs, bs); - for (size_type brow = 0; brow < nbrows; ++brow) { + for (IndexType brow = 0; brow < nbrows; ++brow) { const IndexType nz_browstart = browptrs[brow] * bs * bs; const IndexType numblocks_brow = browptrs[brow + 1] - browptrs[brow]; for (int ib = 0; ib < bs; ib++) From a13ed336ae19b72d08b1fd20325e973deb3f0b4f Mon 
Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 19 Jan 2021 11:02:15 +0100 Subject: [PATCH 41/58] added test for fbcsr calculate max nnz per row --- reference/test/matrix/fbcsr_kernels.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index e9fff20c1ec..3e1a9eed629 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -555,6 +555,22 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) } +TYPED_TEST(Fbcsr, CalculatesMaxNnzPerRow) +{ + using IndexType = typename TestFixture::index_type; + gko::size_type max_row_nnz{}; + + gko::kernels::reference::fbcsr::calculate_max_nnz_per_row( + this->exec, this->mtx2.get(), &max_row_nnz); + + gko::size_type ref_max_row_nnz{}; + gko::kernels::reference::csr::calculate_max_nnz_per_row( + this->exec, this->ref2csrmtx.get(), &ref_max_row_nnz); + + ASSERT_EQ(max_row_nnz, ref_max_row_nnz); +} + + TYPED_TEST(Fbcsr, SquareMtxIsTransposable) { using Fbcsr = typename TestFixture::Mtx; From 8a0216e297734b86446c7f17cd97e08b4cd812cb Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 19 Jan 2021 18:07:03 +0100 Subject: [PATCH 42/58] fixed doc-string as suggested in review --- core/matrix/fbcsr.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index f0f03224ada..c7d9c6b7b55 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -85,9 +85,10 @@ namespace detail { /** + * @internal * A lightweight dynamic block type on the host * - * @internal Currently used only while reading a FBCSR matrix from matrix_data. + * Currently used only while reading a FBCSR matrix from matrix_data. * * @tparam ValueType The numeric type of entries of the block */ From 818b44ce590af412b5eb8ffbc555b0056ebbdd67 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 19 Jan 2021 21:16:09 +0100 Subject: [PATCH 43/58] added reference kernel for sorting fbcsr matrix by column --- core/device_hooks/common_kernels.inc.cpp | 6 +++ core/matrix/fbcsr.cpp | 7 +++- core/matrix/fbcsr_kernels.hpp | 6 +++ core/test/matrix/fbcsr_sample.hpp | 46 ++++++++++++++++++++++ cuda/matrix/fbcsr_kernels.cu | 9 +++++ dpcpp/matrix/fbcsr_kernels.dp.cpp | 9 +++++ hip/matrix/fbcsr_kernels.hip.cpp | 9 +++++ omp/matrix/fbcsr_kernels.cpp | 9 +++++ reference/matrix/fbcsr_kernels.cpp | 50 ++++++++++++++++++++++++ reference/test/matrix/fbcsr_kernels.cpp | 17 ++++++++ 10 files changed, 167 insertions(+), 1 deletion(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index b2818c4959a..81d74a33f88 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -860,6 +860,12 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +template +GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + template GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index c7d9c6b7b55..341840e215a 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -70,6 +70,7 @@ GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, fbcsr::calculate_nonzeros_per_row); GKO_REGISTER_OPERATION(is_sorted_by_column_index, 
fbcsr::is_sorted_by_column_index); +GKO_REGISTER_OPERATION(sort_by_column_index, fbcsr::sort_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, fbcsr::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); GKO_REGISTER_OPERATION(inplace_absolute_array, @@ -447,7 +448,11 @@ std::unique_ptr Fbcsr::conj_transpose() const template -void Fbcsr::sort_by_column_index() GKO_NOT_IMPLEMENTED; +void Fbcsr::sort_by_column_index() +{ + auto exec = this->get_executor(); + exec->run(fbcsr::make_sort_by_column_index(this)); +} template diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index 332292eb314..5d2492a3b37 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -95,6 +95,10 @@ namespace kernels { const matrix::Fbcsr *source, \ Array *result) +#define GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ + void sort_by_column_index(std::shared_ptr exec, \ + matrix::Fbcsr *to_sort) + #define GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ void is_sorted_by_column_index( \ std::shared_ptr exec, \ @@ -125,6 +129,8 @@ namespace kernels { template \ GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ template \ + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index ecd0aaff7f6..4f6bcc864f2 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -512,6 +512,52 @@ class FbcsrSampleComplex { } }; + +/** + * Generates a fixed-block CSR matrix with longer and unsorted columns + */ +template +class FbcsrSampleUnsorted { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + + + const size_type nbrows = 3; + const size_type nbcols = 20; + const size_type nbnz = 30; + const int bs = 3; + const size_type nrows = nbrows * bs; + const size_type ncols = nbcols * bs; + const size_type nnz = nbnz * bs * bs; + const std::shared_ptr exec; + + + FbcsrSampleUnsorted(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array r(exec, {0, 8, 19, 30}); + gko::Array c( + exec, {0, 1, 20, 15, 12, 18, 5, 28, 3, 10, 29, 5, 9, 2, 16, + 12, 21, 2, 0, 1, 5, 9, 12, 15, 17, 20, 22, 24, 27, 28}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) { + v[i] = static_cast(i + 0.15 + FBCSR_TEST_OFFSET); + } + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } +}; + + } // namespace testing } // namespace gko diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index b4da9345e48..056c90d57ed 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -169,6 +169,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr *orig, diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index b31bbc9a479..32244099d6b 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -165,6 
+165,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr *orig, diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index 3bf18bc6515..934b7f270cd 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -166,6 +166,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr *orig, diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index 52e8caefb37..f2747e12e18 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ b/omp/matrix/fbcsr_kernels.cpp @@ -165,6 +165,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr *const orig, diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 441c0b10d28..f56fa70315b 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -390,6 +390,56 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +template +static void sort_by_column_index_impl( + matrix::Fbcsr *const to_sort) +{ + auto row_ptrs = to_sort->get_const_row_ptrs(); + auto col_idxs = to_sort->get_col_idxs(); + auto values = to_sort->get_values(); + const auto nbrows = to_sort->get_num_block_rows(); + constexpr int bs2 = matBlkSz * matBlkSz; + for (IndexType i = 0; i < nbrows; ++i) { + IndexType *const brow_col_idxs = col_idxs + row_ptrs[i]; + ValueType *const brow_vals = values + row_ptrs[i] * bs2; + const IndexType nbnz_brow = row_ptrs[i + 1] - row_ptrs[i]; + + std::vector col_permute(nbnz_brow); + std::iota(col_permute.begin(), col_permute.end(), 0); + auto helper = detail::IteratorFactory( + brow_col_idxs, col_permute.data(), nbnz_brow); + std::sort(helper.begin(), helper.end()); + + std::vector oldvalues(nbnz_brow * bs2); + std::copy(brow_vals, brow_vals + nbnz_brow * bs2, oldvalues.begin()); + for (IndexType ibz = 0; ibz < nbnz_brow; ibz++) { + for (int i = 0; i < bs2; i++) { + brow_vals[ibz * bs2 + i] = + oldvalues[col_permute[ibz] * bs2 + i]; + } + } + } +} + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) +{ + const int bs = to_sort->get_block_size(); + if (bs == 2) + sort_by_column_index_impl<2>(to_sort); + else if (bs == 3) + sort_by_column_index_impl<3>(to_sort); + else if (bs == 4) + sort_by_column_index_impl<4>(to_sort); + else + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + template void extract_diagonal(std::shared_ptr, const matrix::Fbcsr *const orig, diff --git 
a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 3e1a9eed629..1d639d0df7f 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -623,6 +623,23 @@ TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) } +TYPED_TEST(Fbcsr, SortUnsortedMatrix) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = typename TestFixture::Mtx; + const gko::testing::FbcsrSampleUnsorted fbsample( + this->exec); + auto unsrt_mtx = fbsample.generate_fbcsr(); + auto srt_mtx = unsrt_mtx->clone(); + + srt_mtx->sort_by_column_index(); + + GKO_ASSERT_MTX_NEAR(unsrt_mtx, srt_mtx, 0.0); + ASSERT_TRUE(srt_mtx->is_sorted_by_column_index()); +} + + TYPED_TEST(Fbcsr, ExtractsDiagonal) { using T = typename TestFixture::value_type; From 433655282b2805fabf0b4c42d7be05a36c88d36d Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 20 Jan 2021 01:24:36 +0100 Subject: [PATCH 44/58] Addressed code structures issues raised in review - blockutils.hpp is removed for now, its contents moved to pre-existing files - added braces after control-flow statements - More public functions in class Fbcsr are marked noexecpt --- core/matrix/fbcsr.cpp | 22 +++---- core/test/matrix/fbcsr.cpp | 12 ++-- cuda/matrix/fbcsr_kernels.cu | 7 --- include/ginkgo/core/base/blockutils.hpp | 84 ------------------------- include/ginkgo/core/base/exception.hpp | 24 +++++++ include/ginkgo/core/matrix/fbcsr.hpp | 60 +++++++++++++----- include/ginkgo/ginkgo.hpp | 1 - reference/matrix/fbcsr_kernels.cpp | 1 - reference/test/matrix/fbcsr_kernels.cpp | 3 +- 9 files changed, 90 insertions(+), 124 deletions(-) delete mode 100644 include/ginkgo/core/base/blockutils.hpp diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 341840e215a..b9cc0d8d7ae 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -154,7 +154,7 @@ void Fbcsr::apply_impl(const LinOp *const b, { using Dense = Dense; if (auto b_fbcsr = dynamic_cast *>(b)) { - // if b is a FBCSR matrix, we compute a SpGeMM + // if b is a FBCSR matrix, we need an SpGeMM throw NotImplemented(__FILE__, __LINE__, "SpGeMM for Fbcsr"); } else { // otherwise we assume that b is dense and compute a SpMV/SpMM @@ -172,10 +172,10 @@ void Fbcsr::apply_impl(const LinOp *const alpha, { using Dense = Dense; if (auto b_fbcsr = dynamic_cast *>(b)) { - // if b is a FBCSR matrix, we compute a SpGeMM + // if b is a FBCSR matrix, we need an SpGeMM throw NotImplemented(__FILE__, __LINE__, "Adv SpGeMM for Fbcsr"); } else if (dynamic_cast *>(b)) { - // if b is an identity matrix, we compute an SpGEAM + // if b is an identity matrix, we need an SpGEAM throw NotImplemented(__FILE__, __LINE__, "Adv SpGEAM for Fbcsr"); } else { // otherwise we assume that b is dense and compute a SpMV/SpMM @@ -263,13 +263,12 @@ template void Fbcsr::convert_to( SparsityCsr *const result) const { - using blockutils::get_num_blocks; auto exec = this->get_executor(); auto tmp = SparsityCsr::create( exec, - gko::dim<2>{get_num_blocks(bs_, this->get_size()[0]), - get_num_blocks(bs_, this->get_size()[1])}, - get_num_blocks(bs_ * bs_, this->get_num_stored_elements())); + gko::dim<2>{static_cast(this->get_num_block_rows()), + static_cast(this->get_num_block_cols())}, + this->get_num_stored_blocks()); tmp->col_idxs_ = this->col_idxs_; tmp->row_ptrs_ = this->row_ptrs_; @@ -298,7 +297,6 @@ void Fbcsr::read(const mat_data &data) std::numeric_limits::max()); const auto nnz = static_cast(data.nonzeros.size()); - 
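The read() implementation shown in this hunk groups the input entries into dense blocks keyed by their block coordinates before writing them out (see the iteration over block_row / block_column below). A simplified sketch of that grouping step, detached from Ginkgo types and intended only to illustrate the idea:

#include <map>
#include <tuple>
#include <utility>
#include <vector>

// Groups (row, col, value) triplets into dense bs x bs blocks keyed by their
// block coordinates; std::map iteration then visits the blocks in
// block-row-major order, which matches the order FBCSR stores them in.
std::map<std::pair<int, int>, std::vector<double>> group_into_blocks(
    const std::vector<std::tuple<int, int, double>> &entries, const int bs)
{
    std::map<std::pair<int, int>, std::vector<double>> blocks;
    for (const auto &entry : entries) {
        const int row = std::get<0>(entry);
        const int col = std::get<1>(entry);
        auto &block = blocks[{row / bs, col / bs}];
        if (block.empty()) {
            block.assign(bs * bs, 0.0);  // pad untouched positions with zeros
        }
        block[(row % bs) * bs + (col % bs)] = std::get<2>(entry);
    }
    return blocks;
}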
const int bs = this->bs_; using Block_t = detail::DenseBlock; @@ -362,11 +360,11 @@ void Fbcsr::read(const mat_data &data) GKO_ENSURE_IN_BOUNDS(cur_brow, num_brows); tmp->col_idxs_.get_data()[cur_bnz] = it->first.block_column; - - for (int ibr = 0; ibr < bs; ibr++) - for (int jbr = 0; jbr < bs; jbr++) + for (int ibr = 0; ibr < bs; ibr++) { + for (int jbr = 0; jbr < bs; jbr++) { values(cur_bnz, ibr, jbr) = it->second(ibr, jbr); - + } + } if (it->first.block_row > cur_brow) { tmp->row_ptrs_.get_data()[++cur_brow] = cur_bnz; } else { diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index e7244220d17..3330ab2237a 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -311,8 +311,9 @@ TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) ASSERT_EQ(data.size, refdata.size); ASSERT_EQ(data.nonzeros.size(), refdata.nonzeros.size()); - for (size_t i = 0; i < data.nonzeros.size(); i++) + for (size_t i = 0; i < data.nonzeros.size(); i++) { ASSERT_EQ(data.nonzeros[i], refdata.nonzeros[i]); + } } @@ -332,10 +333,13 @@ TYPED_TEST(Fbcsr, DenseBlocksViewWorksCorrectly) Dbv refdbv(ref_dbv_array.data(), fbsample.bs, fbsample.bs); fbsample.fill_value_blocks_view(refdbv); - for (index_type ibz = 0; ibz < fbsample.nbnz; ibz++) - for (int i = 0; i < fbsample.bs; ++i) - for (int j = 0; j < fbsample.bs; ++j) + for (index_type ibz = 0; ibz < fbsample.nbnz; ibz++) { + for (int i = 0; i < fbsample.bs; ++i) { + for (int j = 0; j < fbsample.bs; ++j) { ASSERT_EQ(testdbv(ibz, i, j), refdbv(ibz, i, j)); + } + } + } } diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index 056c90d57ed..2d3677ea6bb 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -57,13 +57,6 @@ namespace cuda { namespace fbcsr { -constexpr int default_block_size = 512; -constexpr int warps_in_block = 4; -constexpr int spmv_block_size = warps_in_block * config::warp_size; -constexpr int wsize = config::warp_size; -constexpr int classical_overweight = 32; - - /** * A compile-time list of the number items per threads for which spmv kernel * should be compiled. diff --git a/include/ginkgo/core/base/blockutils.hpp b/include/ginkgo/core/base/blockutils.hpp deleted file mode 100644 index 20c6ac5162a..00000000000 --- a/include/ginkgo/core/base/blockutils.hpp +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GINKGO_PUBLIC_CORE_BASE_BLOCKUTILS_HPP_ -#define GINKGO_PUBLIC_CORE_BASE_BLOCKUTILS_HPP_ - - -#include - - -namespace gko { -namespace blockutils { - - -/** - * Error that denotes issues between block sizes and matrix dimensions - * - * \tparam IntexType Type of index used by the linear algebra object that is - * incompatible with the requried block size. - */ -template -class BlockSizeError : public Error { -public: - BlockSizeError(const std::string &file, const int line, - const int block_size, const IndexType size) - : Error(file, line, - " block size = " + std::to_string(block_size) + - ", size = " + std::to_string(size)) - {} -}; - - -/** - * Computes the number of blocks - * - * @param block_size The size of each block - * @param size The total size of some array/vector - * @return The number of blocks, ie., - * quotient of the size divided by the block size. - * - * @throw BlockSizeError when block_size does not divide the total size. - */ -template -IndexType get_num_blocks(const int block_size, const IndexType size) -{ - if (size % block_size != 0) - throw BlockSizeError(__FILE__, __LINE__, block_size, size); - return size / block_size; -} - - -} // namespace blockutils -} // namespace gko - -#endif diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 46fc9db93af..e6e4076164c 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -421,6 +421,30 @@ class BadDimension : public Error { }; +/** + * Error that denotes issues between block sizes and matrix dimensions + * + * \tparam IntexType Type of index used by the linear algebra object that is + * incompatible with the requried block size. + */ +template +class BlockSizeError : public Error { +public: + /** + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param block_size Size of small dense blocks in a matrix + * @param size The size that is not exactly divided by the block size + */ + BlockSizeError(const std::string &file, const int line, + const int block_size, const IndexType size) + : Error(file, line, + " block size = " + std::to_string(block_size) + + ", size = " + std::to_string(size)) + {} +}; + + /** * ValueMismatch is thrown if two values are not equal. */ diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 9e5423395ba..ba912d1e034 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
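With BlockSizeError now part of the public exception header and the block-count check living in a detail helper, an Fbcsr whose dimensions are not divisible by the block size fails at construction time. A hedged sketch of what that looks like from user code; the 6x7 size and block size 3 are deliberately incompatible, and catching through the gko::Error base class is an assumption made to keep the example independent of the exact template parameter of the thrown type.

#include <iostream>
#include <memory>

#include <ginkgo/ginkgo.hpp>

// Attempts to build an Fbcsr with a block size that does not divide the
// number of columns; the block-count check throws, so create() never returns.
void show_block_size_mismatch(std::shared_ptr<const gko::Executor> exec)
{
    using Fbcsr = gko::matrix::Fbcsr<double, gko::int32>;
    try {
        auto bad = Fbcsr::create(exec, gko::dim<2>{6, 7}, 0,
                                 /* block size */ 3);  // 3 does not divide 7
    } catch (const gko::Error &err) {
        std::cerr << err.what() << '\n';  // e.g. "...: block size = 3, size = 7"
    }
}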
#include -#include #include #include @@ -60,6 +59,32 @@ template class FbcsrBuilder; +namespace detail { + + +/** + * Computes the number of blocks in some array of given size + * + * @param block_size The size of each block + * @param size The total size of some array/vector + * @return The number of blocks, ie., + * quotient of the size divided by the block size. + * + * @throw BlockSizeError when block_size does not divide the total size. + */ +template +IndexType get_num_blocks(const int block_size, const IndexType size) +{ + if (size % block_size != 0) { + throw BlockSizeError(__FILE__, __LINE__, block_size, size); + } + return size / block_size; +} + + +} // namespace detail + + /** * @brief Fixed-block compressed sparse row storage matrix format * @@ -189,7 +214,8 @@ class Fbcsr : public EnableLinOp>, void compute_absolute_inplace() override; /** - * Sorts all (value, col_idx) pairs in each row by column index + * Sorts the values blocks and block-column indices in each row + * by column index */ void sort_by_column_index(); @@ -253,31 +279,37 @@ class Fbcsr : public EnableLinOp>, } /** - * Returns the number of elements explicitly stored in the matrix. - * - * @return the number of elements explicitly stored in the matrix + * @return The number of elements explicitly stored in the matrix */ size_type get_num_stored_elements() const noexcept { return values_.get_num_elems(); } + /** + * @return The number of non-zero blocks explicitly stored in the matrix + */ + size_type get_num_stored_blocks() const noexcept + { + return col_idxs_.get_num_elems(); + } + /** * @return The fixed block size for this matrix */ - int get_block_size() const { return bs_; } + int get_block_size() const noexcept { return bs_; } /** * Set the fixed block size for this matrix * * @param block_size The block size */ - void set_block_size(const int block_size) { bs_ = block_size; } + void set_block_size(const int block_size) noexcept { bs_ = block_size; } /** * @return The number of block-rows in the matrix */ - index_type get_num_block_rows() const + index_type get_num_block_rows() const noexcept { return row_ptrs_.get_num_elems() - 1; } @@ -285,7 +317,7 @@ class Fbcsr : public EnableLinOp>, /** * @return The number of block-columns in the matrix */ - index_type get_num_block_cols() const { return nbcols_; } + index_type get_num_block_cols() const noexcept { return nbcols_; } protected: /** @@ -312,11 +344,11 @@ class Fbcsr : public EnableLinOp>, : EnableLinOp(exec, size), bs_{block_size}, nbcols_{static_cast( - blockutils::get_num_blocks(block_size, size[1]))}, + detail::get_num_blocks(block_size, size[1]))}, values_(exec, num_nonzeros), - col_idxs_(exec, blockutils::get_num_blocks(block_size * block_size, - num_nonzeros)), - row_ptrs_(exec, blockutils::get_num_blocks(block_size, size[0]) + 1) + col_idxs_(exec, detail::get_num_blocks(block_size * block_size, + num_nonzeros)), + row_ptrs_(exec, detail::get_num_blocks(block_size, size[0]) + 1) {} /** @@ -348,7 +380,7 @@ class Fbcsr : public EnableLinOp>, : EnableLinOp(exec, size), bs_{block_size}, nbcols_{static_cast( - blockutils::get_num_blocks(block_size, size[1]))}, + detail::get_num_blocks(block_size, size[1]))}, values_{exec, std::forward(values)}, col_idxs_{exec, std::forward(col_idxs)}, row_ptrs_{exec, std::forward(row_ptrs)} diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 38ac3fd861c..a2b62ec8671 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -39,7 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include #include #include diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index f56fa70315b..39ddaa91116 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -40,7 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include #include diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 1d639d0df7f..341ed67d690 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -648,9 +648,10 @@ TYPED_TEST(Fbcsr, ExtractsDiagonal) auto diag = matrix->extract_diagonal(); ASSERT_EQ(this->m2diag->get_size(), diag->get_size()); - for (gko::size_type i = 0; i < this->m2diag->get_size()[0]; i++) + for (gko::size_type i = 0; i < this->m2diag->get_size()[0]; i++) { ASSERT_EQ(this->m2diag->get_const_values()[i], diag->get_const_values()[i]); + } } From 323083cf3718a17a96f15d719c36c5f966bd6953 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 25 Jan 2021 21:17:47 +0100 Subject: [PATCH 45/58] Improved test coverage Added tests for BlockSizeError, getNumBlocks and especially FbcsrSample classes. --- core/test/base/exception.cpp | 8 + core/test/matrix/fbcsr.cpp | 268 +++++++++++++++++++------ cuda/test/matrix/CMakeLists.txt | 1 + cuda/test/matrix/fbcsr_kernels.cpp | 93 +++++++++ include/ginkgo/core/base/exception.hpp | 4 +- 5 files changed, 315 insertions(+), 59 deletions(-) create mode 100644 cuda/test/matrix/fbcsr_kernels.cpp diff --git a/core/test/base/exception.cpp b/core/test/base/exception.cpp index 2c1b672f1db..bbbe623dc30 100644 --- a/core/test/base/exception.cpp +++ b/core/test/base/exception.cpp @@ -148,6 +148,14 @@ TEST(ExceptionClasses, DimensionMismatchReturnsCorrectWhatMessage) } +TEST(ExceptionClasses, BlockSizeErrorCorrectWhatMessage) +{ + gko::BlockSizeError error("test_file.cpp", 243, 3, 20); + ASSERT_EQ(std::string("test_file.cpp:243: block size = 3, size = 20"), + error.what()); +} + + TEST(ExceptionClasses, AllocationErrorReturnsCorrectWhatMessage) { gko::AllocationError error("test_file.cpp", 42, "OMP", 135); diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 3330ab2237a..c4a7e3c3420 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -48,6 +48,192 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
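The test file that follows, like the earlier kernel tests, uses googletest typed tests driven by gko::test::ValueIndexTypes, so every fixture runs once per (value type, index type) pair. A minimal standalone illustration of that pattern, using an explicit type list instead of Ginkgo's helper; names and types here are assumptions for illustration only.

#include <tuple>

#include <gtest/gtest.h>

// Each TYPED_TEST is instantiated once per tuple in the type list; the
// fixture unpacks the tuple into the value and index types under test.
template <typename ValueIndexType>
class ExamplePairTest : public ::testing::Test {
protected:
    using value_type = typename std::tuple_element<0, ValueIndexType>::type;
    using index_type = typename std::tuple_element<1, ValueIndexType>::type;
};

using ExamplePairs =
    ::testing::Types<std::tuple<float, int>, std::tuple<double, long>>;
TYPED_TEST_SUITE(ExamplePairTest, ExamplePairs);

TYPED_TEST(ExamplePairTest, TypesAreAccessible)
{
    typename TestFixture::value_type val{};
    typename TestFixture::index_type idx{};

    ASSERT_EQ(val, typename TestFixture::value_type{0});
    ASSERT_EQ(idx, typename TestFixture::index_type{0});
}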
namespace { +template +void assert_matrices_are_same( + const gko::matrix::Fbcsr *const bm, + const gko::matrix::Csr *const cm, + const gko::matrix::Diagonal *const diam = nullptr, + const gko::matrix_data *const md = nullptr) +{ + if (cm) { + ASSERT_EQ(bm->get_size(), cm->get_size()); + ASSERT_EQ(bm->get_num_stored_elements(), cm->get_num_stored_elements()); + } + if (md) { + ASSERT_EQ(bm->get_size(), md->size); + ASSERT_EQ(bm->get_num_stored_elements(), md->nonzeros.size()); + } + if (diam) { + const gko::size_type minsize = + std::min(bm->get_size()[0], bm->get_size()[1]); + ASSERT_EQ(minsize, diam->get_size()[0]); + ASSERT_EQ(minsize, diam->get_size()[1]); + } + + const IndexType nbrows = bm->get_num_block_rows(); + const int bs = bm->get_block_size(); + gko::blockutils::DenseBlocksView fbvals( + bm->get_const_values(), bs, bs); + + for (IndexType ibrow = 0; ibrow < nbrows; ibrow++) { + const IndexType *const browptr = bm->get_const_row_ptrs(); + const IndexType numblocksbrow = browptr[ibrow + 1] - browptr[ibrow]; + for (IndexType irow = ibrow * bs; irow < ibrow * bs + bs; irow++) { + const IndexType rowstart = browptr[ibrow] * bs * bs + + (irow - ibrow * bs) * numblocksbrow * bs; + if (cm) { + ASSERT_EQ(cm->get_const_row_ptrs()[irow], rowstart); + } + } + + const IndexType iz_browstart = browptr[ibrow] * bs * bs; + const IndexType *const bcolinds = bm->get_const_col_idxs(); + + for (IndexType ibnz = browptr[ibrow]; ibnz < browptr[ibrow + 1]; + ibnz++) { + const IndexType bcol = bcolinds[ibnz]; + const IndexType blkoffset_frombrowstart = ibnz - browptr[ibrow]; + + for (int ib = 0; ib < bs; ib++) { + const IndexType row = ibrow * bs + ib; + const IndexType inz_rowstart = + iz_browstart + ib * numblocksbrow * bs; + const IndexType inz_blockstart_row = + inz_rowstart + blkoffset_frombrowstart * bs; + + for (int jb = 0; jb < bs; jb++) { + const IndexType col = bcol * bs + jb; + const IndexType inz = inz_blockstart_row + jb; + if (cm) { + ASSERT_EQ(col, cm->get_const_col_idxs()[inz]); + ASSERT_EQ(fbvals(ibnz, ib, jb), + cm->get_const_values()[inz]); + } + if (md) { + ASSERT_EQ(row, md->nonzeros[inz].row); + ASSERT_EQ(col, md->nonzeros[inz].column); + ASSERT_EQ(fbvals(ibnz, ib, jb), + md->nonzeros[inz].value); + } + if (row == col && diam) { + ASSERT_EQ(fbvals(ibnz, ib, jb), + diam->get_const_values()[row]); + } + } + } + } + } +} + + +template +void check_sample_generator_common(const SampleGenerator sg) +{ + auto fbmtx = sg.generate_fbcsr(); + ASSERT_EQ(fbmtx->get_num_block_rows(), sg.nbrows); + ASSERT_EQ(fbmtx->get_num_block_cols(), sg.nbcols); + ASSERT_EQ(fbmtx->get_size()[0], sg.nrows); + ASSERT_EQ(fbmtx->get_size()[1], sg.ncols); + ASSERT_EQ(fbmtx->get_num_stored_blocks(), sg.nbnz); + ASSERT_EQ(fbmtx->get_num_stored_elements(), sg.nnz); +} + + +template +class FbcsrSample : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + + FbcsrSample() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + + +TYPED_TEST_SUITE(FbcsrSample, gko::test::ValueIndexTypes); + + +TYPED_TEST(FbcsrSample, SampleGeneratorsAreCorrect) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MtxData = gko::matrix_data; + using Diag = gko::matrix::Diagonal; + auto ref = this->ref; + 
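The assert_matrices_are_same helper above checks every FBCSR entry against its CSR counterpart through an explicit offset computation. Pulling that arithmetic into a tiny standalone function may make the layout easier to see; the function below only restates the formula the helper uses and is illustrative, not part of the patch.

// CSR offset of element (ib, jb) of the ibz-th stored block, when the blocks
// of block-row ibrow start at browptr_ibrow and the block-row holds
// numblocksbrow blocks of size bs x bs:
//   entries of earlier block-rows        -> browptr_ibrow * bs * bs
//   full CSR rows inside this block-row  -> ib * numblocksbrow * bs
//   earlier blocks in the same CSR row   -> (ibz - browptr_ibrow) * bs
//   column inside the block              -> jb
int fbcsr_entry_to_csr_offset(int browptr_ibrow, int numblocksbrow, int ibz,
                              int ib, int jb, int bs)
{
    return browptr_ibrow * bs * bs + ib * numblocksbrow * bs +
           (ibz - browptr_ibrow) * bs + jb;
}

// Example: for bs = 2, a block-row starting at browptr_ibrow = 3 with 2
// blocks, the (1, 0) entry of its second block (ibz = 4) lands at CSR offset
// 3 * 4 + 1 * 2 * 2 + 1 * 2 + 0 = 18.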
gko::testing::FbcsrSample fbsample(ref); + gko::testing::FbcsrSample2 fbsample2(ref); + + std::unique_ptr fbmtx = fbsample.generate_fbcsr(); + std::unique_ptr csmtx = fbsample.generate_csr(); + const MtxData mdata = fbsample.generate_matrix_data_with_explicit_zeros(); + std::unique_ptr fbmtx2 = fbsample2.generate_fbcsr(); + std::unique_ptr csmtx2 = fbsample2.generate_csr(); + std::unique_ptr diag2 = fbsample2.extract_diagonal(); + const gko::Array nnzperrow = fbsample2.getNonzerosPerRow(); + + check_sample_generator_common(fbsample); + assert_matrices_are_same(fbmtx.get(), csmtx.get(), + static_cast(nullptr), &mdata); + check_sample_generator_common(fbsample2); + assert_matrices_are_same(fbmtx2.get(), csmtx2.get(), diag2.get()); + for (index_type irow = 0; irow < fbsample2.nrows; irow++) { + const index_type *const row_ptrs = csmtx2->get_const_row_ptrs(); + const index_type num_nnz_row = row_ptrs[irow + 1] - row_ptrs[irow]; + ASSERT_EQ(nnzperrow.get_const_data()[irow], num_nnz_row); + for (index_type iz = row_ptrs[irow]; iz < row_ptrs[irow + 1]; iz++) { + const index_type col = csmtx2->get_const_col_idxs()[iz]; + if (irow == col) { + ASSERT_EQ(csmtx2->get_const_values()[iz], + diag2->get_const_values()[irow]); + } + } + } + check_sample_generator_common( + gko::testing::FbcsrSampleUnsorted(ref)); + check_sample_generator_common( + gko::testing::FbcsrSampleSquare(ref)); +} + + +template +class FbcsrSampleComplex : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + + FbcsrSampleComplex() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + + +TYPED_TEST_SUITE(FbcsrSampleComplex, gko::test::ComplexValueIndexTypes); + + +TYPED_TEST(FbcsrSampleComplex, ComplexSampleGeneratorIsCorrect) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + auto ref = this->ref; + gko::testing::FbcsrSampleComplex fbsample3(ref); + + std::unique_ptr fbmtx3 = fbsample3.generate_fbcsr(); + std::unique_ptr csmtx3 = fbsample3.generate_csr(); + + check_sample_generator_common(fbsample3); + assert_matrices_are_same(fbmtx3.get(), csmtx3.get()); +} + + template class Fbcsr : public ::testing::Test { protected: @@ -56,11 +242,12 @@ class Fbcsr : public ::testing::Test { using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MtxData = gko::matrix_data; Fbcsr() : exec(gko::ReferenceExecutor::create()), - fbsample( - std::static_pointer_cast(exec)), + fbsample(exec), mtx(fbsample.generate_fbcsr()) { // backup for move tests @@ -76,7 +263,7 @@ class Fbcsr : public ::testing::Test { std::copy(v, v + fbsample.nnz, orig_vals.data()); } - std::shared_ptr exec; + std::shared_ptr exec; const gko::testing::FbcsrSample fbsample; std::unique_ptr mtx; @@ -91,7 +278,7 @@ class Fbcsr : public ::testing::Test { auto c = m->get_const_col_idxs(); auto r = m->get_const_row_ptrs(); - const int bs = 3; + const int bs = fbsample.bs; ASSERT_EQ(m->get_size(), orig_size); ASSERT_EQ(m->get_num_stored_elements(), orig_vals.size()); @@ -99,7 +286,6 @@ class Fbcsr : public ::testing::Test { ASSERT_EQ(m->get_num_block_rows(), m->get_size()[0] / bs); ASSERT_EQ(m->get_num_block_cols(), m->get_size()[1] / bs); - for (index_type irow = 0; irow < 
orig_size[0] / bs; irow++) { const index_type *const rowptr = &orig_rowptrs[0]; ASSERT_EQ(r[irow], rowptr[irow]); @@ -129,54 +315,24 @@ class Fbcsr : public ::testing::Test { TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); -TYPED_TEST(Fbcsr, SampleGeneratorIsCorrect) +TYPED_TEST(Fbcsr, GetNumBlocksCorrectlyThrows) { - using Mtx = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using Csr = gko::matrix::Csr; + const index_type vec_sz = 47; + const int blk_sz = 9; - std::unique_ptr fbmtx = this->fbsample.generate_fbcsr(); - std::unique_ptr csmtx = this->fbsample.generate_csr(); - const int bs = this->fbsample.bs; - ASSERT_EQ(bs, fbmtx->get_block_size()); - - gko::blockutils::DenseBlocksView fbvals( - fbmtx->get_const_values(), bs, bs); - - for (index_type ibrow = 0; ibrow < this->fbsample.nbrows; ibrow++) { - const index_type *const browptr = fbmtx->get_const_row_ptrs(); - const index_type numblocksbrow = browptr[ibrow + 1] - browptr[ibrow]; - for (index_type irow = ibrow * bs; irow < ibrow * bs + bs; irow++) { - const index_type rowstart = - browptr[ibrow] * bs * bs + - (irow - ibrow * bs) * numblocksbrow * bs; - ASSERT_EQ(csmtx->get_const_row_ptrs()[irow], rowstart); - } - - const index_type *const bcolinds = fbmtx->get_const_col_idxs(); + ASSERT_THROW(gko::matrix::detail::get_num_blocks(blk_sz, vec_sz), + gko::BlockSizeError); +} - for (index_type ibnz = browptr[ibrow]; ibnz < browptr[ibrow + 1]; - ibnz++) { - const index_type bcol = bcolinds[ibnz]; - const index_type blkoffset_frombrowstart = ibnz - browptr[ibrow]; - for (int ib = 0; ib < bs; ib++) { - const index_type row = ibrow * bs + ib; - const index_type inz_rowstart = - csmtx->get_const_row_ptrs()[row] + - blkoffset_frombrowstart * bs; +TYPED_TEST(Fbcsr, GetNumBlocksWorks) +{ + using index_type = typename TestFixture::index_type; + const index_type vec_sz = 45; + const int blk_sz = 9; - for (int jb = 0; jb < bs; jb++) { - const index_type col = bcol * bs + jb; - const index_type inz = inz_rowstart + jb; - ASSERT_EQ(col, csmtx->get_const_col_idxs()[inz]); - ASSERT_EQ(fbvals(ibnz, ib, jb), - csmtx->get_const_values()[inz]); - } - } - } - } + ASSERT_EQ(gko::matrix::detail::get_num_blocks(blk_sz, vec_sz), 5); } @@ -220,7 +376,6 @@ TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; using size_type = gko::size_type; - const int bs = this->fbsample.bs; const size_type nbrows = this->fbsample.nbrows; const size_type nbcols = this->fbsample.nbcols; @@ -269,6 +424,7 @@ TYPED_TEST(Fbcsr, CanBeMoved) TYPED_TEST(Fbcsr, CanBeCloned) { using Mtx = typename TestFixture::Mtx; + auto clone = this->mtx->clone(); this->assert_equal_to_original_mtx(this->mtx.get()); @@ -301,11 +457,11 @@ TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - gko::matrix_data refdata = - this->fbsample.generate_matrix_data_with_explicit_zeros(); + using MtxData = typename TestFixture::MtxData; + MtxData refdata = this->fbsample.generate_matrix_data_with_explicit_zeros(); refdata.ensure_row_major_order(); - gko::matrix_data data; + MtxData data; this->mtx->write(data); data.ensure_row_major_order(); @@ -322,17 +478,15 @@ TYPED_TEST(Fbcsr, DenseBlocksViewWorksCorrectly) using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; using 
Dbv = gko::blockutils::DenseBlocksView; - const gko::testing::FbcsrSample2 fbsample( - std::static_pointer_cast(this->exec)); - - auto refmtx = fbsample.generate_fbcsr(); - const Dbv testdbv(refmtx->get_values(), fbsample.bs, fbsample.bs); - + this->exec); std::vector ref_dbv_array(fbsample.nnz); Dbv refdbv(ref_dbv_array.data(), fbsample.bs, fbsample.bs); fbsample.fill_value_blocks_view(refdbv); + auto refmtx = fbsample.generate_fbcsr(); + const Dbv testdbv(refmtx->get_values(), fbsample.bs, fbsample.bs); + for (index_type ibz = 0; ibz < fbsample.nbnz; ibz++) { for (int i = 0; i < fbsample.bs; ++i) { for (int j = 0; j < fbsample.bs; ++j) { diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index 65ce218ac71..a1d7ca4a7a0 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) diff --git a/cuda/test/matrix/fbcsr_kernels.cpp b/cuda/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..79b455cb21c --- /dev/null +++ b/cuda/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,93 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +//#include +#include + + +#include "core/test/matrix/fbcsr_sample.hpp" +#include "cuda/test/utils.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Fbcsr<>; + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + std::unique_ptr mtx; +}; + + +TEST_F(Fbcsr, CanWriteFromMatrixOnDevice) +{ + using value_type = Mtx::value_type; + using index_type = Mtx::index_type; + using MatData = gko::matrix_data; + gko::testing::FbcsrSample sample(ref); + auto refmat = sample.generate_fbcsr(); + auto cudamat = Mtx::create(cuda); + cudamat->copy_from(gko::lend(refmat)); + + MatData refdata, cudadata; + refmat->write(refdata); + cudamat->write(cudadata); + + ASSERT_TRUE(refdata.nonzeros == cudadata.nonzeros); +} + + +} // namespace diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index e6e4076164c..7556602f13b 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -424,7 +424,7 @@ class BadDimension : public Error { /** * Error that denotes issues between block sizes and matrix dimensions * - * \tparam IntexType Type of index used by the linear algebra object that is + * \tparam IndexType Type of index used by the linear algebra object that is * incompatible with the requried block size. */ template @@ -439,7 +439,7 @@ class BlockSizeError : public Error { BlockSizeError(const std::string &file, const int line, const int block_size, const IndexType size) : Error(file, line, - " block size = " + std::to_string(block_size) + + "block size = " + std::to_string(block_size) + ", size = " + std::to_string(size)) {} }; From 688311c0740ba5475b3e98c201ecef1f152bc47b Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 26 Jan 2021 00:52:02 +0100 Subject: [PATCH 46/58] Addressed points raised in review Co-authored-by: Terry Cojean --- core/matrix/fbcsr.cpp | 21 +++------------ include/ginkgo/core/matrix/fbcsr.hpp | 9 ++++--- reference/matrix/fbcsr_kernels.cpp | 34 +++++++++++++++---------- reference/test/matrix/fbcsr_kernels.cpp | 27 ++++---------------- 4 files changed, 36 insertions(+), 55 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index b9cc0d8d7ae..99e6c9e8914 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -155,7 +155,7 @@ void Fbcsr::apply_impl(const LinOp *const b, using Dense = Dense; if (auto b_fbcsr = dynamic_cast *>(b)) { // if b is a FBCSR matrix, we need an SpGeMM - throw NotImplemented(__FILE__, __LINE__, "SpGeMM for Fbcsr"); + GKO_NOT_SUPPORTED(b_fbcsr); } else { // otherwise we assume that b is dense and compute a SpMV/SpMM this->get_executor()->run( @@ -173,10 +173,10 @@ void Fbcsr::apply_impl(const LinOp *const alpha, using Dense = Dense; if (auto b_fbcsr = dynamic_cast *>(b)) { // if b is a FBCSR matrix, we need an SpGeMM - throw NotImplemented(__FILE__, __LINE__, "Adv SpGeMM for Fbcsr"); - } else if (dynamic_cast *>(b)) { + GKO_NOT_SUPPORTED(b_fbcsr); + } else if (auto b_ident = dynamic_cast *>(b)) { // if b is an identity matrix, we need an SpGEAM - throw NotImplemented(__FILE__, __LINE__, "Adv SpGEAM for Fbcsr"); + GKO_NOT_SUPPORTED(b_ident); } else { // 
otherwise we assume that b is dense and compute a SpMV/SpMM this->get_executor()->run( @@ -186,19 +186,6 @@ void Fbcsr::apply_impl(const LinOp *const alpha, } -template -void Fbcsr::convert_to( - Fbcsr *const result) const -{ - result->values_ = this->values_; - result->col_idxs_ = this->col_idxs_; - result->row_ptrs_ = this->row_ptrs_; - result->set_size(this->get_size()); - result->bs_ = this->bs_; - result->nbcols_ = this->nbcols_; -} - - template void Fbcsr::convert_to( Fbcsr, IndexType> *const result) const diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index ba912d1e034..16a7dd6b726 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -152,9 +152,6 @@ class Fbcsr : public EnableLinOp>, using mat_data = matrix_data; using absolute_type = remove_complex; - - void convert_to(Fbcsr *result) const override; - /** * For moving to another Fbcsr of the same type, use the default * implementation provided by EnableLinOp via the @@ -162,6 +159,12 @@ class Fbcsr : public EnableLinOp>, */ using EnableLinOp>::move_to; + /** + * For converting (copying) to another Fbcsr of the same type, + * use the default implementation provided by EnableLinOp. + */ + using EnableLinOp>::convert_to; + friend class Fbcsr, IndexType>; void convert_to( diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 39ddaa91116..da7877c2363 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -81,9 +81,9 @@ void spmv(const std::shared_ptr, for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; - ++i) + ++i) { c->get_values()[i] = zero(); - + } for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1]; ++inz) { for (int ib = 0; ib < bs; ib++) { @@ -91,8 +91,9 @@ void spmv(const std::shared_ptr, for (int jb = 0; jb < bs; jb++) { const auto val = avalues(inz, ib, jb); const auto col = col_idxs[inz] * bs + jb; - for (size_type j = 0; j < nvecs; ++j) + for (size_type j = 0; j < nvecs; ++j) { c->at(row, j) += val * b->at(col, j); + } } } } @@ -162,18 +163,21 @@ void convert_to_dense(const std::shared_ptr, for (IndexType brow = 0; brow < nbrows; ++brow) { for (size_type bcol = 0; bcol < nbcols; ++bcol) { - for (int ib = 0; ib < bs; ib++) - for (int jb = 0; jb < bs; jb++) + for (int ib = 0; ib < bs; ib++) { + for (int jb = 0; jb < bs; jb++) { result->at(brow * bs + ib, bcol * bs + jb) = zero(); + } + } } for (IndexType ibnz = row_ptrs[brow]; ibnz < row_ptrs[brow + 1]; ++ibnz) { for (int ib = 0; ib < bs; ib++) { const IndexType row = brow * bs + ib; - for (int jb = 0; jb < bs; jb++) + for (int jb = 0; jb < bs; jb++) { result->at(row, col_idxs[ibnz] * bs + jb) = values(ibnz, ib, jb); + } } } } @@ -258,11 +262,13 @@ void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz, const auto dest_idx = col_ptrs[col_idxs[i]]; col_ptrs[col_idxs[i]]++; row_idxs[dest_idx] = brow; - for (int ib = 0; ib < blksz; ib++) - for (int jb = 0; jb < blksz; jb++) + for (int ib = 0; ib < blksz; ib++) { + for (int jb = 0; jb < blksz; jb++) { cvalues(dest_idx, ib, jb) = op(transpose_blocks ? 
rvalues(i, jb, ib) : rvalues(i, ib, jb)); + } + } } } } @@ -425,14 +431,15 @@ void sort_by_column_index(const std::shared_ptr exec, matrix::Fbcsr *const to_sort) { const int bs = to_sort->get_block_size(); - if (bs == 2) + if (bs == 2) { sort_by_column_index_impl<2>(to_sort); - else if (bs == 3) + } else if (bs == 3) { sort_by_column_index_impl<3>(to_sort); - else if (bs == 4) + } else if (bs == 4) { sort_by_column_index_impl<4>(to_sort); - else + } else { GKO_NOT_IMPLEMENTED; + } } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -460,8 +467,9 @@ void extract_diagonal(std::shared_ptr, for (IndexType idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 1]; ++idx) { if (col_idxs[idx] == ibrow) { - for (int ib = 0; ib < bs; ib++) + for (int ib = 0; ib < bs; ib++) { diag_values[ibrow * bs + ib] = vblocks(idx, ib, ib); + } break; } } diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 341ed67d690..2ed4e2be557 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -159,7 +159,6 @@ TYPED_TEST(Fbcsr, AppliesToDenseVector) using Vec = typename TestFixture::Vec; using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - const index_type nrows = this->mtx2->get_size()[0]; const index_type ncols = this->mtx2->get_size()[1]; auto x = Vec::create(this->exec, gko::dim<2>{(gko::size_type)ncols, 1}); @@ -168,13 +167,12 @@ TYPED_TEST(Fbcsr, AppliesToDenseVector) xvals[i] = std::sin(static_cast(static_cast((i + 1) ^ 2))); auto y = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); auto yref = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); - - this->mtx2->apply(x.get(), y.get()); - using Csr = typename TestFixture::Csr; auto csr_mtx = Csr::create(this->mtx->get_executor(), std::make_shared()); this->mtx2->convert_to(csr_mtx.get()); + + this->mtx2->apply(x.get(), y.get()); csr_mtx->apply(x.get(), yref.get()); const double tolerance = @@ -188,12 +186,10 @@ TYPED_TEST(Fbcsr, AppliesToDenseMatrix) using Vec = typename TestFixture::Vec; using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - const gko::size_type nrows = this->mtx2->get_size()[0]; const gko::size_type ncols = this->mtx2->get_size()[1]; const gko::size_type nvecs = 3; auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); - for (index_type i = 0; i < ncols; i++) for (index_type j = 0; j < nvecs; j++) x->at(i, j) = (static_cast(3.0 * i) + get_some_number()) / @@ -202,7 +198,6 @@ TYPED_TEST(Fbcsr, AppliesToDenseMatrix) auto yref = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); this->mtx2->apply(x.get(), y.get()); - this->ref2csrmtx->apply(x.get(), yref.get()); const double tolerance = @@ -216,17 +211,14 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) using Vec = typename TestFixture::Vec; using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - const T alphav = -1.0; const T betav = 2.0; auto alpha = gko::initialize({alphav}, this->exec); auto beta = gko::initialize({betav}, this->exec); - const gko::size_type nrows = this->mtx2->get_size()[0]; const gko::size_type ncols = this->mtx2->get_size()[1]; auto x = Vec::create(this->exec, gko::dim<2>{ncols, 1}); auto y = Vec::create(this->exec, gko::dim<2>{nrows, 1}); - for (index_type i = 0; i < ncols; i++) { x->at(i, 0) = (i + 1.0) * (i + 1.0); } @@ -234,11 +226,9 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseVector) y->at(i, 0) = 
static_cast(std::sin(2 * 3.14 * (i + 0.1) / nrows)) + get_some_number(); } - auto yref = y->clone(); this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); - this->ref2csrmtx->apply(alpha.get(), x.get(), beta.get(), yref.get()); const double tolerance = @@ -252,18 +242,15 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) using Vec = typename TestFixture::Vec; using T = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - const T alphav = -1.0; const T betav = 2.0; auto alpha = gko::initialize({alphav}, this->exec); auto beta = gko::initialize({betav}, this->exec); - const gko::size_type nrows = this->mtx2->get_size()[0]; const gko::size_type ncols = this->mtx2->get_size()[1]; const gko::size_type nvecs = 3; auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); auto y = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); - for (index_type i = 0; i < ncols; i++) for (index_type j = 0; j < nvecs; j++) { x->at(i, j) = @@ -275,11 +262,9 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) static_cast(std::sin(2 * 3.14 * (i + j + 0.1) / nrows)) + get_some_number(); } - auto yref = y->clone(); this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); - this->ref2csrmtx->apply(alpha.get(), x.get(), beta.get(), yref.get()); const double tolerance = @@ -543,7 +528,6 @@ TYPED_TEST(Fbcsr, CalculatesNonzerosPerRow) gko::kernels::reference::fbcsr::calculate_nonzeros_per_row( this->exec, this->mtx2.get(), &row_nnz); - gko::Array refrnnz(this->exec, this->mtx2->get_size()[0]); gko::kernels::reference::csr ::calculate_nonzeros_per_row( this->exec, this->ref2csrmtx.get(), &refrnnz); @@ -562,7 +546,6 @@ TYPED_TEST(Fbcsr, CalculatesMaxNnzPerRow) gko::kernels::reference::fbcsr::calculate_max_nnz_per_row( this->exec, this->mtx2.get(), &max_row_nnz); - gko::size_type ref_max_row_nnz{}; gko::kernels::reference::csr::calculate_max_nnz_per_row( this->exec, this->ref2csrmtx.get(), &ref_max_row_nnz); @@ -578,9 +561,9 @@ TYPED_TEST(Fbcsr, SquareMtxIsTransposable) auto csrmtxsq = Csr::create(this->exec, std::make_shared()); this->mtxsq->convert_to(csrmtxsq.get()); + std::unique_ptr reftmtx = csrmtxsq->transpose(); auto reftmtx_as_csr = static_cast(reftmtx.get()); - auto trans = this->mtxsq->transpose(); auto trans_as_fbcsr = static_cast(trans.get()); @@ -595,9 +578,9 @@ TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) auto csrmtx = Csr::create(this->exec, std::make_shared()); this->mtx2->convert_to(csrmtx.get()); + std::unique_ptr reftmtx = csrmtx->transpose(); auto reftmtx_as_csr = static_cast(reftmtx.get()); - auto trans = this->mtx2->transpose(); auto trans_as_fbcsr = static_cast(trans.get()); @@ -680,9 +663,9 @@ TYPED_TEST(Fbcsr, OutplaceAbsolute) using AbsCsr = typename gko::remove_complex; using AbsMtx = typename gko::remove_complex; auto mtx = this->fbsample2.generate_fbcsr(); + const std::unique_ptr refabs = this->ref2csrmtx->compute_absolute(); - auto abs_mtx = mtx->compute_absolute(); const gko::remove_complex tolerance = From aebb4333482a4783abb28154923438a1b7253f1c Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 26 Jan 2021 12:28:19 +0100 Subject: [PATCH 47/58] small dense blocks are now column-major --- core/components/fixed_block.hpp | 6 +++--- core/matrix/fbcsr.cpp | 4 ++-- core/test/matrix/fbcsr_sample.hpp | 26 ++++++++++++------------- reference/test/matrix/fbcsr_kernels.cpp | 15 +++++++++----- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp index 
889cb2145a2..f6fed3c6a6b 100644 --- a/core/components/fixed_block.hpp +++ b/core/components/fixed_block.hpp @@ -46,7 +46,7 @@ namespace blockutils { * * Accessing BSR values using this type of view abstracts away the * storage layout within the individual blocks, as long as all blocks use the - * same layout. For now, row-major blocks are assumed. + * same layout. For now, column-major blocks are assumed. * * @tparam ValueType The numeric type of entries of the block * @tparam IndexType The type of integer used to identify the different blocks @@ -70,13 +70,13 @@ class DenseBlocksView final { value_type &at(const index_type block, const int row, const int col) { - return vals_[block * nrows * ncols + row * ncols + col]; + return vals_[block * nrows * ncols + row + col * nrows]; } const value_type &at(const index_type block, const int row, const int col) const { - return vals_[block * nrows * ncols + row * ncols + col]; + return vals_[block * nrows * ncols + row + col * nrows]; } value_type &operator()(const index_type block, const int row, const int col) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 99e6c9e8914..38f2766729d 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -106,12 +106,12 @@ class DenseBlock final { value_type &at(const int row, const int col) { - return vals_[row * ncols_ + col]; + return vals_[row + col * nrows_]; } const value_type &at(const int row, const int col) const { - return vals_[row * ncols_ + col]; + return vals_[row + col * nrows_]; } value_type &operator()(const int row, const int col) diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 4f6bcc864f2..107532a44a9 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -136,8 +136,8 @@ class FbcsrSample { vals(0, 2, 2) = gko::zero(); vals(3, 0, 0) = gko::zero(); - v[34] += FBCSR_TEST_IMAGINARY; - v[35] += FBCSR_TEST_IMAGINARY; + vals(3, 2, 1) += FBCSR_TEST_IMAGINARY; + vals(3, 2, 2) += FBCSR_TEST_IMAGINARY; return mtx; } @@ -302,14 +302,14 @@ class FbcsrSample2 { for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; v[0] = 1; - v[1] = 2; - v[2] = 3; + v[1] = 3; + v[2] = 2; v[3] = 0; - v[10] = 0; + v[9] = 0; v[11] = 0; v[12] = -12; - v[13] = -1; - v[14] = -2; + v[13] = -2; + v[14] = -1; v[15] = -11; return Fbcsr::create(exec, @@ -385,7 +385,7 @@ class FbcsrSample2 { }; /** - * @brief Generates the a sample block CSR square matrix and its transpose + * @brief Generates the a sample block CSR square matrix * * This currently a 4 x 4 matrix with 2x2 blocks. 
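// A minimal sketch of the effect of the layout change above (hypothetical
// helpers, not part of the patch): a bs x bs block number b occupies the
// contiguous slice vals[b * bs * bs, (b + 1) * bs * bs); within that slice,
// entry (row, col) moves from its row-major position to the column-major one.
#include <cstddef>

inline std::size_t block_entry_row_major(const std::size_t b, const int bs,
                                         const int row, const int col)
{
    return b * bs * bs + row * bs + col;  // layout before this change
}

inline std::size_t block_entry_col_major(const std::size_t b, const int bs,
                                         const int row, const int col)
{
    return b * bs * bs + row + col * bs;  // layout after this change
}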
*/ @@ -467,14 +467,14 @@ class FbcsrSampleComplex { using namespace std::complex_literals; v[0] = 1.0 + 1.15i; - v[1] = 2.0 + 2.15i; - v[2] = 3.0 - 3.15i; + v[2] = 2.0 + 2.15i; + v[1] = 3.0 - 3.15i; v[3] = 0.0 - 0.15i; - v[10] = 0.0; + v[9] = 0.0; v[11] = 0.0; v[12] = -12.0 + 12.15i; - v[13] = -1.0 + 1.15i; - v[14] = -2.0 - 2.15i; + v[14] = -1.0 + 1.15i; + v[13] = -2.0 - 2.15i; v[15] = -11.0 - 11.15i; return Fbcsr::create(exec, diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 2ed4e2be557..96f376fa68c 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -163,8 +163,9 @@ TYPED_TEST(Fbcsr, AppliesToDenseVector) const index_type ncols = this->mtx2->get_size()[1]; auto x = Vec::create(this->exec, gko::dim<2>{(gko::size_type)ncols, 1}); T *const xvals = x->get_values(); - for (index_type i = 0; i < ncols; i++) + for (index_type i = 0; i < ncols; i++) { xvals[i] = std::sin(static_cast(static_cast((i + 1) ^ 2))); + } auto y = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); auto yref = Vec::create(this->exec, gko::dim<2>{(gko::size_type)nrows, 1}); using Csr = typename TestFixture::Csr; @@ -190,10 +191,12 @@ TYPED_TEST(Fbcsr, AppliesToDenseMatrix) const gko::size_type ncols = this->mtx2->get_size()[1]; const gko::size_type nvecs = 3; auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); - for (index_type i = 0; i < ncols; i++) - for (index_type j = 0; j < nvecs; j++) + for (index_type i = 0; i < ncols; i++) { + for (index_type j = 0; j < nvecs; j++) { x->at(i, j) = (static_cast(3.0 * i) + get_some_number()) / static_cast(j + 1.0); + } + } auto y = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); auto yref = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); @@ -251,17 +254,19 @@ TYPED_TEST(Fbcsr, AppliesLinearCombinationToDenseMatrix) const gko::size_type nvecs = 3; auto x = Vec::create(this->exec, gko::dim<2>{ncols, nvecs}); auto y = Vec::create(this->exec, gko::dim<2>{nrows, nvecs}); - for (index_type i = 0; i < ncols; i++) + for (index_type i = 0; i < ncols; i++) { for (index_type j = 0; j < nvecs; j++) { x->at(i, j) = std::log(static_cast(0.1 + static_cast((i + 1) ^ 2))); } - for (index_type i = 0; i < nrows; i++) + } + for (index_type i = 0; i < nrows; i++) { for (index_type j = 0; j < nvecs; j++) { y->at(i, j) = static_cast(std::sin(2 * 3.14 * (i + j + 0.1) / nrows)) + get_some_number(); } + } auto yref = y->clone(); this->mtx2->apply(alpha.get(), x.get(), beta.get(), y.get()); From a50178766cdb894270748540d8f665a7dd44f082 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 27 Jan 2021 12:56:32 +0100 Subject: [PATCH 48/58] New exception helper for divisibility by block size Also in fbcsr.hpp, detail::get_num_blocks is now declared inline. --- .../ginkgo/core/base/exception_helpers.hpp | 20 +++++++++++++++++++ include/ginkgo/core/matrix/fbcsr.hpp | 6 ++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index a10f05896fe..7a34a0835a0 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -567,6 +567,26 @@ inline T ensure_allocated_impl(T ptr, const std::string &file, int line, "semi-colon warnings") +/** + * Ensures that a given size, typically of a linear algebraic object, + * is divisible by a given block size. 
+ * + * @param _size A size of a vector or matrix + * @param _block_size Size of small dense blocks that make up + * the vector or matrix + * + * @throw BlockSizeError if _block_size does not divide _size + */ +#define GKO_ASSERT_BLOCK_SIZE_CONFORMANT(_size, _block_size) \ + if (_size % _block_size != 0) { \ + throw BlockSizeError(__FILE__, __LINE__, _block_size, \ + _size); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + } // namespace gko diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 16a7dd6b726..38cac854bc5 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -73,11 +73,9 @@ namespace detail { * @throw BlockSizeError when block_size does not divide the total size. */ template -IndexType get_num_blocks(const int block_size, const IndexType size) +inline IndexType get_num_blocks(const int block_size, const IndexType size) { - if (size % block_size != 0) { - throw BlockSizeError(__FILE__, __LINE__, block_size, size); - } + GKO_ASSERT_BLOCK_SIZE_CONFORMANT(size, block_size); return size / block_size; } From 8ced1e9512f39ad8a568b79e7907be245d2739e5 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 27 Jan 2021 17:34:50 +0100 Subject: [PATCH 49/58] fixed get_num_blocks test --- core/test/matrix/fbcsr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index c4a7e3c3420..f5765bd055a 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -322,7 +322,7 @@ TYPED_TEST(Fbcsr, GetNumBlocksCorrectlyThrows) const int blk_sz = 9; ASSERT_THROW(gko::matrix::detail::get_num_blocks(blk_sz, vec_sz), - gko::BlockSizeError); + gko::BlockSizeError); } From 6bff7cf58870da0381f1a5d4c7954f520ae8ddfd Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 3 Feb 2021 16:07:45 +0100 Subject: [PATCH 50/58] Fbcsr write now writes in row-major order Co-authored-by: Tobias Ribizel --- core/matrix/fbcsr.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 38f2766729d..6ed3f22eac0 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -393,9 +393,9 @@ void Fbcsr::write(mat_data &data) const const auto start = tmp->row_ptrs_.get_const_data()[brow]; const auto end = tmp->row_ptrs_.get_const_data()[brow + 1]; - for (auto inz = start; inz < end; ++inz) { - for (int ib = 0; ib < bs_; ib++) { - const auto row = brow * bs_ + ib; + for (int ib = 0; ib < bs_; ib++) { + const auto row = brow * bs_ + ib; + for (auto inz = start; inz < end; ++inz) { for (int jb = 0; jb < bs_; jb++) { const auto col = tmp->col_idxs_.get_const_data()[inz] * bs_ + jb; From e370d3601a699c5a5e34c98adb6829fface2faaa Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 3 Feb 2021 18:07:03 +0100 Subject: [PATCH 51/58] changed macros to variables in fbcsr sample --- core/test/matrix/fbcsr_sample.hpp | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 107532a44a9..d89dfa4839e 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -46,16 +46,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
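// A minimal standalone sketch of the behaviour that
// GKO_ASSERT_BLOCK_SIZE_CONFORMANT and detail::get_num_blocks encode: a size
// that is not an exact multiple of the block size is rejected, otherwise the
// exact quotient is returned. The helper below is illustrative only and uses
// a standard exception instead of gko::BlockSizeError.
#include <stdexcept>
#include <string>

template <typename IndexType>
IndexType checked_num_blocks(const int block_size, const IndexType size)
{
    if (size % block_size != 0) {
        throw std::invalid_argument("size " + std::to_string(size) +
                                    " is not divisible by block size " +
                                    std::to_string(block_size));
    }
    return size / block_size;
}

// checked_num_blocks(9, 45) == 5, while checked_num_blocks(9, 47) throws,
// mirroring the GetNumBlocksWorks and GetNumBlocksCorrectlyThrows tests.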
#include "core/test/utils.hpp" -#define FBCSR_TEST_OFFSET 0.000011118888 -#define FBCSR_TEST_C_MAG 0.1 + FBCSR_TEST_OFFSET -#define FBCSR_TEST_IMAGINARY \ - sct(std::complex>(0, FBCSR_TEST_C_MAG)) - - namespace gko { namespace testing { +constexpr auto fbcsr_test_offset = 0.000011118888; + + /** Generates the same sample block CSR matrix in different formats * * This currently a 6 x 12 matrix with 3x3 blocks. @@ -136,8 +133,8 @@ class FbcsrSample { vals(0, 2, 2) = gko::zero(); vals(3, 0, 0) = gko::zero(); - vals(3, 2, 1) += FBCSR_TEST_IMAGINARY; - vals(3, 2, 2) += FBCSR_TEST_IMAGINARY; + vals(3, 2, 1) += fbcsr_test_imaginary; + vals(3, 2, 2) += fbcsr_test_imaginary; return mtx; } @@ -158,8 +155,8 @@ class FbcsrSample { {2, 3, 4, 4, 5, 6, 5, 6, 7, 7, 8, 9, 0, 9, 0, 10, 11, 12, 2, 3, 4, 0, 7, 8, 5, 6, 7, 9, 10, 11, 8, 9, 10, 12, - sct(13.0) + FBCSR_TEST_IMAGINARY, - sct(14.0) + FBCSR_TEST_IMAGINARY}); + sct(13.0) + fbcsr_test_imaginary, + sct(14.0) + fbcsr_test_imaginary}); // clang-format on return Csr::create(exec, gko::dim<2>{nrows, ncols}, csrvals, csrcols, csrrow); @@ -223,8 +220,8 @@ class FbcsrSample { {4, 7, 10.0}, {4, 8, 11.0}, {5, 6, 12.0}, - {5, 7, sct(13.0) + FBCSR_TEST_IMAGINARY}, - {5, 8, sct(14.0) + FBCSR_TEST_IMAGINARY}}}); + {5, 7, sct(13.0) + fbcsr_test_imaginary}, + {5, 8, sct(14.0) + fbcsr_test_imaginary}}}); } /** @@ -261,6 +258,9 @@ class FbcsrSample { { return static_cast(cu.real()); } + + const ValueType fbcsr_test_imaginary = sct( + std::complex>(0, 0.1 + fbcsr_test_offset)); }; /** @@ -299,7 +299,7 @@ class FbcsrSample2 { gko::Array c(exec, {0, 0, 3, 2}); gko::Array vals(exec, nnz); value_type *const v = vals.get_data(); - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; v[0] = 1; v[1] = 3; @@ -325,7 +325,7 @@ class FbcsrSample2 { exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); gko::Array vals(exec, nnz); value_type *const v = vals.get_data(); - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; v[0] = 1; v[1] = 2; v[2] = 3; @@ -363,9 +363,9 @@ class FbcsrSample2 { dbv(0, 1, 0) = 3.0; dbv(0, 1, 1) = 0.0; for (int i = 0; i < 2; ++i) - for (int j = 0; j < 2; ++j) dbv(1, i, j) = 0.15 + FBCSR_TEST_OFFSET; - dbv(2, 0, 0) = 0.15 + FBCSR_TEST_OFFSET; - dbv(2, 0, 1) = 0.15 + FBCSR_TEST_OFFSET; + for (int j = 0; j < 2; ++j) dbv(1, i, j) = 0.15 + fbcsr_test_offset; + dbv(2, 0, 0) = 0.15 + fbcsr_test_offset; + dbv(2, 0, 1) = 0.15 + fbcsr_test_offset; dbv(2, 1, 0) = 0.0; dbv(2, 1, 1) = 0.0; dbv(3, 0, 0) = -12.0; @@ -463,7 +463,7 @@ class FbcsrSampleComplex { gko::Array c(exec, {0, 0, 3, 2}); gko::Array vals(exec, nnz); value_type *const v = vals.get_data(); - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; using namespace std::complex_literals; v[0] = 1.0 + 1.15i; @@ -490,7 +490,7 @@ class FbcsrSampleComplex { exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); gko::Array vals(exec, nnz); value_type *const v = vals.get_data(); - for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + FBCSR_TEST_OFFSET; + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; using namespace std::complex_literals; v[0] = 1.0 + 1.15i; @@ -547,7 +547,7 @@ class FbcsrSampleUnsorted { gko::Array vals(exec, nnz); value_type *const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) { - v[i] = static_cast(i 
+ 0.15 + FBCSR_TEST_OFFSET); + v[i] = static_cast(i + 0.15 + fbcsr_test_offset); } return Fbcsr::create(exec, From 8dc3d858fcd6db9f7f0f19f44f2317b4d049b028 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 3 Feb 2021 20:13:14 +0100 Subject: [PATCH 52/58] review updates - removal of unnecessary lines of code - snake case convention for non-type template param Co-authored-by: Pratik Nayak --- cuda/matrix/fbcsr_kernels.cu | 10 ---------- cuda/test/matrix/fbcsr_kernels.cpp | 4 ++-- hip/matrix/fbcsr_kernels.hip.cpp | 17 ----------------- reference/matrix/fbcsr_kernels.cpp | 4 ++-- 4 files changed, 4 insertions(+), 31 deletions(-) diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index 2d3677ea6bb..6f7bc48cf92 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -57,16 +57,6 @@ namespace cuda { namespace fbcsr { -/** - * A compile-time list of the number items per threads for which spmv kernel - * should be compiled. - */ -using compiled_kernels = syn::value_list; - -using classical_kernels = - syn::value_list; - - template void spmv(std::shared_ptr exec, const matrix::Fbcsr *a, diff --git a/cuda/test/matrix/fbcsr_kernels.cpp b/cuda/test/matrix/fbcsr_kernels.cpp index 79b455cb21c..02bfa3adfe1 100644 --- a/cuda/test/matrix/fbcsr_kernels.cpp +++ b/cuda/test/matrix/fbcsr_kernels.cpp @@ -36,7 +36,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -//#include #include @@ -82,7 +81,8 @@ TEST_F(Fbcsr, CanWriteFromMatrixOnDevice) auto cudamat = Mtx::create(cuda); cudamat->copy_from(gko::lend(refmat)); - MatData refdata, cudadata; + MatData refdata; + MatData cudadata; refmat->write(refdata); cudamat->write(cudadata); diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index 934b7f270cd..13ca94ad5f7 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -60,23 +60,6 @@ namespace hip { namespace fbcsr { -constexpr int default_block_size = 512; -constexpr int warps_in_block = 4; -constexpr int spmv_block_size = warps_in_block * config::warp_size; -constexpr int wsize = config::warp_size; -constexpr int classical_overweight = 32; - - -/** - * A compile-time list of the number items per threads for which spmv kernel - * should be compiled. 
- */ -using compiled_kernels = syn::value_list; - -using classical_kernels = - syn::value_list; - - template void spmv(std::shared_ptr exec, const matrix::Fbcsr *a, diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index da7877c2363..0c5ba27f511 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -395,7 +395,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); -template +template static void sort_by_column_index_impl( matrix::Fbcsr *const to_sort) { @@ -403,7 +403,7 @@ static void sort_by_column_index_impl( auto col_idxs = to_sort->get_col_idxs(); auto values = to_sort->get_values(); const auto nbrows = to_sort->get_num_block_rows(); - constexpr int bs2 = matBlkSz * matBlkSz; + constexpr int bs2 = mat_blk_sz * mat_blk_sz; for (IndexType i = 0; i < nbrows; ++i) { IndexType *const brow_col_idxs = col_idxs + row_ptrs[i]; ValueType *const brow_vals = values + row_ptrs[i] * bs2; From a5d7929bd0e396e647cbdac9aa288301f860d1f0 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 4 Feb 2021 11:19:54 +0100 Subject: [PATCH 53/58] added block size check to in fbcsr read, updated contributors list --- contributors.txt | 1 + core/matrix/fbcsr.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contributors.txt b/contributors.txt index fde5b54d2c3..a7a558499e2 100644 --- a/contributors.txt +++ b/contributors.txt @@ -14,6 +14,7 @@ Grützmacher Thomas Karlsruhe Institute of Technology Heroux Mike Sandia National Laboratories Hoemmen Mark Sandia National Laboratories Holeksa Claudius Karlsruhe Institute of Technology +Kashi Aditya Karlsruhe Institute of Technology Maier Matthias Texas A&M University Nayak Pratik Karlsruhe Institute of Technology Olenik Gregor HPSim diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 6ed3f22eac0..f86292154dd 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -338,7 +338,7 @@ void Fbcsr::read(const mat_data &data) index_type cur_brow = 0; index_type cur_bnz = 0; index_type cur_bcol = blocks.begin()->first.block_column; - const index_type num_brows = data.size[0] / bs; + const index_type num_brows = detail::get_num_blocks(bs, data.size[0]); blockutils::DenseBlocksView values( tmp->values_.get_data(), bs, bs); From 5b57d564b30fc13c910e9d86f9c91bd96965dd8a Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Mon, 8 Feb 2021 21:29:21 +0100 Subject: [PATCH 54/58] fixed a bug in ref fbcsr transpose --- reference/matrix/fbcsr_kernels.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 0c5ba27f511..2076b13c1b2 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -456,14 +456,16 @@ void extract_diagonal(std::shared_ptr, const auto values = orig->get_const_values(); const int bs = orig->get_block_size(); const IndexType nbrows = orig->get_num_block_rows(); + const IndexType nbdim_min = + std::min(orig->get_num_block_rows(), orig->get_num_block_cols()); auto diag_values = diag->get_values(); - assert(diag->get_size()[0] == orig->get_size()[0]); + assert(diag->get_size()[0] == nbdim_min * bs); const gko::blockutils::DenseBlocksView vblocks( values, bs, bs); - for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { + for (IndexType ibrow = 0; ibrow < nbdim_min; ++ibrow) { for (IndexType idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 1]; ++idx) { if (col_idxs[idx] == ibrow) { From 
1e49c9123771b0d219153b33fccd0447ad6dae4f Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Thu, 18 Feb 2021 19:01:26 +0100 Subject: [PATCH 55/58] replaced new DenseBlocksView with range and col_major accessor --- core/components/fixed_block.hpp | 105 ----------------------------- core/matrix/fbcsr.cpp | 14 ++-- core/test/matrix/fbcsr.cpp | 30 +-------- core/test/matrix/fbcsr_sample.hpp | 26 +------ reference/matrix/fbcsr_kernels.cpp | 31 ++++----- 5 files changed, 30 insertions(+), 176 deletions(-) delete mode 100644 core/components/fixed_block.hpp diff --git a/core/components/fixed_block.hpp b/core/components/fixed_block.hpp deleted file mode 100644 index f6fed3c6a6b..00000000000 --- a/core/components/fixed_block.hpp +++ /dev/null @@ -1,105 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ -#define GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ - - -#include - - -namespace gko { -namespace blockutils { - - -/** - * @brief A view into a an array of dense blocks of some runtime-defined size - * - * Accessing BSR values using this type of view abstracts away the - * storage layout within the individual blocks, as long as all blocks use the - * same layout. For now, column-major blocks are assumed. 
- * - * @tparam ValueType The numeric type of entries of the block - * @tparam IndexType The type of integer used to identify the different blocks - */ -template -class DenseBlocksView final { -public: - using value_type = ValueType; - using index_type = IndexType; - - /** - * @param buffer Pointer to the segment of memory to be interpreted as - * an array of 2D blocks - * @param num_rows Number of rows in each block - * @param num_cols Number of columns in each block - */ - DenseBlocksView(ValueType *const buffer, const int num_rows, - const int num_cols) - : nrows{num_rows}, ncols{num_cols}, vals_{buffer} - {} - - value_type &at(const index_type block, const int row, const int col) - { - return vals_[block * nrows * ncols + row + col * nrows]; - } - - const value_type &at(const index_type block, const int row, - const int col) const - { - return vals_[block * nrows * ncols + row + col * nrows]; - } - - value_type &operator()(const index_type block, const int row, const int col) - { - return at(block, row, col); - } - - const value_type &operator()(const index_type block, const int row, - const int col) const - { - return at(block, row, col); - } - - const int nrows; ///< Number of rows in each block - const int ncols; ///< Number of columns in each block - -private: - value_type *const vals_; -}; - - -} // namespace blockutils -} // namespace gko - - -#endif // GKO_CORE_COMPONENTS_FIXED_BLOCK_HPP_ diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index f86292154dd..53d86048778 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -49,7 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" -#include "core/components/fixed_block.hpp" #include "core/matrix/fbcsr_kernels.hpp" @@ -340,8 +339,10 @@ void Fbcsr::read(const mat_data &data) index_type cur_bcol = blocks.begin()->first.block_column; const index_type num_brows = detail::get_num_blocks(bs, data.size[0]); - blockutils::DenseBlocksView values( - tmp->values_.get_data(), bs, bs); + // blockutils::DenseBlocksView values( + // tmp->values_.get_data(), bs, bs); + range> values( + tmp->values_.get_data(), dim<3>(blocks.size(), bs, bs)); for (auto it = blocks.begin(); it != blocks.end(); it++) { GKO_ENSURE_IN_BOUNDS(cur_brow, num_brows); @@ -386,8 +387,11 @@ void Fbcsr::write(mat_data &data) const data = {tmp->get_size(), {}}; - const blockutils::DenseBlocksView vblocks( - tmp->values_.get_const_data(), bs_, bs_); + // const blockutils::DenseBlocksView vblocks( + // tmp->values_.get_const_data(), bs_, bs_); + const size_type nbnz = tmp->get_num_stored_blocks(); + const range> vblocks( + tmp->values_.get_const_data(), dim<3>(nbnz, bs_, bs_)); for (size_type brow = 0; brow < tmp->get_num_block_rows(); ++brow) { const auto start = tmp->row_ptrs_.get_const_data()[brow]; diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index f5765bd055a..3e2757a0050 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -40,7 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
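// A minimal stand-in (hypothetical type, not the Ginkgo accessor) for the
// rank-3 view that replaces DenseBlocksView in this commit: the first extent
// enumerates blocks, and within each block the last two extents are stored
// column-major, matching the indexing DenseBlocksView used before its removal.
#include <cstddef>

template <typename ValueType>
struct block_col_major_view {
    ValueType* data;
    std::size_t num_blocks;
    int block_rows;
    int block_cols;

    ValueType& operator()(const std::size_t block, const int row,
                          const int col) const
    {
        return data[block * block_rows * block_cols + col * block_rows + row];
    }
};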
#include -#include "core/components/fixed_block.hpp" #include "core/test/matrix/fbcsr_sample.hpp" #include "core/test/utils.hpp" @@ -72,8 +71,9 @@ void assert_matrices_are_same( const IndexType nbrows = bm->get_num_block_rows(); const int bs = bm->get_block_size(); - gko::blockutils::DenseBlocksView fbvals( - bm->get_const_values(), bs, bs); + const auto nbnz = bm->get_num_stored_blocks(); + gko::range> fbvals( + bm->get_const_values(), gko::dim<3>(nbnz, bs, bs)); for (IndexType ibrow = 0; ibrow < nbrows; ibrow++) { const IndexType *const browptr = bm->get_const_row_ptrs(); @@ -473,28 +473,4 @@ TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) } -TYPED_TEST(Fbcsr, DenseBlocksViewWorksCorrectly) -{ - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Dbv = gko::blockutils::DenseBlocksView; - const gko::testing::FbcsrSample2 fbsample( - this->exec); - std::vector ref_dbv_array(fbsample.nnz); - Dbv refdbv(ref_dbv_array.data(), fbsample.bs, fbsample.bs); - fbsample.fill_value_blocks_view(refdbv); - - auto refmtx = fbsample.generate_fbcsr(); - const Dbv testdbv(refmtx->get_values(), fbsample.bs, fbsample.bs); - - for (index_type ibz = 0; ibz < fbsample.nbnz; ibz++) { - for (int i = 0; i < fbsample.bs; ++i) { - for (int j = 0; j < fbsample.bs; ++j) { - ASSERT_EQ(testdbv(ibz, i, j), refdbv(ibz, i, j)); - } - } - } -} - - } // namespace diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index d89dfa4839e..748e5234361 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -42,7 +42,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/components/fixed_block.hpp" #include "core/test/utils.hpp" @@ -106,8 +105,8 @@ class FbcsrSample { c[2] = 0; c[3] = 2; - gko::blockutils::DenseBlocksView vals(v, bs, - bs); + gko::range> vals( + v, gko::dim<3>(nbnz, bs, bs)); if (mtx->get_size()[0] % bs != 0) throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", @@ -275,8 +274,6 @@ class FbcsrSample2 { using Fbcsr = gko::matrix::Fbcsr; using Csr = gko::matrix::Csr; using Diagonal = gko::matrix::Diagonal; - using DenseBlocksView = - gko::blockutils::DenseBlocksView; const size_type nrows = 6; @@ -355,25 +352,6 @@ class FbcsrSample2 { return gko::Array(exec, {2, 2, 4, 4, 2, 2}); } - /// Fills a view into a FBCSR values array using the sample matrix's data - void fill_value_blocks_view(DenseBlocksView &dbv) const - { - dbv(0, 0, 0) = 1.0; - dbv(0, 0, 1) = 2.0; - dbv(0, 1, 0) = 3.0; - dbv(0, 1, 1) = 0.0; - for (int i = 0; i < 2; ++i) - for (int j = 0; j < 2; ++j) dbv(1, i, j) = 0.15 + fbcsr_test_offset; - dbv(2, 0, 0) = 0.15 + fbcsr_test_offset; - dbv(2, 0, 1) = 0.15 + fbcsr_test_offset; - dbv(2, 1, 0) = 0.0; - dbv(2, 1, 1) = 0.0; - dbv(3, 0, 0) = -12.0; - dbv(3, 0, 1) = -1.0; - dbv(3, 1, 0) = -2.0; - dbv(3, 1, 1) = -11.0; - } - private: /// Enables use of literals to instantiate value data diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 2076b13c1b2..6bb3563dbe7 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -42,12 +42,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
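// A simplified restatement of the reference FBCSR spmv kernel touched in this
// file: one right-hand side, raw pointers, and no accessor machinery. Blocks
// are bs x bs and stored one after another in vals, column-major within each
// block, consistent with the block layout adopted earlier in the series.
template <typename ValueType, typename IndexType>
void block_csr_spmv(const IndexType num_block_rows, const int bs,
                    const IndexType* row_ptrs, const IndexType* col_idxs,
                    const ValueType* vals, const ValueType* b, ValueType* c)
{
    for (IndexType ibrow = 0; ibrow < num_block_rows; ++ibrow) {
        for (int ib = 0; ib < bs; ++ib) {
            c[ibrow * bs + ib] = ValueType{};
        }
        for (IndexType inz = row_ptrs[ibrow]; inz < row_ptrs[ibrow + 1];
             ++inz) {
            const ValueType* block = vals + inz * bs * bs;
            for (int jb = 0; jb < bs; ++jb) {
                const ValueType xj = b[col_idxs[inz] * bs + jb];
                for (int ib = 0; ib < bs; ++ib) {
                    // column-major block: entry (ib, jb) sits at jb * bs + ib
                    c[ibrow * bs + ib] += block[jb * bs + ib] * xj;
                }
            }
        }
    }
}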
#include #include #include +#include #include #include "core/base/allocator.hpp" #include "core/base/iterator_factory.hpp" -#include "core/components/fixed_block.hpp" #include "core/components/prefix_sum.hpp" #include "core/matrix/fbcsr_builder.hpp" #include "reference/components/format_conversion.hpp" @@ -73,11 +73,12 @@ void spmv(const std::shared_ptr, const int bs = a->get_block_size(); const auto nvecs = static_cast(b->get_size()[1]); const IndexType nbrows = a->get_num_block_rows(); + const size_type nbnz = a->get_num_stored_blocks(); auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); auto vals = a->get_const_values(); - const blockutils::DenseBlocksView avalues( - vals, bs, bs); + const range> avalues{ + vals, dim<3>(nbnz, bs, bs)}; for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; @@ -119,8 +120,8 @@ void advanced_spmv(const std::shared_ptr, auto vals = a->get_const_values(); auto valpha = alpha->at(0, 0); auto vbeta = beta->at(0, 0); - const blockutils::DenseBlocksView avalues( - vals, bs, bs); + const range> avalues{ + vals, dim<3>(a->get_num_stored_blocks(), bs, bs)}; for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs; @@ -158,8 +159,8 @@ void convert_to_dense(const std::shared_ptr, const IndexType *const col_idxs = source->get_const_col_idxs(); const ValueType *const vals = source->get_const_values(); - const gko::blockutils::DenseBlocksView values( - vals, bs, bs); + const range> values{ + vals, dim<3>(source->get_num_stored_blocks(), bs, bs)}; for (IndexType brow = 0; brow < nbrows; ++brow) { for (size_type bcol = 0; bcol < nbcols; ++bcol) { @@ -208,8 +209,8 @@ void convert_to_csr(const std::shared_ptr, IndexType *const col_idxs = result->get_col_idxs(); ValueType *const vals = result->get_values(); - const gko::blockutils::DenseBlocksView bvalues( - bvals, bs, bs); + const range> bvalues{ + bvals, dim<3>(source->get_num_stored_blocks(), bs, bs)}; for (IndexType brow = 0; brow < nbrows; ++brow) { const IndexType nz_browstart = browptrs[brow] * bs * bs; @@ -253,10 +254,10 @@ void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz, IndexType *const col_ptrs, ValueType *const csc_vals, UnaryOperator op) { - const gko::blockutils::DenseBlocksView rvalues( - fbcsr_vals, blksz, blksz); - gko::blockutils::DenseBlocksView cvalues( - csc_vals, blksz, blksz); + const range> rvalues{ + fbcsr_vals, dim<3>(row_ptrs[num_blk_rows], blksz, blksz)}; + const range> cvalues{ + csc_vals, dim<3>(row_ptrs[num_blk_rows], blksz, blksz)}; for (IndexType brow = 0; brow < num_blk_rows; ++brow) { for (auto i = row_ptrs[brow]; i < row_ptrs[brow + 1]; ++i) { const auto dest_idx = col_ptrs[col_idxs[i]]; @@ -462,8 +463,8 @@ void extract_diagonal(std::shared_ptr, assert(diag->get_size()[0] == nbdim_min * bs); - const gko::blockutils::DenseBlocksView vblocks( - values, bs, bs); + const range> vblocks{ + values, dim<3>(orig->get_num_stored_blocks(), bs, bs)}; for (IndexType ibrow = 0; ibrow < nbdim_min; ++ibrow) { for (IndexType idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 1]; From 7fb10688ea3cd9ab73714774bd7380870684d960 Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Wed, 24 Feb 2021 10:50:04 +0100 Subject: [PATCH 56/58] rebased onto renamed block_col_major accessor --- core/matrix/fbcsr.cpp | 8 ++------ core/test/matrix/fbcsr.cpp | 2 +- core/test/matrix/fbcsr_sample.hpp | 2 +- reference/matrix/fbcsr_kernels.cpp | 14 
+++++++------- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 53d86048778..1876e421751 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -339,9 +339,7 @@ void Fbcsr::read(const mat_data &data) index_type cur_bcol = blocks.begin()->first.block_column; const index_type num_brows = detail::get_num_blocks(bs, data.size[0]); - // blockutils::DenseBlocksView values( - // tmp->values_.get_data(), bs, bs); - range> values( + range> values( tmp->values_.get_data(), dim<3>(blocks.size(), bs, bs)); for (auto it = blocks.begin(); it != blocks.end(); it++) { @@ -387,10 +385,8 @@ void Fbcsr::write(mat_data &data) const data = {tmp->get_size(), {}}; - // const blockutils::DenseBlocksView vblocks( - // tmp->values_.get_const_data(), bs_, bs_); const size_type nbnz = tmp->get_num_stored_blocks(); - const range> vblocks( + const range> vblocks( tmp->values_.get_const_data(), dim<3>(nbnz, bs_, bs_)); for (size_type brow = 0; brow < tmp->get_num_block_rows(); ++brow) { diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 3e2757a0050..abe8f1c1059 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -72,7 +72,7 @@ void assert_matrices_are_same( const IndexType nbrows = bm->get_num_block_rows(); const int bs = bm->get_block_size(); const auto nbnz = bm->get_num_stored_blocks(); - gko::range> fbvals( + gko::range> fbvals( bm->get_const_values(), gko::dim<3>(nbnz, bs, bs)); for (IndexType ibrow = 0; ibrow < nbrows; ibrow++) { diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 748e5234361..aa34981cd06 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -105,7 +105,7 @@ class FbcsrSample { c[2] = 0; c[3] = 2; - gko::range> vals( + gko::range> vals( v, gko::dim<3>(nbnz, bs, bs)); if (mtx->get_size()[0] % bs != 0) diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 6bb3563dbe7..a5dbc75d120 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -77,7 +77,7 @@ void spmv(const std::shared_ptr, auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); auto vals = a->get_const_values(); - const range> avalues{ + const range> avalues{ vals, dim<3>(nbnz, bs, bs)}; for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { @@ -120,7 +120,7 @@ void advanced_spmv(const std::shared_ptr, auto vals = a->get_const_values(); auto valpha = alpha->at(0, 0); auto vbeta = beta->at(0, 0); - const range> avalues{ + const range> avalues{ vals, dim<3>(a->get_num_stored_blocks(), bs, bs)}; for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) { @@ -159,7 +159,7 @@ void convert_to_dense(const std::shared_ptr, const IndexType *const col_idxs = source->get_const_col_idxs(); const ValueType *const vals = source->get_const_values(); - const range> values{ + const range> values{ vals, dim<3>(source->get_num_stored_blocks(), bs, bs)}; for (IndexType brow = 0; brow < nbrows; ++brow) { @@ -209,7 +209,7 @@ void convert_to_csr(const std::shared_ptr, IndexType *const col_idxs = result->get_col_idxs(); ValueType *const vals = result->get_values(); - const range> bvalues{ + const range> bvalues{ bvals, dim<3>(source->get_num_stored_blocks(), bs, bs)}; for (IndexType brow = 0; brow < nbrows; ++brow) { @@ -254,9 +254,9 @@ void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz, IndexType *const col_ptrs, ValueType *const csc_vals, UnaryOperator op) 
{ - const range> rvalues{ + const range> rvalues{ fbcsr_vals, dim<3>(row_ptrs[num_blk_rows], blksz, blksz)}; - const range> cvalues{ + const range> cvalues{ csc_vals, dim<3>(row_ptrs[num_blk_rows], blksz, blksz)}; for (IndexType brow = 0; brow < num_blk_rows; ++brow) { for (auto i = row_ptrs[brow]; i < row_ptrs[brow + 1]; ++i) { @@ -463,7 +463,7 @@ void extract_diagonal(std::shared_ptr, assert(diag->get_size()[0] == nbdim_min * bs); - const range> vblocks{ + const range> vblocks{ values, dim<3>(orig->get_num_stored_blocks(), bs, bs)}; for (IndexType ibrow = 0; ibrow < nbdim_min; ++ibrow) { From 7669264acea6e4ca42a12247ee8e790eecb11ffe Mon Sep 17 00:00:00 2001 From: Aditya Kashi Date: Tue, 23 Mar 2021 01:04:34 +0100 Subject: [PATCH 57/58] addressed changes to accessor sub-library --- core/matrix/fbcsr.cpp | 13 +++++--- core/test/matrix/fbcsr.cpp | 11 +++++-- core/test/matrix/fbcsr_sample.hpp | 8 +++-- reference/matrix/fbcsr_kernels.cpp | 49 +++++++++++++++++++++--------- 4 files changed, 58 insertions(+), 23 deletions(-) diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp index 1876e421751..00f93bf6351 100644 --- a/core/matrix/fbcsr.cpp +++ b/core/matrix/fbcsr.cpp @@ -47,6 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" #include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/matrix/fbcsr_kernels.hpp" @@ -339,8 +341,9 @@ void Fbcsr::read(const mat_data &data) index_type cur_bcol = blocks.begin()->first.block_column; const index_type num_brows = detail::get_num_blocks(bs, data.size[0]); - range> values( - tmp->values_.get_data(), dim<3>(blocks.size(), bs, bs)); + acc::range> values( + std::array{blocks.size(), (size_type)bs, (size_type)bs}, + tmp->values_.get_data()); for (auto it = blocks.begin(); it != blocks.end(); it++) { GKO_ENSURE_IN_BOUNDS(cur_brow, num_brows); @@ -386,8 +389,10 @@ void Fbcsr::write(mat_data &data) const data = {tmp->get_size(), {}}; const size_type nbnz = tmp->get_num_stored_blocks(); - const range> vblocks( - tmp->values_.get_const_data(), dim<3>(nbnz, bs_, bs_)); + const acc::range> vblocks( + std::array{nbnz, static_cast(bs_), + static_cast(bs_)}, + tmp->values_.get_const_data()); for (size_type brow = 0; brow < tmp->get_num_block_rows(); ++brow) { const auto start = tmp->row_ptrs_.get_const_data()[brow]; diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index abe8f1c1059..6b59ce1d30d 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -40,6 +40,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
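// A small illustrative helper (hypothetical, not part of the patch) for the
// pattern repeated in these last commits: building the three accessor extents
// {num_blocks, block_size, block_size} with explicit static_casts rather than
// C-style casts.
#include <array>

template <typename SizeType>
std::array<SizeType, 3> block_extents(const SizeType num_blocks,
                                      const int block_size)
{
    return {num_blocks, static_cast<SizeType>(block_size),
            static_cast<SizeType>(block_size)};
}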
 #include
+#include
+
+
+#include "accessor/block_col_major.hpp"
+#include "accessor/range.hpp"
 #include "core/test/matrix/fbcsr_sample.hpp"
 #include "core/test/utils.hpp"
@@ -72,8 +77,10 @@ void assert_matrices_are_same(
     const IndexType nbrows = bm->get_num_block_rows();
     const int bs = bm->get_block_size();
     const auto nbnz = bm->get_num_stored_blocks();
-    gko::range> fbvals(
-        bm->get_const_values(), gko::dim<3>(nbnz, bs, bs));
+    gko::acc::range> fbvals(
+        std::array{nbnz, static_cast(bs),
+                   static_cast(bs)},
+        bm->get_const_values());
     for (IndexType ibrow = 0; ibrow < nbrows; ibrow++) {
         const IndexType *const browptr = bm->get_const_row_ptrs();
diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp
index aa34981cd06..ac6007e789c 100644
--- a/core/test/matrix/fbcsr_sample.hpp
+++ b/core/test/matrix/fbcsr_sample.hpp
@@ -42,6 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
+#include "accessor/block_col_major.hpp"
+#include "accessor/range.hpp"
 #include "core/test/utils.hpp"
@@ -105,8 +107,10 @@ class FbcsrSample {
         c[2] = 0;
         c[3] = 2;
-        gko::range> vals(
-            v, gko::dim<3>(nbnz, bs, bs));
+        gko::acc::range> vals(
+            std::array{nbnz, static_cast(bs),
+                       static_cast(bs)},
+            v);
         if (mtx->get_size()[0] % bs != 0)
             throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr",
diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp
index a5dbc75d120..a74753623f9 100644
--- a/reference/matrix/fbcsr_kernels.cpp
+++ b/reference/matrix/fbcsr_kernels.cpp
@@ -42,10 +42,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
 #include
 #include
-#include
 #include
+#include "accessor/block_col_major.hpp"
+#include "accessor/range.hpp"
 #include "core/base/allocator.hpp"
 #include "core/base/iterator_factory.hpp"
 #include "core/components/prefix_sum.hpp"
@@ -77,8 +78,8 @@ void spmv(const std::shared_ptr,
     auto row_ptrs = a->get_const_row_ptrs();
     auto col_idxs = a->get_const_col_idxs();
     auto vals = a->get_const_values();
-    const range> avalues{
-        vals, dim<3>(nbnz, bs, bs)};
+    const acc::range> avalues{
+        std::array{nbnz, (size_type)bs, (size_type)bs}, vals};
     for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) {
         for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs;
@@ -120,8 +121,11 @@ void advanced_spmv(const std::shared_ptr,
     auto vals = a->get_const_values();
     auto valpha = alpha->at(0, 0);
     auto vbeta = beta->at(0, 0);
-    const range> avalues{
-        vals, dim<3>(a->get_num_stored_blocks(), bs, bs)};
+    const acc::range> avalues{
+        std::array{a->get_num_stored_blocks(),
+                   static_cast(bs),
+                   static_cast(bs)},
+        vals};
     for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) {
         for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs;
@@ -159,8 +163,11 @@ void convert_to_dense(const std::shared_ptr,
     const IndexType *const col_idxs = source->get_const_col_idxs();
     const ValueType *const vals = source->get_const_values();
-    const range> values{
-        vals, dim<3>(source->get_num_stored_blocks(), bs, bs)};
+    const acc::range> values{
+        std::array{source->get_num_stored_blocks(),
+                   static_cast(bs),
+                   static_cast(bs)},
+        vals};
     for (IndexType brow = 0; brow < nbrows; ++brow) {
         for (size_type bcol = 0; bcol < nbcols; ++bcol) {
@@ -209,8 +216,11 @@ void convert_to_csr(const std::shared_ptr,
     IndexType *const col_idxs = result->get_col_idxs();
     ValueType *const vals = result->get_values();
-    const range> bvalues{
-        bvals, dim<3>(source->get_num_stored_blocks(), bs, bs)};
+    const acc::range> bvalues{
+        std::array{source->get_num_stored_blocks(),
+                   static_cast(bs),
+                   static_cast(bs)},
+        bvals};
     for (IndexType brow = 0; brow < nbrows; ++brow) {
         const IndexType nz_browstart = browptrs[brow] * bs * bs;
@@ -254,10 +264,16 @@ void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz,
                             IndexType *const col_ptrs, ValueType *const csc_vals,
                             UnaryOperator op)
 {
-    const range> rvalues{
-        fbcsr_vals, dim<3>(row_ptrs[num_blk_rows], blksz, blksz)};
-    const range> cvalues{
-        csc_vals, dim<3>(row_ptrs[num_blk_rows], blksz, blksz)};
+    const acc::range> rvalues{
+        std::array{static_cast(row_ptrs[num_blk_rows]),
+                   static_cast(blksz),
+                   static_cast(blksz)},
+        fbcsr_vals};
+    const acc::range> cvalues{
+        std::array{static_cast(row_ptrs[num_blk_rows]),
+                   static_cast(blksz),
+                   static_cast(blksz)},
+        csc_vals};
     for (IndexType brow = 0; brow < num_blk_rows; ++brow) {
         for (auto i = row_ptrs[brow]; i < row_ptrs[brow + 1]; ++i) {
             const auto dest_idx = col_ptrs[col_idxs[i]];
@@ -463,8 +479,11 @@ void extract_diagonal(std::shared_ptr,
     assert(diag->get_size()[0] == nbdim_min * bs);
-    const range> vblocks{
-        values, dim<3>(orig->get_num_stored_blocks(), bs, bs)};
+    const acc::range> vblocks{
+        std::array{orig->get_num_stored_blocks(),
+                   static_cast(bs),
+                   static_cast(bs)},
+        values};
     for (IndexType ibrow = 0; ibrow < nbdim_min; ++ibrow) {
         for (IndexType idx = row_ptrs[ibrow]; idx < row_ptrs[ibrow + 1];

From b2ea21f515c01bbc78bb959691b1aaf5e3e43d70 Mon Sep 17 00:00:00 2001
From: Aditya
Date: Tue, 23 Mar 2021 14:43:59 +0100
Subject: [PATCH 58/58] Fixed C-style casts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Thomas Grützmacher
---
 core/matrix/fbcsr.cpp              | 3 ++-
 reference/matrix/fbcsr_kernels.cpp | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp
index 00f93bf6351..5019a047923 100644
--- a/core/matrix/fbcsr.cpp
+++ b/core/matrix/fbcsr.cpp
@@ -342,7 +342,8 @@ void Fbcsr::read(const mat_data &data)
     const index_type num_brows = detail::get_num_blocks(bs, data.size[0]);
     acc::range> values(
-        std::array{blocks.size(), (size_type)bs, (size_type)bs},
+        std::array{blocks.size(), static_cast(bs),
+                   static_cast(bs)},
         tmp->values_.get_data());
     for (auto it = blocks.begin(); it != blocks.end(); it++) {
diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp
index a74753623f9..35c0bcbbf8b 100644
--- a/reference/matrix/fbcsr_kernels.cpp
+++ b/reference/matrix/fbcsr_kernels.cpp
@@ -79,7 +79,9 @@ void spmv(const std::shared_ptr,
     auto col_idxs = a->get_const_col_idxs();
     auto vals = a->get_const_values();
     const acc::range> avalues{
-        std::array{nbnz, (size_type)bs, (size_type)bs}, vals};
+        std::array{nbnz, static_cast(bs),
+                   static_cast(bs)},
+        vals};
     for (IndexType ibrow = 0; ibrow < nbrows; ++ibrow) {
         for (IndexType i = ibrow * bs * nvecs; i < (ibrow + 1) * bs * nvecs;
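
The recurring change in the two patches above is that FBCSR value arrays are now viewed through an accessor range built from an extent array (number of stored blocks, block size, block size) and a raw pointer, rather than from a pointer and a dim<3>, and the C-style casts used for the extents are replaced by static_cast. Below is a minimal, self-contained sketch of such a block-wise 3D view over a flat value array; the names FlatBlockView and at() are illustrative assumptions and not the interface of the accessor sub-library, and row-major storage within each block is likewise only an assumption of this sketch.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Illustrative 3D view over a flat array of small dense blocks,
    // indexed as (block, row-in-block, col-in-block). Storage order
    // inside a block is assumed row-major here; the accessor used in
    // the patches above may differ.
    template <typename ValueType>
    class FlatBlockView {
    public:
        FlatBlockView(std::size_t num_blocks, int block_size, ValueType *data)
            : num_blocks_{num_blocks}, bs_{block_size}, data_{data}
        {}

        ValueType &at(std::size_t block, int row, int col)
        {
            assert(block < num_blocks_);
            assert(row >= 0 && row < bs_ && col >= 0 && col < bs_);
            // each block occupies bs_ * bs_ contiguous entries
            return data_[block * bs_ * bs_ + row * bs_ + col];
        }

    private:
        std::size_t num_blocks_;
        int bs_;
        ValueType *data_;
    };

    int main()
    {
        const int bs = 2;            // block size
        const std::size_t nbnz = 3;  // number of stored blocks
        std::vector<double> values(nbnz * bs * bs, 0.0);

        // construct the view from the extents and the raw value pointer,
        // mirroring the (extent-array, pointer) construction in the diffs
        FlatBlockView<double> blocks(nbnz, bs, values.data());
        blocks.at(1, 0, 1) = 3.5;    // entry (0,1) of the second block

        return blocks.at(1, 0, 1) == 3.5 ? 0 : 1;
    }
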