diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index b3296ceeb92..652e8b0df6e 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -736,15 +736,15 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute( template __global__ -__launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals( - const size_type num_rows, const size_type num_cols, - const size_type row_offset, const size_type col_offset, - const IndexType* __restrict__ source_row_ptrs, - const IndexType* __restrict__ source_col_idxs, - const ValueType* __restrict__ source_values, - const IndexType* __restrict__ result_row_ptrs, - IndexType* __restrict__ result_col_idxs, - ValueType* __restrict__ result_values) + __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals( + const size_type num_rows, const size_type num_cols, + const size_type row_offset, const size_type col_offset, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values) { const auto res_row = thread::get_thread_id_flat(); if (res_row < num_rows) { @@ -766,10 +766,11 @@ __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals( template __global__ -__launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span( - const span row_span, const span col_span, - const IndexType* __restrict__ row_ptrs, - const IndexType* __restrict__ col_idxs, IndexType* __restrict__ nnz_per_row) + __launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span( + const span row_span, const span col_span, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + IndexType* __restrict__ nnz_per_row) { const auto src_row = thread::get_thread_id_flat() + row_span.begin; if (src_row < row_span.end) { @@ -925,7 +926,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup( gko::matrix::sparsity_type allowed, int64* __restrict__ row_desc, int32* __restrict__ storage) { - constexpr int block_size = 32; + constexpr int bitmap_block_size = + gko::matrix::device_sparsity_lookup::block_size; auto row = thread::get_subwarp_id_flat(); if (row >= num_rows) { return; @@ -953,7 +955,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup( return; } // dense bitmap storage - const auto num_blocks = static_cast(ceildiv(col_range, block_size)); + const auto num_blocks = + static_cast(ceildiv(col_range, bitmap_block_size)); if (num_blocks * 2 <= available_storage && ((static_cast(allowed) & static_cast(matrix::sparsity_type::bitmap)) != 0)) { @@ -971,10 +974,11 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup( // fill bitmaps with sparsity pattern for (IndexType base_i = 0; base_i < row_len; base_i += subwarp_size) { const auto i = base_i + lane; - const auto col = i < row_len ? local_cols[i] : INT_MAX; + const auto col = + i < row_len ? local_cols[i] : device_numeric_limits::max; const auto rel_col = static_cast(col - min_col); - const auto block = rel_col / block_size; - const auto col_in_block = rel_col % block_size; + const auto block = rel_col / bitmap_block_size; + const auto col_in_block = rel_col % bitmap_block_size; auto local_bitmap = uint32{i < row_len ? 
1u : 0u} << col_in_block; bool is_first = segment_scan(subwarp, block, local_bitmap, @@ -1014,7 +1018,7 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup( static_cast(matrix::sparsity_type::hash); } // fill hashmap with sentinel - constexpr int32 empty = -1; + constexpr int32 empty = invalid_index(); for (int32 i = lane; i < available_storage; i += subwarp_size) { local_storage[i] = empty; } @@ -1048,7 +1052,9 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup( hash -= available_storage; } // this can only fail for row_length < 16 - assert(hash < available_storage); + // because then available_storage < 32 and + // colliding can have more than available_storage bits set. + GKO_ASSERT(hash < available_storage); entry = local_storage[hash]; } colliding = subwarp.match_any(hash); diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 7b473b915b5..3b427808107 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -41,7 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include @@ -51,6 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/kernel_declaration.hpp" +#include "core/matrix/csr_lookup.hpp" namespace gko { diff --git a/include/ginkgo/core/matrix/csr_lookup.hpp b/core/matrix/csr_lookup.hpp similarity index 81% rename from include/ginkgo/core/matrix/csr_lookup.hpp rename to core/matrix/csr_lookup.hpp index b6b06be3bc7..120fcbfb20f 100644 --- a/include/ginkgo/core/matrix/csr_lookup.hpp +++ b/core/matrix/csr_lookup.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_ -#define GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_ +#ifndef GKO_CORE_MATRIX_CSR_LOOKUP_HPP_ +#define GKO_CORE_MATRIX_CSR_LOOKUP_HPP_ #include @@ -59,10 +59,10 @@ enum class sparsity_type : int { full = 1, /** * The row is sufficiently dense that its sparsity pattern can be stored in - * a dense bitmap consisting of `storage_size` blocks of size `block_size`, - * with a total of `storage_size` blocks. Each block stores its sparsity - * pattern and the number of columns before. This means that the relative - * output index can be computed as + * a dense bitmap consisting of `storage_size` blocks of size `block_size`. + * Each block stores its sparsity pattern as a bitmask and the number of + * columns before the block as integer. This means that the relative output + * index can be computed as * ``` * auto block = (col - min_col) / block_size; * auto local_col = (col - min_col) % block_size; @@ -76,7 +76,7 @@ enum class sparsity_type : int { * The hashtable has size `storage_size` and stores the relative output * index directly, i.e. 
* ``` - * auto hash_key = col - min_col; + * auto hash_key = col; * auto hash_bucket = hash(hash_key); * while (col_idxs[hashtable[hash_bucket]] != col) { * hash_bucket = (hash_bucket + 1) % storage_size; // linear probing @@ -88,20 +88,19 @@ enum class sparsity_type : int { }; -inline sparsity_type operator|(sparsity_type a, sparsity_type b) +GKO_ATTRIBUTES GKO_INLINE sparsity_type operator|(sparsity_type a, + sparsity_type b) { return static_cast(static_cast(a) | static_cast(b)); } -template -class csr_sparsity_lookup { - const IndexType* row_ptrs; - const IndexType* col_idxs; - Array row_desc; - Array storage; -}; +GKO_ATTRIBUTES GKO_INLINE bool csr_lookup_allowed(matrix::sparsity_type allowed, + matrix::sparsity_type type) +{ + return ((static_cast(allowed) & static_cast(type)) != 0); +} template @@ -124,15 +123,15 @@ struct device_sparsity_lookup { case sparsity_type::hash: return lookup_hash(col); } - assert(false); + GKO_ASSERT(false); } GKO_ATTRIBUTES GKO_INLINE IndexType lookup_full(IndexType col) const { const auto min_col = col_idxs[0]; const auto out_idx = col - min_col; - assert(out_idx < row_nnz); - assert(col_idxs[out_idx] == col); + GKO_ASSERT(out_idx < row_nnz); + GKO_ASSERT(col_idxs[out_idx] == col); return out_idx; } @@ -147,12 +146,12 @@ struct device_sparsity_lookup { const auto block = rel_col / block_size; const auto col_in_block = rel_col % block_size; const auto prefix_mask = (uint32{1} << col_in_block) - 1; - assert(block < num_blocks); - assert(block_bitmaps[block] & (uint32{1} << col_in_block)); + GKO_ASSERT(block < num_blocks); + GKO_ASSERT(block_bitmaps[block] & (uint32{1} << col_in_block)); const auto out_idx = block_bases[block] + gko::detail::popcount(block_bitmaps[block] & prefix_mask); - assert(col_idxs[out_idx] == col); + GKO_ASSERT(col_idxs[out_idx] == col); return out_idx; } @@ -162,19 +161,19 @@ struct device_sparsity_lookup { const auto hash_param = static_cast(desc >> 32); const auto hashmap = storage; auto hash = (static_cast(col) * hash_param) % hashmap_size; - assert(hashmap[hash] >= 0); - assert(hashmap[hash] < row_nnz); + GKO_ASSERT(hashmap[hash] >= 0); + GKO_ASSERT(hashmap[hash] < row_nnz); // linear probing with sentinel to avoid infinite loop while (col_idxs[hashmap[hash]] != col) { hash++; if (hash >= hashmap_size) { hash = 0; } - assert(hashmap[hash] >= 0); - assert(hashmap[hash] < row_nnz); + GKO_ASSERT(hashmap[hash] >= 0); + GKO_ASSERT(hashmap[hash] < row_nnz); } const auto out_idx = hashmap[hash]; - assert(col_idxs[out_idx] == col); + GKO_ASSERT(col_idxs[out_idx] == col); return out_idx; } }; @@ -183,4 +182,4 @@ struct device_sparsity_lookup { } // namespace matrix } // namespace gko -#endif // GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_ +#endif // GKO_CORE_MATRIX_CSR_LOOKUP_HPP_ diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index c4da4687edd..9bdb7e53c2a 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -49,7 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include @@ -60,6 +59,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/format_conversion_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_lookup.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "cuda/base/config.hpp" diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index fb7830fbeba..dfd352d4996 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -49,7 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include @@ -58,6 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/device_matrix_data_kernels.hpp" #include "core/components/fill_array_kernels.hpp" #include "core/components/format_conversion_kernels.hpp" +#include "core/matrix/csr_lookup.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "cuda/base/config.hpp" diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index ab591555815..483c678d7fb 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -1012,62 +1012,4 @@ TEST_F(Csr, AddScaledIdentityToNonSquare) } -TEST_F(Csr, BuildLookupDataWorks) -{ - set_up_apply_data(std::make_shared()); - using gko::matrix::sparsity_type; - gko::Array row_descs(ref, mtx->get_size()[0]); - gko::Array lookup_info(ref, mtx->get_num_stored_elements() * 2); - gko::Array drow_descs(cuda, mtx->get_size()[0]); - gko::Array dlookup_info(cuda, - mtx->get_num_stored_elements() * 2); - for (auto allowed_methods : - {sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::full | sparsity_type::hash, sparsity_type::hash}) { - const auto full_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::full)); - const auto bitmap_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::bitmap)); - const auto bitmap_equivalent = - bitmap_allowed ? sparsity_type::bitmap : sparsity_type::hash; - const auto full_equivalent = - full_allowed ? 
sparsity_type::full : bitmap_equivalent; - SCOPED_TRACE("full: " + std::to_string(full_allowed) + - " bitmap: " + std::to_string(bitmap_allowed)); - - gko::kernels::reference::csr::build_lookup( - ref, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - mtx->get_size()[0], allowed_methods, row_descs.get_data(), - lookup_info.get_data()); - gko::kernels::cuda::csr::build_lookup( - cuda, dmtx->get_const_row_ptrs(), dmtx->get_const_col_idxs(), - dmtx->get_size()[0], allowed_methods, drow_descs.get_data(), - dlookup_info.get_data()); - - gko::Array host_row_descs(ref, drow_descs); - gko::Array host_lookup_info(ref, dlookup_info); - for (int row = 0; row < dmtx->get_size()[0]; row++) { - SCOPED_TRACE("row: " + std::to_string(row)); - const auto row_begin = mtx->get_const_row_ptrs()[row]; - const auto row_nnz = mtx->get_const_row_ptrs()[row + 1] - row_begin; - gko::matrix::device_sparsity_lookup lookup{ - mtx->get_const_col_idxs() + row_begin, row_nnz, - host_lookup_info.get_const_data() + (row_begin * 2), - host_row_descs.get_const_data()[row]}; - - ASSERT_EQ(host_row_descs.get_const_data()[row] & 0xFFFF, - row_descs.get_const_data()[row] & 0xFFFF); - for (int nz = 0; nz < row_nnz; nz++) { - SCOPED_TRACE("nz: " + std::to_string(nz)); - const auto col = mtx->get_const_col_idxs()[nz + row_begin]; - ASSERT_EQ(nz, lookup[col]); - } - } - } -} - - } // namespace diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 5a79765626e..677a2627897 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -2270,13 +2270,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); -bool csr_lookup_allowed(matrix::sparsity_type allowed, - matrix::sparsity_type type) -{ - return ((static_cast(allowed) & static_cast(type)) != 0); -} - - template bool csr_lookup_try_full(IndexType row_len, IndexType col_range, matrix::sparsity_type allowed, int64& row_desc) @@ -2301,7 +2294,7 @@ bool csr_lookup_try_bitmap(IndexType row_len, IndexType col_range, gko::matrix::device_sparsity_lookup::block_size; using matrix::sparsity_type; bool is_allowed = csr_lookup_allowed(allowed, sparsity_type::bitmap); - const auto num_blocks = ceildiv(col_range, block_size); + const auto num_blocks = static_cast(ceildiv(col_range, block_size)); if (is_allowed && num_blocks * 2 <= available_storage) { row_desc = (static_cast(num_blocks) << 32) | static_cast(sparsity_type::bitmap); @@ -2339,12 +2332,12 @@ void csr_lookup_build_hash(IndexType row_len, IndexType available_storage, 1u | static_cast(available_storage * inv_golden_ratio); row_desc = (static_cast(hash_parameter) << 32) | static_cast(matrix::sparsity_type::hash); - std::fill_n(local_storage, available_storage, -1); + std::fill_n(local_storage, available_storage, invalid_index()); for (int32 nz = 0; nz < row_len; nz++) { auto hash = (static_cast(cols[nz]) * hash_parameter) % static_cast(available_storage); // linear probing: find the next empty entry - while (local_storage[hash] != -1) { + while (local_storage[hash] != invalid_index()) { hash++; if (hash >= available_storage) { hash = 0; diff --git a/dpcpp/test/matrix/csr_kernels.cpp b/dpcpp/test/matrix/csr_kernels.cpp index 3de309b336e..375f7d93a93 100644 --- a/dpcpp/test/matrix/csr_kernels.cpp +++ b/dpcpp/test/matrix/csr_kernels.cpp @@ -964,62 +964,4 @@ TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) } -TEST_F(Csr, BuildLookupDataWorks) -{ - set_up_apply_data(std::make_shared()); - using gko::matrix::sparsity_type; - gko::Array 
row_descs(ref, mtx->get_size()[0]); - gko::Array lookup_info(ref, mtx->get_num_stored_elements() * 2); - gko::Array drow_descs(dpcpp, mtx->get_size()[0]); - gko::Array dlookup_info(dpcpp, - mtx->get_num_stored_elements() * 2); - for (auto allowed_methods : - {sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::full | sparsity_type::hash, sparsity_type::hash}) { - const auto full_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::full)); - const auto bitmap_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::bitmap)); - const auto bitmap_equivalent = - bitmap_allowed ? sparsity_type::bitmap : sparsity_type::hash; - const auto full_equivalent = - full_allowed ? sparsity_type::full : bitmap_equivalent; - SCOPED_TRACE("full: " + std::to_string(full_allowed) + - " bitmap: " + std::to_string(bitmap_allowed)); - - gko::kernels::reference::csr::build_lookup( - ref, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - mtx->get_size()[0], allowed_methods, row_descs.get_data(), - lookup_info.get_data()); - gko::kernels::dpcpp::csr::build_lookup( - dpcpp, dmtx->get_const_row_ptrs(), dmtx->get_const_col_idxs(), - dmtx->get_size()[0], allowed_methods, drow_descs.get_data(), - dlookup_info.get_data()); - - gko::Array host_row_descs(ref, drow_descs); - gko::Array host_lookup_info(ref, dlookup_info); - for (int row = 0; row < dmtx->get_size()[0]; row++) { - SCOPED_TRACE("row: " + std::to_string(row)); - const auto row_begin = mtx->get_const_row_ptrs()[row]; - const auto row_nnz = mtx->get_const_row_ptrs()[row + 1] - row_begin; - gko::matrix::device_sparsity_lookup lookup{ - mtx->get_const_col_idxs() + row_begin, row_nnz, - host_lookup_info.get_const_data() + (row_begin * 2), - host_row_descs.get_const_data()[row]}; - - ASSERT_EQ(host_row_descs.get_const_data()[row] & 0xFFFF, - row_descs.get_const_data()[row] & 0xFFFF); - for (int nz = 0; nz < row_nnz; nz++) { - SCOPED_TRACE("nz: " + std::to_string(nz)); - const auto col = mtx->get_const_col_idxs()[nz + row_begin]; - ASSERT_EQ(nz, lookup[col]); - } - } - } -} - - } // namespace diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index 9f5ed7143c9..9025d8c493b 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -50,7 +50,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include @@ -61,6 +60,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/format_conversion_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_lookup.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "hip/base/config.hip.hpp" diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index df65fed3277..8fbf9a05ac9 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -50,7 +50,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include @@ -59,6 +58,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/device_matrix_data_kernels.hpp" #include "core/components/fill_array_kernels.hpp" #include "core/components/format_conversion_kernels.hpp" +#include "core/matrix/csr_lookup.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "hip/base/config.hip.hpp" diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 670fb28fc94..bb6567f97d3 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -1022,62 +1022,4 @@ TEST_F(Csr, AddScaledIdentityToNonSquare) } -TEST_F(Csr, BuildLookupDataWorks) -{ - set_up_apply_data(std::make_shared()); - using gko::matrix::sparsity_type; - gko::Array row_descs(ref, mtx->get_size()[0]); - gko::Array lookup_info(ref, mtx->get_num_stored_elements() * 2); - gko::Array drow_descs(hip, mtx->get_size()[0]); - gko::Array dlookup_info(hip, - mtx->get_num_stored_elements() * 2); - for (auto allowed_methods : - {sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::full | sparsity_type::hash, sparsity_type::hash}) { - const auto full_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::full)); - const auto bitmap_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::bitmap)); - const auto bitmap_equivalent = - bitmap_allowed ? sparsity_type::bitmap : sparsity_type::hash; - const auto full_equivalent = - full_allowed ? sparsity_type::full : bitmap_equivalent; - SCOPED_TRACE("full: " + std::to_string(full_allowed) + - " bitmap: " + std::to_string(bitmap_allowed)); - - gko::kernels::reference::csr::build_lookup( - ref, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - mtx->get_size()[0], allowed_methods, row_descs.get_data(), - lookup_info.get_data()); - gko::kernels::hip::csr::build_lookup( - hip, dmtx->get_const_row_ptrs(), dmtx->get_const_col_idxs(), - dmtx->get_size()[0], allowed_methods, drow_descs.get_data(), - dlookup_info.get_data()); - - gko::Array host_row_descs(ref, drow_descs); - gko::Array host_lookup_info(ref, dlookup_info); - for (int row = 0; row < dmtx->get_size()[0]; row++) { - SCOPED_TRACE("row: " + std::to_string(row)); - const auto row_begin = mtx->get_const_row_ptrs()[row]; - const auto row_nnz = mtx->get_const_row_ptrs()[row + 1] - row_begin; - gko::matrix::device_sparsity_lookup lookup{ - mtx->get_const_col_idxs() + row_begin, row_nnz, - host_lookup_info.get_const_data() + (row_begin * 2), - host_row_descs.get_const_data()[row]}; - - ASSERT_EQ(host_row_descs.get_const_data()[row] & 0xFFFF, - row_descs.get_const_data()[row] & 0xFFFF); - for (int nz = 0; nz < row_nnz; nz++) { - SCOPED_TRACE("nz: " + std::to_string(nz)); - const auto col = mtx->get_const_col_idxs()[nz + row_begin]; - ASSERT_EQ(nz, lookup[col]); - } - } - } -} - - } // namespace diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 9794b3d4b42..453bfae6e3a 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -87,7 +87,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include -#include #include #include #include diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 5d3b6be211b..d7263b63a5e 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -1129,13 +1129,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); -bool csr_lookup_allowed(matrix::sparsity_type allowed, - matrix::sparsity_type type) -{ - return ((static_cast(allowed) & static_cast(type)) != 0); -} - - template bool csr_lookup_try_full(IndexType row_len, IndexType col_range, matrix::sparsity_type allowed, int64& row_desc) @@ -1150,9 +1143,6 @@ bool csr_lookup_try_full(IndexType row_len, IndexType col_range, } -constexpr static int csr_lookup_block_size = 32; - - template bool csr_lookup_try_bitmap(IndexType row_len, IndexType col_range, IndexType min_col, IndexType available_storage, @@ -1161,7 +1151,9 @@ bool csr_lookup_try_bitmap(IndexType row_len, IndexType col_range, { using matrix::sparsity_type; bool is_allowed = csr_lookup_allowed(allowed, sparsity_type::bitmap); - const auto num_blocks = ceildiv(col_range, csr_lookup_block_size); + constexpr auto bitmap_block_size = + gko::matrix::device_sparsity_lookup::block_size; + const auto num_blocks = ceildiv(col_range, bitmap_block_size); if (is_allowed && num_blocks * 2 <= available_storage) { row_desc = (static_cast(num_blocks) << 32) | static_cast(sparsity_type::bitmap); @@ -1171,8 +1163,8 @@ bool csr_lookup_try_bitmap(IndexType row_len, IndexType col_range, std::fill_n(block_bitmaps, num_blocks, 0); for (auto col_it = cols; col_it < cols + row_len; col_it++) { const auto rel_col = *col_it - min_col; - const auto block = rel_col / csr_lookup_block_size; - const auto col_in_block = rel_col % csr_lookup_block_size; + const auto block = rel_col / bitmap_block_size; + const auto col_in_block = rel_col % bitmap_block_size; block_bitmaps[block] |= uint32{1} << col_in_block; } int32 partial_sum{}; @@ -1199,12 +1191,14 @@ void csr_lookup_build_hash(IndexType row_len, IndexType available_storage, 1u | static_cast(available_storage * inv_golden_ratio); row_desc = (static_cast(hash_parameter) << 32) | static_cast(matrix::sparsity_type::hash); - std::fill_n(local_storage, available_storage, -1); + std::fill_n(local_storage, available_storage, invalid_index()); for (int32 nz = 0; nz < row_len; nz++) { - auto hash = (static_cast(cols[nz]) * hash_parameter) % + auto hash = (static_cast::type>( + cols[nz]) * + hash_parameter) % static_cast(available_storage); // linear probing: find the next empty entry - while (local_storage[hash] != -1) { + while (local_storage[hash] != invalid_index()) { hash++; if (hash >= available_storage) { hash = 0; diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 2a00fbeb220..521a923e55d 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -45,7 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include @@ -54,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/csr_kernels.hpp" +#include "core/matrix/csr_lookup.hpp" #include "core/test/utils.hpp" #include "core/test/utils/unsort_matrix.hpp" #include "core/utils/matrix_utils.hpp" @@ -903,61 +903,4 @@ TEST_F(Csr, CreateSubMatrixFromindex_setIsEquivalentToRef) } -TEST_F(Csr, BuildLookupDataWorks) -{ - set_up_apply_data(); - using gko::matrix::sparsity_type; - gko::Array row_descs(ref, mtx->get_size()[0]); - gko::Array lookup_info(ref, mtx->get_num_stored_elements() * 2); - gko::Array drow_descs(omp, mtx->get_size()[0]); - gko::Array dlookup_info(omp, - mtx->get_num_stored_elements() * 2); - for (auto allowed_methods : - {sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::bitmap | sparsity_type::hash, - sparsity_type::full | sparsity_type::hash, sparsity_type::hash}) { - const auto full_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::full)); - const auto bitmap_allowed = - static_cast(static_cast(allowed_methods) & - static_cast(sparsity_type::bitmap)); - const auto bitmap_equivalent = - bitmap_allowed ? sparsity_type::bitmap : sparsity_type::hash; - const auto full_equivalent = - full_allowed ? sparsity_type::full : bitmap_equivalent; - SCOPED_TRACE("full: " + std::to_string(full_allowed) + - " bitmap: " + std::to_string(bitmap_allowed)); - - gko::kernels::reference::csr::build_lookup( - ref, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - mtx->get_size()[0], allowed_methods, row_descs.get_data(), - lookup_info.get_data()); - gko::kernels::omp::csr::build_lookup( - omp, dmtx->get_const_row_ptrs(), dmtx->get_const_col_idxs(), - dmtx->get_size()[0], allowed_methods, drow_descs.get_data(), - dlookup_info.get_data()); - - for (int row = 0; row < dmtx->get_size()[0]; row++) { - SCOPED_TRACE("row: " + std::to_string(row)); - const auto row_begin = mtx->get_const_row_ptrs()[row]; - const auto row_nnz = mtx->get_const_row_ptrs()[row + 1] - row_begin; - gko::matrix::device_sparsity_lookup lookup{ - mtx->get_const_col_idxs() + row_begin, row_nnz, - dlookup_info.get_const_data() + (row_begin * 2), - drow_descs.get_const_data()[row]}; - - ASSERT_EQ( - static_cast(drow_descs.get_const_data()[row]) & 0xFFFF, - static_cast(row_descs.get_const_data()[row]) & 0xFFFF); - for (int nz = 0; nz < row_nnz; nz++) { - SCOPED_TRACE("nz: " + std::to_string(nz)); - const auto col = mtx->get_const_col_idxs()[nz + row_begin]; - ASSERT_EQ(nz, lookup[col]); - } - } - } -} - - } // namespace diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index a5fafe42cb5..1397d069b59 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -1118,13 +1118,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); -bool csr_lookup_allowed(matrix::sparsity_type allowed, - matrix::sparsity_type type) -{ - return ((static_cast(allowed) & static_cast(type)) != 0); -} - - template bool csr_lookup_try_full(IndexType row_len, IndexType col_range, matrix::sparsity_type allowed, int64& row_desc) @@ -1149,7 +1142,7 @@ bool csr_lookup_try_bitmap(IndexType row_len, IndexType col_range, gko::matrix::device_sparsity_lookup::block_size; using matrix::sparsity_type; bool is_allowed = csr_lookup_allowed(allowed, sparsity_type::bitmap); - const auto num_blocks = ceildiv(col_range, block_size); + const auto num_blocks = static_cast(ceildiv(col_range, block_size)); if (is_allowed && num_blocks * 2 <= available_storage) { 
row_desc = (static_cast(num_blocks) << 32) | static_cast(sparsity_type::bitmap); @@ -1187,12 +1180,14 @@ void csr_lookup_build_hash(IndexType row_len, IndexType available_storage, 1u | static_cast(available_storage * inv_golden_ratio); row_desc = (static_cast(hash_parameter) << 32) | static_cast(matrix::sparsity_type::hash); - std::fill_n(local_storage, available_storage, -1); + std::fill_n(local_storage, available_storage, invalid_index()); for (int32 nz = 0; nz < row_len; nz++) { - auto hash = (static_cast(cols[nz]) * hash_parameter) % + auto hash = (static_cast::type>( + cols[nz]) * + hash_parameter) % static_cast(available_storage); // linear probing: find the next empty entry - while (local_storage[hash] != -1) { + while (local_storage[hash] != invalid_index()) { hash++; if (hash >= available_storage) { hash = 0; diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 6115189db22..09a341ccfae 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -44,7 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include @@ -55,6 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/csr_kernels.hpp" +#include "core/matrix/csr_lookup.hpp" #include "core/test/utils.hpp" @@ -1920,8 +1920,8 @@ TYPED_TEST(Csr, GeneratesLookupData) data.nonzeros.emplace_back(3, 64, 1.0); data.ensure_row_major_order(); mtx->read(data); - gko::Array row_descs(this->exec, mtx->get_size()[0]); - gko::Array lookup_info(this->exec, + gko::array row_descs(this->exec, mtx->get_size()[0]); + gko::array lookup_info(this->exec, mtx->get_num_stored_elements() * 2); for (auto allowed_methods : {sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash, diff --git a/test/matrix/csr_kernels.cpp b/test/matrix/csr_kernels.cpp index 2be151a55de..a19fa1ad8c8 100644 --- a/test/matrix/csr_kernels.cpp +++ b/test/matrix/csr_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include "core/matrix/csr_kernels.hpp" #include @@ -42,11 +42,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include "core/components/fill_array_kernels.hpp" -#include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" #include "test/utils/executor.hpp" @@ -132,5 +132,76 @@ TEST_F(Csr, InvScaleIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(dx, x, r::value); } +TEST_F(Csr, BuildLookupDataWorks) +{ + using gko::matrix::sparsity_type; + auto data = gko::test::generate_random_matrix_data( + 628, 923, std::uniform_int_distribution<>(10, 300), + std::normal_distribution(-1.0, 1.0), rand_engine); + // create a few empty rows + data.nonzeros.erase( + std::remove_if(data.nonzeros.begin(), data.nonzeros.end(), + [](auto entry) { return entry.row % 200 == 21; }), + data.nonzeros.end()); + // insert a full row and a pretty dense row + for (int i = 0; i < 100; i++) { + data.nonzeros.emplace_back(221, i + 100, 1.0); + data.nonzeros.emplace_back(421, i * 3 + 100, 2.0); + } + data.ensure_row_major_order(); + // initialize the matrices + auto mtx = Mtx::create(ref); + mtx->read(data); + auto dmtx = gko::clone(exec, mtx); + gko::array row_descs(ref, mtx->get_size()[0]); + gko::array lookup_info(ref, mtx->get_num_stored_elements() * 2); + gko::array drow_descs(exec, mtx->get_size()[0]); + gko::array dlookup_info(exec, + mtx->get_num_stored_elements() * 2); + for (auto allowed_methods : + {sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash, + sparsity_type::bitmap | sparsity_type::hash, + sparsity_type::full | sparsity_type::hash, sparsity_type::hash}) { + const auto full_allowed = + static_cast(static_cast(allowed_methods) & + static_cast(sparsity_type::full)); + const auto bitmap_allowed = + static_cast(static_cast(allowed_methods) & + static_cast(sparsity_type::bitmap)); + const auto bitmap_equivalent = + bitmap_allowed ? sparsity_type::bitmap : sparsity_type::hash; + const auto full_equivalent = + full_allowed ? sparsity_type::full : bitmap_equivalent; + SCOPED_TRACE("full: " + std::to_string(full_allowed) + + " bitmap: " + std::to_string(bitmap_allowed)); + + gko::kernels::reference::csr::build_lookup( + ref, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + mtx->get_size()[0], allowed_methods, row_descs.get_data(), + lookup_info.get_data()); + gko::kernels::EXEC_NAMESPACE::csr::build_lookup( + exec, dmtx->get_const_row_ptrs(), dmtx->get_const_col_idxs(), + dmtx->get_size()[0], allowed_methods, drow_descs.get_data(), + dlookup_info.get_data()); + + gko::array host_row_descs(ref, drow_descs); + gko::array host_lookup_info(ref, dlookup_info); + for (int row = 0; row < dmtx->get_size()[0]; row++) { + const auto row_begin = mtx->get_const_row_ptrs()[row]; + const auto row_nnz = mtx->get_const_row_ptrs()[row + 1] - row_begin; + gko::matrix::device_sparsity_lookup lookup{ + mtx->get_const_col_idxs() + row_begin, row_nnz, + host_lookup_info.get_const_data() + (row_begin * 2), + host_row_descs.get_const_data()[row]}; + ASSERT_EQ(host_row_descs.get_const_data()[row] & 0xFFFF, + row_descs.get_const_data()[row] & 0xFFFF); + for (int nz = 0; nz < row_nnz; nz++) { + const auto col = mtx->get_const_col_idxs()[nz + row_begin]; + ASSERT_EQ(nz, lookup[col]); + } + } + } +} + } // namespace
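Reviewer note: as a quick cross-check of the row-lookup scheme this PR introduces, the standalone host sketch below reproduces the bitmap-based relative-index computation documented in `core/matrix/csr_lookup.hpp` (32-column blocks, per-block base offsets, popcount over a prefix mask). It is only an illustration under those assumptions, not the `device_sparsity_lookup` implementation; the helper names `build_bitmap_lookup`/`lookup_bitmap` and the toy column indices are made up for the example, and the hash variant (multiplicative hash plus linear probing) is omitted for brevity.

```cpp
// Host-only sketch of the bitmap row-lookup scheme described in
// core/matrix/csr_lookup.hpp. Illustrative names, not Ginkgo API.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Portable popcount so the sketch does not require C++20 <bit>.
int popcount32(std::uint32_t x)
{
    int count = 0;
    for (; x != 0; x &= x - 1) {
        count++;
    }
    return count;
}

// Per-row lookup data: one 32-bit sparsity mask per block of 32 columns,
// plus the number of nonzeros stored before each block.
struct bitmap_lookup {
    std::int32_t min_col;
    std::vector<std::uint32_t> bitmaps;
    std::vector<std::int32_t> bases;
};

// Builds the lookup data from the sorted, non-empty column indices of a row.
bitmap_lookup build_bitmap_lookup(const std::vector<std::int32_t>& cols)
{
    constexpr std::int32_t block_size = 32;
    bitmap_lookup lookup{};
    lookup.min_col = cols.front();
    const auto col_range = cols.back() - lookup.min_col + 1;
    const auto num_blocks = (col_range + block_size - 1) / block_size;
    lookup.bitmaps.assign(num_blocks, 0u);
    lookup.bases.assign(num_blocks, 0);
    // mark every nonzero column in its block's bitmask
    for (const auto col : cols) {
        const auto rel_col = col - lookup.min_col;
        lookup.bitmaps[rel_col / block_size] |= std::uint32_t{1}
                                                << (rel_col % block_size);
    }
    // exclusive prefix sum of per-block popcounts gives the block bases
    std::int32_t partial_sum = 0;
    for (std::int32_t block = 0; block < num_blocks; block++) {
        lookup.bases[block] = partial_sum;
        partial_sum += popcount32(lookup.bitmaps[block]);
    }
    return lookup;
}

// Relative output index of `col` inside its row, following the formula from
// the sparsity_type::bitmap documentation.
std::int32_t lookup_bitmap(const bitmap_lookup& lookup, std::int32_t col)
{
    const auto rel_col = col - lookup.min_col;
    const auto block = rel_col / 32;
    const auto col_in_block = rel_col % 32;
    const auto prefix_mask = (std::uint32_t{1} << col_in_block) - 1u;
    return lookup.bases[block] +
           popcount32(lookup.bitmaps[block] & prefix_mask);
}

int main()
{
    // toy row: sorted column indices of its nonzeros
    const std::vector<std::int32_t> cols{1, 4, 7, 40, 41, 95};
    const auto lookup = build_bitmap_lookup(cols);
    for (std::size_t nz = 0; nz < cols.size(); nz++) {
        // every stored column must map back to its position within the row
        assert(lookup_bitmap(lookup, cols[nz]) ==
               static_cast<std::int32_t>(nz));
    }
    return 0;
}
```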