review updates

* move everything to the private interface * remove duplicate tests * test edge cases explicitly Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com> Co-authored-by: Marcel Koch <marcel.koch@kit.edu>
ginkgo-project · May 6, 2022 · e686e9e · e686e9e
1 parent d6cd5b1
commit e686e9e
Show file tree

Hide file tree

Showing 17 changed files with 153 additions and 327 deletions.
diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc
@@ -736,15 +736,15 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute(
 
 template <typename ValueType, typename IndexType>
 __global__
-__launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(
-    const size_type num_rows, const size_type num_cols,
-    const size_type row_offset, const size_type col_offset,
-    const IndexType* __restrict__ source_row_ptrs,
-    const IndexType* __restrict__ source_col_idxs,
-    const ValueType* __restrict__ source_values,
-    const IndexType* __restrict__ result_row_ptrs,
-    IndexType* __restrict__ result_col_idxs,
-    ValueType* __restrict__ result_values)
+    __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(
+        const size_type num_rows, const size_type num_cols,
+        const size_type row_offset, const size_type col_offset,
+        const IndexType* __restrict__ source_row_ptrs,
+        const IndexType* __restrict__ source_col_idxs,
+        const ValueType* __restrict__ source_values,
+        const IndexType* __restrict__ result_row_ptrs,
+        IndexType* __restrict__ result_col_idxs,
+        ValueType* __restrict__ result_values)
 {
     const auto res_row = thread::get_thread_id_flat();
     if (res_row < num_rows) {
@@ -766,10 +766,11 @@ __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(
 
 template <typename IndexType>
 __global__
-__launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span(
-    const span row_span, const span col_span,
-    const IndexType* __restrict__ row_ptrs,
-    const IndexType* __restrict__ col_idxs, IndexType* __restrict__ nnz_per_row)
+    __launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span(
+        const span row_span, const span col_span,
+        const IndexType* __restrict__ row_ptrs,
+        const IndexType* __restrict__ col_idxs,
+        IndexType* __restrict__ nnz_per_row)
 {
     const auto src_row = thread::get_thread_id_flat() + row_span.begin;
     if (src_row < row_span.end) {
@@ -925,7 +926,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
     gko::matrix::sparsity_type allowed, int64* __restrict__ row_desc,
     int32* __restrict__ storage)
 {
-    constexpr int block_size = 32;
+    constexpr int bitmap_block_size =
+        gko::matrix::device_sparsity_lookup<IndexType>::block_size;
     auto row = thread::get_subwarp_id_flat<subwarp_size>();
     if (row >= num_rows) {
         return;
@@ -953,7 +955,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
         return;
     }
     // dense bitmap storage
-    const auto num_blocks = static_cast<int>(ceildiv(col_range, block_size));
+    const auto num_blocks =
+        static_cast<int32>(ceildiv(col_range, bitmap_block_size));
     if (num_blocks * 2 <= available_storage &&
         ((static_cast<int>(allowed) &
           static_cast<int>(matrix::sparsity_type::bitmap)) != 0)) {
@@ -971,10 +974,11 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
         // fill bitmaps with sparsity pattern
         for (IndexType base_i = 0; base_i < row_len; base_i += subwarp_size) {
             const auto i = base_i + lane;
-            const auto col = i < row_len ? local_cols[i] : INT_MAX;
+            const auto col =
+                i < row_len ? local_cols[i] : device_numeric_limits<int32>::max;
             const auto rel_col = static_cast<int32>(col - min_col);
-            const auto block = rel_col / block_size;
-            const auto col_in_block = rel_col % block_size;
+            const auto block = rel_col / bitmap_block_size;
+            const auto col_in_block = rel_col % bitmap_block_size;
             auto local_bitmap = uint32{i < row_len ? 1u : 0u} << col_in_block;
             bool is_first =
                 segment_scan(subwarp, block, local_bitmap,
@@ -1014,7 +1018,7 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
                         static_cast<int>(matrix::sparsity_type::hash);
     }
     // fill hashmap with sentinel
-    constexpr int32 empty = -1;
+    constexpr int32 empty = invalid_index<int32>();
     for (int32 i = lane; i < available_storage; i += subwarp_size) {
         local_storage[i] = empty;
     }
@@ -1048,7 +1052,9 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
                     hash -= available_storage;
                 }
                 // this can only fail for row_length < 16
-                assert(hash < available_storage);
+                // because then available_storage < 32 and
+                // colliding can have more than available_storage bits set.
+                GKO_ASSERT(hash < available_storage);
                 entry = local_storage[hash];
             }
             colliding = subwarp.match_any(hash);

diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp
@@ -41,7 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/index_set.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/matrix/coo.hpp>
-#include <ginkgo/core/matrix/csr_lookup.hpp>
 #include <ginkgo/core/matrix/dense.hpp>
 #include <ginkgo/core/matrix/diagonal.hpp>
 #include <ginkgo/core/matrix/ell.hpp>
@@ -51,6 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include "core/base/kernel_declaration.hpp"
+#include "core/matrix/csr_lookup.hpp"
 
 
 namespace gko {

diff --git a/include/ginkgo/core/matrix/csr_lookup.hpp → core/matrix/csr_lookup.hpp b/include/ginkgo/core/matrix/csr_lookup.hpp → core/matrix/csr_lookup.hpp
@@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
-#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_
-#define GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_
+#ifndef GKO_CORE_MATRIX_CSR_LOOKUP_HPP_
+#define GKO_CORE_MATRIX_CSR_LOOKUP_HPP_
 
 
 #include <type_traits>
@@ -59,10 +59,10 @@ enum class sparsity_type : int {
     full = 1,
     /**
      * The row is sufficiently dense that its sparsity pattern can be stored in
-     * a dense bitmap consisting of `storage_size` blocks of size `block_size`,
-     * with a total of `storage_size` blocks. Each block stores its sparsity
-     * pattern and the number of columns before. This means that the relative
-     * output index can be computed as
+     * a dense bitmap consisting of `storage_size` blocks of size `block_size`.
+     * Each block stores its sparsity pattern as a bitmask and the number of
+     * columns before the block as integer. This means that the relative output
+     * index can be computed as
      * ```
      * auto block = (col - min_col) / block_size;
      * auto local_col = (col - min_col) % block_size;
@@ -76,7 +76,7 @@ enum class sparsity_type : int {
      * The hashtable has size `storage_size` and stores the relative output
      * index directly, i.e.
      * ```
-     * auto hash_key = col - min_col;
+     * auto hash_key = col;
      * auto hash_bucket = hash(hash_key);
      * while (col_idxs[hashtable[hash_bucket]] != col) {
      *     hash_bucket = (hash_bucket + 1) % storage_size; // linear probing
@@ -88,20 +88,19 @@ enum class sparsity_type : int {
 };
 
 
-inline sparsity_type operator|(sparsity_type a, sparsity_type b)
+GKO_ATTRIBUTES GKO_INLINE sparsity_type operator|(sparsity_type a,
+                                                  sparsity_type b)
 {
     return static_cast<sparsity_type>(static_cast<int>(a) |
                                       static_cast<int>(b));
 }
 
 
-template <typename IndexType>
-class csr_sparsity_lookup {
-    const IndexType* row_ptrs;
-    const IndexType* col_idxs;
-    Array<int64> row_desc;
-    Array<int32> storage;
-};
+GKO_ATTRIBUTES GKO_INLINE bool csr_lookup_allowed(matrix::sparsity_type allowed,
+                                                  matrix::sparsity_type type)
+{
+    return ((static_cast<int>(allowed) & static_cast<int>(type)) != 0);
+}
 
 
 template <typename IndexType>
@@ -124,15 +123,15 @@ struct device_sparsity_lookup {
         case sparsity_type::hash:
             return lookup_hash(col);
         }
-        assert(false);
+        GKO_ASSERT(false);
     }
 
     GKO_ATTRIBUTES GKO_INLINE IndexType lookup_full(IndexType col) const
     {
         const auto min_col = col_idxs[0];
         const auto out_idx = col - min_col;
-        assert(out_idx < row_nnz);
-        assert(col_idxs[out_idx] == col);
+        GKO_ASSERT(out_idx < row_nnz);
+        GKO_ASSERT(col_idxs[out_idx] == col);
         return out_idx;
     }
 
@@ -147,12 +146,12 @@ struct device_sparsity_lookup {
         const auto block = rel_col / block_size;
         const auto col_in_block = rel_col % block_size;
         const auto prefix_mask = (uint32{1} << col_in_block) - 1;
-        assert(block < num_blocks);
-        assert(block_bitmaps[block] & (uint32{1} << col_in_block));
+        GKO_ASSERT(block < num_blocks);
+        GKO_ASSERT(block_bitmaps[block] & (uint32{1} << col_in_block));
         const auto out_idx =
             block_bases[block] +
             gko::detail::popcount(block_bitmaps[block] & prefix_mask);
-        assert(col_idxs[out_idx] == col);
+        GKO_ASSERT(col_idxs[out_idx] == col);
         return out_idx;
     }
 
@@ -162,19 +161,19 @@ struct device_sparsity_lookup {
         const auto hash_param = static_cast<uint32>(desc >> 32);
         const auto hashmap = storage;
         auto hash = (static_cast<uint32>(col) * hash_param) % hashmap_size;
-        assert(hashmap[hash] >= 0);
-        assert(hashmap[hash] < row_nnz);
+        GKO_ASSERT(hashmap[hash] >= 0);
+        GKO_ASSERT(hashmap[hash] < row_nnz);
         // linear probing with sentinel to avoid infinite loop
         while (col_idxs[hashmap[hash]] != col) {
             hash++;
             if (hash >= hashmap_size) {
                 hash = 0;
             }
-            assert(hashmap[hash] >= 0);
-            assert(hashmap[hash] < row_nnz);
+            GKO_ASSERT(hashmap[hash] >= 0);
+            GKO_ASSERT(hashmap[hash] < row_nnz);
         }
         const auto out_idx = hashmap[hash];
-        assert(col_idxs[out_idx] == col);
+        GKO_ASSERT(col_idxs[out_idx] == col);
         return out_idx;
     }
 };
@@ -183,4 +182,4 @@ struct device_sparsity_lookup {
 }  // namespace matrix
 }  // namespace gko
 
-#endif  // GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_
+#endif  // GKO_CORE_MATRIX_CSR_LOOKUP_HPP_
diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu
@@ -49,7 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/math.hpp>
 #include <ginkgo/core/matrix/coo.hpp>
-#include <ginkgo/core/matrix/csr_lookup.hpp>
 #include <ginkgo/core/matrix/dense.hpp>
 #include <ginkgo/core/matrix/ell.hpp>
 #include <ginkgo/core/matrix/hybrid.hpp>
@@ -60,6 +59,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "core/components/format_conversion_kernels.hpp"
 #include "core/components/prefix_sum_kernels.hpp"
 #include "core/matrix/csr_builder.hpp"
+#include "core/matrix/csr_lookup.hpp"
 #include "core/matrix/dense_kernels.hpp"
 #include "core/synthesizer/implementation_selection.hpp"
 #include "cuda/base/config.hpp"

diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu
@@ -49,7 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/math.hpp>
 #include <ginkgo/core/matrix/csr.hpp>
-#include <ginkgo/core/matrix/csr_lookup.hpp>
 #include <ginkgo/core/matrix/dense.hpp>
 
 
@@ -58,6 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "core/base/device_matrix_data_kernels.hpp"
 #include "core/components/fill_array_kernels.hpp"
 #include "core/components/format_conversion_kernels.hpp"
+#include "core/matrix/csr_lookup.hpp"
 #include "core/matrix/dense_kernels.hpp"
 #include "core/synthesizer/implementation_selection.hpp"
 #include "cuda/base/config.hpp"

diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp
@@ -1012,62 +1012,4 @@ TEST_F(Csr, AddScaledIdentityToNonSquare)
 }
 
 
-TEST_F(Csr, BuildLookupDataWorks)
-{
-    set_up_apply_data(std::make_shared<Mtx::sparselib>());
-    using gko::matrix::sparsity_type;
-    gko::Array<gko::int64> row_descs(ref, mtx->get_size()[0]);
-    gko::Array<gko::int32> lookup_info(ref, mtx->get_num_stored_elements() * 2);
-    gko::Array<gko::int64> drow_descs(cuda, mtx->get_size()[0]);
-    gko::Array<gko::int32> dlookup_info(cuda,
-                                        mtx->get_num_stored_elements() * 2);
-    for (auto allowed_methods :
-         {sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash,
-          sparsity_type::bitmap | sparsity_type::hash,
-          sparsity_type::full | sparsity_type::hash, sparsity_type::hash}) {
-        const auto full_allowed =
-            static_cast<bool>(static_cast<int>(allowed_methods) &
-                              static_cast<int>(sparsity_type::full));
-        const auto bitmap_allowed =
-            static_cast<bool>(static_cast<int>(allowed_methods) &
-                              static_cast<int>(sparsity_type::bitmap));
-        const auto bitmap_equivalent =
-            bitmap_allowed ? sparsity_type::bitmap : sparsity_type::hash;
-        const auto full_equivalent =
-            full_allowed ? sparsity_type::full : bitmap_equivalent;
-        SCOPED_TRACE("full: " + std::to_string(full_allowed) +
-                     " bitmap: " + std::to_string(bitmap_allowed));
-
-        gko::kernels::reference::csr::build_lookup(
-            ref, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(),
-            mtx->get_size()[0], allowed_methods, row_descs.get_data(),
-            lookup_info.get_data());
-        gko::kernels::cuda::csr::build_lookup(
-            cuda, dmtx->get_const_row_ptrs(), dmtx->get_const_col_idxs(),
-            dmtx->get_size()[0], allowed_methods, drow_descs.get_data(),
-            dlookup_info.get_data());
-
-        gko::Array<gko::int64> host_row_descs(ref, drow_descs);
-        gko::Array<gko::int32> host_lookup_info(ref, dlookup_info);
-        for (int row = 0; row < dmtx->get_size()[0]; row++) {
-            SCOPED_TRACE("row: " + std::to_string(row));
-            const auto row_begin = mtx->get_const_row_ptrs()[row];
-            const auto row_nnz = mtx->get_const_row_ptrs()[row + 1] - row_begin;
-            gko::matrix::device_sparsity_lookup<int> lookup{
-                mtx->get_const_col_idxs() + row_begin, row_nnz,
-                host_lookup_info.get_const_data() + (row_begin * 2),
-                host_row_descs.get_const_data()[row]};
-
-            ASSERT_EQ(host_row_descs.get_const_data()[row] & 0xFFFF,
-                      row_descs.get_const_data()[row] & 0xFFFF);
-            for (int nz = 0; nz < row_nnz; nz++) {
-                SCOPED_TRACE("nz: " + std::to_string(nz));
-                const auto col = mtx->get_const_col_idxs()[nz + row_begin];
-                ASSERT_EQ(nz, lookup[col]);
-            }
-        }
-    }
-}
-
-
 }  // namespace
diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp
@@ -2270,13 +2270,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
     GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL);
 
 
-bool csr_lookup_allowed(matrix::sparsity_type allowed,
-                        matrix::sparsity_type type)
-{
-    return ((static_cast<int>(allowed) & static_cast<int>(type)) != 0);
-}
-
-
 template <typename IndexType>
 bool csr_lookup_try_full(IndexType row_len, IndexType col_range,
                          matrix::sparsity_type allowed, int64& row_desc)
@@ -2301,7 +2294,7 @@ bool csr_lookup_try_bitmap(IndexType row_len, IndexType col_range,
         gko::matrix::device_sparsity_lookup<IndexType>::block_size;
     using matrix::sparsity_type;
     bool is_allowed = csr_lookup_allowed(allowed, sparsity_type::bitmap);
-    const auto num_blocks = ceildiv(col_range, block_size);
+    const auto num_blocks = static_cast<int32>(ceildiv(col_range, block_size));
     if (is_allowed && num_blocks * 2 <= available_storage) {
         row_desc = (static_cast<int64>(num_blocks) << 32) |
                    static_cast<int>(sparsity_type::bitmap);
@@ -2339,12 +2332,12 @@ void csr_lookup_build_hash(IndexType row_len, IndexType available_storage,
         1u | static_cast<uint32>(available_storage * inv_golden_ratio);
     row_desc = (static_cast<int64>(hash_parameter) << 32) |
                static_cast<int>(matrix::sparsity_type::hash);
-    std::fill_n(local_storage, available_storage, -1);
+    std::fill_n(local_storage, available_storage, invalid_index<int32>());
     for (int32 nz = 0; nz < row_len; nz++) {
         auto hash = (static_cast<uint32>(cols[nz]) * hash_parameter) %
                     static_cast<uint32>(available_storage);
         // linear probing: find the next empty entry
-        while (local_storage[hash] != -1) {
+        while (local_storage[hash] != invalid_index<int32>()) {
             hash++;
             if (hash >= available_storage) {
                 hash = 0;