Skip to content

Commit

Permalink
review updates
Browse files Browse the repository at this point in the history
* move everything to the private interface
* remove duplicate tests
* test edge cases explicitly

Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com>
Co-authored-by: Marcel Koch <marcel.koch@kit.edu>
  • Loading branch information
3 people committed May 6, 2022
1 parent d6cd5b1 commit e686e9e
Show file tree
Hide file tree
Showing 17 changed files with 153 additions and 327 deletions.
46 changes: 26 additions & 20 deletions common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -736,15 +736,15 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute(

template <typename ValueType, typename IndexType>
__global__
__launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(
const size_type num_rows, const size_type num_cols,
const size_type row_offset, const size_type col_offset,
const IndexType* __restrict__ source_row_ptrs,
const IndexType* __restrict__ source_col_idxs,
const ValueType* __restrict__ source_values,
const IndexType* __restrict__ result_row_ptrs,
IndexType* __restrict__ result_col_idxs,
ValueType* __restrict__ result_values)
__launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(
const size_type num_rows, const size_type num_cols,
const size_type row_offset, const size_type col_offset,
const IndexType* __restrict__ source_row_ptrs,
const IndexType* __restrict__ source_col_idxs,
const ValueType* __restrict__ source_values,
const IndexType* __restrict__ result_row_ptrs,
IndexType* __restrict__ result_col_idxs,
ValueType* __restrict__ result_values)
{
const auto res_row = thread::get_thread_id_flat();
if (res_row < num_rows) {
Expand All @@ -766,10 +766,11 @@ __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(

template <typename IndexType>
__global__
__launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span(
const span row_span, const span col_span,
const IndexType* __restrict__ row_ptrs,
const IndexType* __restrict__ col_idxs, IndexType* __restrict__ nnz_per_row)
__launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span(
const span row_span, const span col_span,
const IndexType* __restrict__ row_ptrs,
const IndexType* __restrict__ col_idxs,
IndexType* __restrict__ nnz_per_row)
{
const auto src_row = thread::get_thread_id_flat() + row_span.begin;
if (src_row < row_span.end) {
Expand Down Expand Up @@ -925,7 +926,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
gko::matrix::sparsity_type allowed, int64* __restrict__ row_desc,
int32* __restrict__ storage)
{
constexpr int block_size = 32;
constexpr int bitmap_block_size =
gko::matrix::device_sparsity_lookup<IndexType>::block_size;
auto row = thread::get_subwarp_id_flat<subwarp_size>();
if (row >= num_rows) {
return;
Expand Down Expand Up @@ -953,7 +955,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
return;
}
// dense bitmap storage
const auto num_blocks = static_cast<int>(ceildiv(col_range, block_size));
const auto num_blocks =
static_cast<int32>(ceildiv(col_range, bitmap_block_size));
if (num_blocks * 2 <= available_storage &&
((static_cast<int>(allowed) &
static_cast<int>(matrix::sparsity_type::bitmap)) != 0)) {
Expand All @@ -971,10 +974,11 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
// fill bitmaps with sparsity pattern
for (IndexType base_i = 0; base_i < row_len; base_i += subwarp_size) {
const auto i = base_i + lane;
const auto col = i < row_len ? local_cols[i] : INT_MAX;
const auto col =
i < row_len ? local_cols[i] : device_numeric_limits<int32>::max;
const auto rel_col = static_cast<int32>(col - min_col);
const auto block = rel_col / block_size;
const auto col_in_block = rel_col % block_size;
const auto block = rel_col / bitmap_block_size;
const auto col_in_block = rel_col % bitmap_block_size;
auto local_bitmap = uint32{i < row_len ? 1u : 0u} << col_in_block;
bool is_first =
segment_scan(subwarp, block, local_bitmap,
Expand Down Expand Up @@ -1014,7 +1018,7 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
static_cast<int>(matrix::sparsity_type::hash);
}
// fill hashmap with sentinel
constexpr int32 empty = -1;
constexpr int32 empty = invalid_index<int32>();
for (int32 i = lane; i < available_storage; i += subwarp_size) {
local_storage[i] = empty;
}
Expand Down Expand Up @@ -1048,7 +1052,9 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
hash -= available_storage;
}
// this can only fail for row_length < 16
assert(hash < available_storage);
// because then available_storage < 32 and
// colliding can have more than available_storage bits set.
GKO_ASSERT(hash < available_storage);
entry = local_storage[hash];
}
colliding = subwarp.match_any(hash);
Expand Down
2 changes: 1 addition & 1 deletion core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/index_set.hpp>
#include <ginkgo/core/base/types.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/csr_lookup.hpp>
#include <ginkgo/core/matrix/dense.hpp>
#include <ginkgo/core/matrix/diagonal.hpp>
#include <ginkgo/core/matrix/ell.hpp>
Expand All @@ -51,6 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "core/base/kernel_declaration.hpp"
#include "core/matrix/csr_lookup.hpp"


namespace gko {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_
#define GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_
#ifndef GKO_CORE_MATRIX_CSR_LOOKUP_HPP_
#define GKO_CORE_MATRIX_CSR_LOOKUP_HPP_


#include <type_traits>
Expand Down Expand Up @@ -59,10 +59,10 @@ enum class sparsity_type : int {
full = 1,
/**
* The row is sufficiently dense that its sparsity pattern can be stored in
* a dense bitmap consisting of `storage_size` blocks of size `block_size`,
* with a total of `storage_size` blocks. Each block stores its sparsity
* pattern and the number of columns before. This means that the relative
* output index can be computed as
* a dense bitmap consisting of `storage_size` blocks of size `block_size`.
* Each block stores its sparsity pattern as a bitmask and the number of
* columns before the block as integer. This means that the relative output
* index can be computed as
* ```
* auto block = (col - min_col) / block_size;
* auto local_col = (col - min_col) % block_size;
Expand All @@ -76,7 +76,7 @@ enum class sparsity_type : int {
* The hashtable has size `storage_size` and stores the relative output
* index directly, i.e.
* ```
* auto hash_key = col - min_col;
* auto hash_key = col;
* auto hash_bucket = hash(hash_key);
* while (col_idxs[hashtable[hash_bucket]] != col) {
* hash_bucket = (hash_bucket + 1) % storage_size; // linear probing
Expand All @@ -88,20 +88,19 @@ enum class sparsity_type : int {
};


inline sparsity_type operator|(sparsity_type a, sparsity_type b)
GKO_ATTRIBUTES GKO_INLINE sparsity_type operator|(sparsity_type a,
sparsity_type b)
{
return static_cast<sparsity_type>(static_cast<int>(a) |
static_cast<int>(b));
}


template <typename IndexType>
class csr_sparsity_lookup {
const IndexType* row_ptrs;
const IndexType* col_idxs;
Array<int64> row_desc;
Array<int32> storage;
};
GKO_ATTRIBUTES GKO_INLINE bool csr_lookup_allowed(matrix::sparsity_type allowed,
matrix::sparsity_type type)
{
return ((static_cast<int>(allowed) & static_cast<int>(type)) != 0);
}


template <typename IndexType>
Expand All @@ -124,15 +123,15 @@ struct device_sparsity_lookup {
case sparsity_type::hash:
return lookup_hash(col);
}
assert(false);
GKO_ASSERT(false);
}

GKO_ATTRIBUTES GKO_INLINE IndexType lookup_full(IndexType col) const
{
const auto min_col = col_idxs[0];
const auto out_idx = col - min_col;
assert(out_idx < row_nnz);
assert(col_idxs[out_idx] == col);
GKO_ASSERT(out_idx < row_nnz);
GKO_ASSERT(col_idxs[out_idx] == col);
return out_idx;
}

Expand All @@ -147,12 +146,12 @@ struct device_sparsity_lookup {
const auto block = rel_col / block_size;
const auto col_in_block = rel_col % block_size;
const auto prefix_mask = (uint32{1} << col_in_block) - 1;
assert(block < num_blocks);
assert(block_bitmaps[block] & (uint32{1} << col_in_block));
GKO_ASSERT(block < num_blocks);
GKO_ASSERT(block_bitmaps[block] & (uint32{1} << col_in_block));
const auto out_idx =
block_bases[block] +
gko::detail::popcount(block_bitmaps[block] & prefix_mask);
assert(col_idxs[out_idx] == col);
GKO_ASSERT(col_idxs[out_idx] == col);
return out_idx;
}

Expand All @@ -162,19 +161,19 @@ struct device_sparsity_lookup {
const auto hash_param = static_cast<uint32>(desc >> 32);
const auto hashmap = storage;
auto hash = (static_cast<uint32>(col) * hash_param) % hashmap_size;
assert(hashmap[hash] >= 0);
assert(hashmap[hash] < row_nnz);
GKO_ASSERT(hashmap[hash] >= 0);
GKO_ASSERT(hashmap[hash] < row_nnz);
// linear probing with sentinel to avoid infinite loop
while (col_idxs[hashmap[hash]] != col) {
hash++;
if (hash >= hashmap_size) {
hash = 0;
}
assert(hashmap[hash] >= 0);
assert(hashmap[hash] < row_nnz);
GKO_ASSERT(hashmap[hash] >= 0);
GKO_ASSERT(hashmap[hash] < row_nnz);
}
const auto out_idx = hashmap[hash];
assert(col_idxs[out_idx] == col);
GKO_ASSERT(col_idxs[out_idx] == col);
return out_idx;
}
};
Expand All @@ -183,4 +182,4 @@ struct device_sparsity_lookup {
} // namespace matrix
} // namespace gko

#endif // GKO_PUBLIC_CORE_MATRIX_CSR_LOOKUP_HPP_
#endif // GKO_CORE_MATRIX_CSR_LOOKUP_HPP_
2 changes: 1 addition & 1 deletion cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/csr_lookup.hpp>
#include <ginkgo/core/matrix/dense.hpp>
#include <ginkgo/core/matrix/ell.hpp>
#include <ginkgo/core/matrix/hybrid.hpp>
Expand All @@ -60,6 +59,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "core/components/format_conversion_kernels.hpp"
#include "core/components/prefix_sum_kernels.hpp"
#include "core/matrix/csr_builder.hpp"
#include "core/matrix/csr_lookup.hpp"
#include "core/matrix/dense_kernels.hpp"
#include "core/synthesizer/implementation_selection.hpp"
#include "cuda/base/config.hpp"
Expand Down
2 changes: 1 addition & 1 deletion cuda/matrix/fbcsr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/csr.hpp>
#include <ginkgo/core/matrix/csr_lookup.hpp>
#include <ginkgo/core/matrix/dense.hpp>


Expand All @@ -58,6 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "core/base/device_matrix_data_kernels.hpp"
#include "core/components/fill_array_kernels.hpp"
#include "core/components/format_conversion_kernels.hpp"
#include "core/matrix/csr_lookup.hpp"
#include "core/matrix/dense_kernels.hpp"
#include "core/synthesizer/implementation_selection.hpp"
#include "cuda/base/config.hpp"
Expand Down
58 changes: 0 additions & 58 deletions cuda/test/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1012,62 +1012,4 @@ TEST_F(Csr, AddScaledIdentityToNonSquare)
}


TEST_F(Csr, BuildLookupDataWorks)
{
set_up_apply_data(std::make_shared<Mtx::sparselib>());
using gko::matrix::sparsity_type;
gko::Array<gko::int64> row_descs(ref, mtx->get_size()[0]);
gko::Array<gko::int32> lookup_info(ref, mtx->get_num_stored_elements() * 2);
gko::Array<gko::int64> drow_descs(cuda, mtx->get_size()[0]);
gko::Array<gko::int32> dlookup_info(cuda,
mtx->get_num_stored_elements() * 2);
for (auto allowed_methods :
{sparsity_type::full | sparsity_type::bitmap | sparsity_type::hash,
sparsity_type::bitmap | sparsity_type::hash,
sparsity_type::full | sparsity_type::hash, sparsity_type::hash}) {
const auto full_allowed =
static_cast<bool>(static_cast<int>(allowed_methods) &
static_cast<int>(sparsity_type::full));
const auto bitmap_allowed =
static_cast<bool>(static_cast<int>(allowed_methods) &
static_cast<int>(sparsity_type::bitmap));
const auto bitmap_equivalent =
bitmap_allowed ? sparsity_type::bitmap : sparsity_type::hash;
const auto full_equivalent =
full_allowed ? sparsity_type::full : bitmap_equivalent;
SCOPED_TRACE("full: " + std::to_string(full_allowed) +
" bitmap: " + std::to_string(bitmap_allowed));

gko::kernels::reference::csr::build_lookup(
ref, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(),
mtx->get_size()[0], allowed_methods, row_descs.get_data(),
lookup_info.get_data());
gko::kernels::cuda::csr::build_lookup(
cuda, dmtx->get_const_row_ptrs(), dmtx->get_const_col_idxs(),
dmtx->get_size()[0], allowed_methods, drow_descs.get_data(),
dlookup_info.get_data());

gko::Array<gko::int64> host_row_descs(ref, drow_descs);
gko::Array<gko::int32> host_lookup_info(ref, dlookup_info);
for (int row = 0; row < dmtx->get_size()[0]; row++) {
SCOPED_TRACE("row: " + std::to_string(row));
const auto row_begin = mtx->get_const_row_ptrs()[row];
const auto row_nnz = mtx->get_const_row_ptrs()[row + 1] - row_begin;
gko::matrix::device_sparsity_lookup<int> lookup{
mtx->get_const_col_idxs() + row_begin, row_nnz,
host_lookup_info.get_const_data() + (row_begin * 2),
host_row_descs.get_const_data()[row]};

ASSERT_EQ(host_row_descs.get_const_data()[row] & 0xFFFF,
row_descs.get_const_data()[row] & 0xFFFF);
for (int nz = 0; nz < row_nnz; nz++) {
SCOPED_TRACE("nz: " + std::to_string(nz));
const auto col = mtx->get_const_col_idxs()[nz + row_begin];
ASSERT_EQ(nz, lookup[col]);
}
}
}
}


} // namespace
13 changes: 3 additions & 10 deletions dpcpp/matrix/csr_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2270,13 +2270,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL);


bool csr_lookup_allowed(matrix::sparsity_type allowed,
matrix::sparsity_type type)
{
return ((static_cast<int>(allowed) & static_cast<int>(type)) != 0);
}


template <typename IndexType>
bool csr_lookup_try_full(IndexType row_len, IndexType col_range,
matrix::sparsity_type allowed, int64& row_desc)
Expand All @@ -2301,7 +2294,7 @@ bool csr_lookup_try_bitmap(IndexType row_len, IndexType col_range,
gko::matrix::device_sparsity_lookup<IndexType>::block_size;
using matrix::sparsity_type;
bool is_allowed = csr_lookup_allowed(allowed, sparsity_type::bitmap);
const auto num_blocks = ceildiv(col_range, block_size);
const auto num_blocks = static_cast<int32>(ceildiv(col_range, block_size));
if (is_allowed && num_blocks * 2 <= available_storage) {
row_desc = (static_cast<int64>(num_blocks) << 32) |
static_cast<int>(sparsity_type::bitmap);
Expand Down Expand Up @@ -2339,12 +2332,12 @@ void csr_lookup_build_hash(IndexType row_len, IndexType available_storage,
1u | static_cast<uint32>(available_storage * inv_golden_ratio);
row_desc = (static_cast<int64>(hash_parameter) << 32) |
static_cast<int>(matrix::sparsity_type::hash);
std::fill_n(local_storage, available_storage, -1);
std::fill_n(local_storage, available_storage, invalid_index<int32>());
for (int32 nz = 0; nz < row_len; nz++) {
auto hash = (static_cast<uint32>(cols[nz]) * hash_parameter) %
static_cast<uint32>(available_storage);
// linear probing: find the next empty entry
while (local_storage[hash] != -1) {
while (local_storage[hash] != invalid_index<int32>()) {
hash++;
if (hash >= available_storage) {
hash = 0;
Expand Down
Loading

0 comments on commit e686e9e

Please sign in to comment.