Skip to content

Commit

Permalink
review updates
Browse files Browse the repository at this point in the history
* allow lookups of non-existent column indices
* store separate offsets for lookup data to reduce memory footprint
* simplify tests
* fix DPC++ execution on float-only devices
* clarify comments on cheap modulo replacement

Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com>
Co-authored-by: Marcel Koch <marcel.koch@kit.edu>
  • Loading branch information
3 people committed May 13, 2022
1 parent 5c88236 commit b8c6984
Show file tree
Hide file tree
Showing 9 changed files with 554 additions and 160 deletions.
34 changes: 19 additions & 15 deletions common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -923,8 +923,8 @@ template <int subwarp_size, typename IndexType>
__global__ __launch_bounds__(default_block_size) void build_csr_lookup(
const IndexType* __restrict__ row_ptrs,
const IndexType* __restrict__ col_idxs, size_type num_rows,
gko::matrix::sparsity_type allowed, int64* __restrict__ row_desc,
int32* __restrict__ storage)
gko::matrix::sparsity_type allowed, const IndexType* storage_offsets,
int64* __restrict__ row_desc, int32* __restrict__ storage)
{
constexpr int bitmap_block_size =
gko::matrix::device_sparsity_lookup<IndexType>::block_size;
Expand All @@ -936,8 +936,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
group::tiled_partition<subwarp_size>(group::this_thread_block());
const auto row_begin = row_ptrs[row];
const auto row_len = row_ptrs[row + 1] - row_begin;
const auto storage_begin = 2 * row_begin;
const auto available_storage = 2 * row_len;
const auto storage_begin = storage_offsets[row];
const auto available_storage = storage_offsets[row + 1] - storage_begin;
const auto local_storage = storage + storage_begin;
const auto local_cols = col_idxs + row_begin;
const auto lane = subwarp.thread_rank();
Expand All @@ -947,8 +947,7 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(

// full column range
if (col_range == row_len &&
((static_cast<int>(allowed) &
static_cast<int>(matrix::sparsity_type::full)) != 0)) {
csr_lookup_allowed(allowed, matrix::sparsity_type::full)) {
if (lane == 0) {
row_desc[row] = static_cast<int>(matrix::sparsity_type::full);
}
Expand All @@ -958,8 +957,7 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
const auto num_blocks =
static_cast<int32>(ceildiv(col_range, bitmap_block_size));
if (num_blocks * 2 <= available_storage &&
((static_cast<int>(allowed) &
static_cast<int>(matrix::sparsity_type::bitmap)) != 0)) {
csr_lookup_allowed(allowed, matrix::sparsity_type::bitmap)) {
if (lane == 0) {
row_desc[row] = (static_cast<int64>(num_blocks) << 32) |
static_cast<int>(matrix::sparsity_type::bitmap);
Expand All @@ -974,8 +972,9 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
// fill bitmaps with sparsity pattern
for (IndexType base_i = 0; base_i < row_len; base_i += subwarp_size) {
const auto i = base_i + lane;
const auto col =
i < row_len ? local_cols[i] : device_numeric_limits<int32>::max;
const auto col = i < row_len
? local_cols[i]
: device_numeric_limits<IndexType>::max;
const auto rel_col = static_cast<int32>(col - min_col);
const auto block = rel_col / bitmap_block_size;
const auto col_in_block = rel_col % bitmap_block_size;
Expand Down Expand Up @@ -1007,6 +1006,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
return;
}
// sparse hashmap storage
// we need at least one unfilled entry to avoid infinite loops on search
GKO_ASSERT(row_len < available_storage);
constexpr double inv_golden_ratio = 0.61803398875;
// use golden ratio as approximation for hash parameter that spreads
// consecutive values as far apart as possible. Ensure lowest bit is set
Expand Down Expand Up @@ -1051,9 +1052,10 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
if (hash >= available_storage) {
hash -= available_storage;
}
// this can only fail for row_length < 16
// because then available_storage < 32 and
// colliding can have more than available_storage bits set.
// this could only fail for available_storage < warp_size, as
// popcnt(colliding) is at most warp_size. At the same time, we
// only increase hash by row_length at most, so this is still
// safe.
GKO_ASSERT(hash < available_storage);
entry = local_storage[hash];
}
Expand Down Expand Up @@ -1083,13 +1085,15 @@ template <typename IndexType>
void build_lookup(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* row_ptrs, const IndexType* col_idxs,
size_type num_rows, matrix::sparsity_type allowed,
int64* row_desc, int32* storage)
const IndexType* storage_offsets, int64* row_desc,
int32* storage)
{
const auto num_blocks =
ceildiv(num_rows, default_block_size / config::warp_size);
kernel::build_csr_lookup<config::warp_size>
<<<num_blocks, default_block_size>>>(row_ptrs, col_idxs, num_rows,
allowed, row_desc, storage);
allowed, storage_offsets, row_desc,
storage);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL);
44 changes: 44 additions & 0 deletions common/unified/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common/unified/base/kernel_launch.hpp"
#include "core/components/prefix_sum_kernels.hpp"


namespace gko {
Expand Down Expand Up @@ -243,6 +244,49 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL);


template <typename IndexType>
void build_lookup_offsets(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* row_ptrs, const IndexType* col_idxs,
size_type num_rows, matrix::sparsity_type allowed,
IndexType* storage_offsets)
{
using matrix::sparsity_type;
constexpr static int block_size =
gko::matrix::device_sparsity_lookup<IndexType>::block_size;
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto row_ptrs, auto col_idxs, auto num_rows,
auto allowed, auto storage_offsets) {
const auto row_begin = row_ptrs[row];
const auto row_len = row_ptrs[row + 1] - row_begin;
const auto local_cols = col_idxs + row_begin;
const auto min_col = row_len > 0 ? local_cols[0] : 0;
const auto col_range =
row_len > 0 ? local_cols[row_len - 1] - min_col + 1 : 0;
if (csr_lookup_allowed(allowed, sparsity_type::full) &&
row_len == col_range) {
storage_offsets[row] = 0;
} else {
const auto hashmap_storage = row_len == 0 ? 1 : 2 * row_len;
const auto bitmap_num_blocks =
static_cast<int32>(ceildiv(col_range, block_size));
const auto bitmap_storage = 2 * bitmap_num_blocks;
if (csr_lookup_allowed(allowed, sparsity_type::bitmap) &&
bitmap_storage <= hashmap_storage) {
storage_offsets[row] = bitmap_storage;
} else {
storage_offsets[row] = hashmap_storage;
}
}
},
num_rows, row_ptrs, col_idxs, num_rows, allowed, storage_offsets);
components::prefix_sum(exec, storage_offsets, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
GKO_DECLARE_CSR_BUILD_LOOKUP_OFFSETS_KERNEL);


} // namespace csr
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
Expand Down
12 changes: 11 additions & 1 deletion core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,19 @@ namespace kernels {
const matrix::Dense<ValueType>* beta, \
matrix::Csr<ValueType, IndexType>* mtx)

#define GKO_DECLARE_CSR_BUILD_LOOKUP_OFFSETS_KERNEL(IndexType) \
void build_lookup_offsets(std::shared_ptr<const DefaultExecutor> exec, \
const IndexType* row_ptrs, \
const IndexType* col_idxs, size_type num_rows, \
matrix::sparsity_type allowed, \
IndexType* storage_offsets)

#define GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL(IndexType) \
void build_lookup(std::shared_ptr<const DefaultExecutor> exec, \
const IndexType* row_ptrs, const IndexType* col_idxs, \
size_type num_rows, matrix::sparsity_type allowed, \
int64* row_desc, int32* storage)
const IndexType* storage_offsets, int64* row_desc, \
int32* storage)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
Expand Down Expand Up @@ -292,6 +300,8 @@ namespace kernels {
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL(ValueType, IndexType); \
template <typename IndexType> \
GKO_DECLARE_CSR_BUILD_LOOKUP_OFFSETS_KERNEL(IndexType); \
template <typename IndexType> \
GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL(IndexType)


Expand Down
Loading

0 comments on commit b8c6984

Please sign in to comment.