Skip to content

Commit

Permalink
move memory barrier
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed May 6, 2022
1 parent 4d677e4 commit d6cd5b1
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -968,8 +968,6 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
for (int32 i = lane; i < num_blocks; i += subwarp_size) {
block_bitmaps[i] = 0;
}
- // memory barrier - just to be sure
- subwarp.sync();
// fill bitmaps with sparsity pattern
for (IndexType base_i = 0; base_i < row_len; base_i += subwarp_size) {
const auto i = base_i + lane;
Expand All @@ -982,6 +980,8 @@ __global__ __launch_bounds__(default_block_size) void build_csr_lookup(
segment_scan(subwarp, block, local_bitmap,
[](config::lane_mask_type a,
config::lane_mask_type b) { return a | b; });
+ // memory barrier - just to be sure
+ subwarp.sync();
if (is_first && i < row_len) {
block_bitmaps[block] |= local_bitmap;
}
Expand Down

0 comments on commit d6cd5b1

Please sign in to comment.