Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Forbid redux for nvc++
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed Jun 25, 2022
1 parent 963ed9b commit 738be7d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
2 changes: 2 additions & 0 deletions cub/warp/specializations/warp_reduce_shfl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ struct WarpReduceShfl
return output;
}

+ #ifndef _NVHPC_CUDA // NVBug 3694682
template <class U = T>
__device__ __forceinline__
typename std::enable_if<
Expand Down Expand Up @@ -557,6 +558,7 @@ struct WarpReduceShfl

return output;
}
+ #endif // _NVHPC_CUDA

/// Reduction
template <
Expand Down
12 changes: 7 additions & 5 deletions test/test_warp_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,8 @@ void Initialize(
RandomBits(bits, flag_entropy);
h_flags[i] = bits & 0x1;
}
+ h_flags[warps * warp_threads] = {};
+ h_tail_out[warps * warp_threads] = {};

// Accumulate segments (lane 0 of each warp is implicitly a segment head)
for (int warp = 0; warp < warps; ++warp)
Expand Down Expand Up @@ -483,9 +485,9 @@ void TestReduce(

// Allocate host arrays
T *h_in = new T[BLOCK_THREADS];
- int *h_flags = new int[BLOCK_THREADS];
+ int *h_flags = new int[BLOCK_THREADS + 1];
T *h_out = new T[BLOCK_THREADS];
- T *h_tail_out = new T[BLOCK_THREADS];
+ T *h_tail_out = new T[BLOCK_THREADS + 1];

// Initialize problem
Initialize(gen_mode, -1, h_in, h_flags, WARPS, LOGICAL_WARP_THREADS, valid_warp_threads, reduction_op, h_out, h_tail_out);
Expand Down Expand Up @@ -578,9 +580,9 @@ void TestSegmentedReduce(
// Allocate host arrays
int compare;
T *h_in = new T[BLOCK_THREADS];
- int *h_flags = new int[BLOCK_THREADS];
- T *h_head_out = new T[BLOCK_THREADS];
- T *h_tail_out = new T[BLOCK_THREADS];
+ int *h_flags = new int[BLOCK_THREADS + 1];
+ T *h_head_out = new T[BLOCK_THREADS + 1];
+ T *h_tail_out = new T[BLOCK_THREADS + 1];

// Initialize problem
Initialize(gen_mode, flag_entropy, h_in, h_flags, WARPS, LOGICAL_WARP_THREADS, LOGICAL_WARP_THREADS, reduction_op, h_head_out, h_tail_out);
Expand Down

0 comments on commit 738be7d

Please sign in to comment.