Skip to content

Commit

Permalink
improve document, fix auto usage in for loop
Browse files (browse the repository at this point in the history)
Co-authored-by: Thomas Grützmacher <thomas.gruetzmacher@kit.edu>
  • Loading branch information
yhmtsai and Thomas Grützmacher committed Jul 20, 2021
1 parent 7ac6d09 commit a131660
Show file tree
Hide file tree
Showing 24 changed files with 275 additions and 239 deletions.
2 changes: 1 addition & 1 deletion common/components/prefix_sum.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ __forceinline__ __device__ void subwarp_prefix_sum(ValueType element,
total_sum = element;
#pragma unroll
// hypercube prefix sum
for (auto step = 1; step < subwarp.size(); step *= 2) {
for (int step = 1; step < subwarp.size(); step *= 2) {
auto neighbor = subwarp.shfl_xor(total_sum, step);
total_sum += neighbor;
prefix_sum += bool(subwarp.thread_rank() & step) ? neighbor : 0;
Expand Down
12 changes: 6 additions & 6 deletions common/components/sorting.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct bitonic_local {
bool reverse)
{
auto els_mid = els + (num_elements / 2);
for (auto i = 0; i < num_elements / 2; ++i) {
for (int i = 0; i < num_elements / 2; ++i) {
bitonic_cas(els[i], els_mid[i], reverse);
}
half::merge(els, reverse);
Expand Down Expand Up @@ -131,7 +131,7 @@ struct bitonic_warp {
auto tile =
group::tiled_partition<num_threads>(group::this_thread_block());
auto new_reverse = reverse != upper_half();
for (auto i = 0; i < num_local; ++i) {
for (int i = 0; i < num_local; ++i) {
auto other = tile.shfl_xor(els[i], num_threads / 2);
bitonic_cas(els[i], other, new_reverse);
}
Expand Down Expand Up @@ -206,7 +206,7 @@ struct bitonic_global {
auto upper_shared_els = shared_els + (num_groups * num_threads / 2);
// only the lower group executes the CAS
if (!upper_half()) {
for (auto i = 0; i < num_local; ++i) {
for (int i = 0; i < num_local; ++i) {
auto j = shared_idx(i);
bitonic_cas(shared_els[j], upper_shared_els[j], reverse);
}
Expand Down Expand Up @@ -241,11 +241,11 @@ struct bitonic_global<ValueType, num_local, num_threads, 1, num_total_threads> {
bool reverse)
{
group::this_thread_block().sync();
for (auto i = 0; i < num_local; ++i) {
for (int i = 0; i < num_local; ++i) {
local_els[i] = shared_els[shared_idx(i)];
}
warp::merge(local_els, reverse);
for (auto i = 0; i < num_local; ++i) {
for (int i = 0; i < num_local; ++i) {
shared_els[shared_idx(i)] = local_els[i];
}
}
Expand All @@ -258,7 +258,7 @@ struct bitonic_global<ValueType, num_local, num_threads, 1, num_total_threads> {
// This is the first step, so we don't need to load from shared memory
warp::sort(local_els, reverse);
// store the sorted elements in shared memory
for (auto i = 0; i < num_local; ++i) {
for (int i = 0; i < num_local; ++i) {
shared_els[shared_idx(i)] = local_els[i];
}
}
Expand Down
7 changes: 4 additions & 3 deletions common/components/uninitialized_array.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/**
* Stores an array with uninitialized contents.
*
* This class needed for datatypes that do have a non-empty constructor when`
* This class is needed for datatypes that do have a non-empty constructor when
* using them as shared memory, for example `thrust::complex<float>`.
*
* @tparam ValueType the type of values
Expand All @@ -49,7 +49,7 @@ public:
*
* @return the constexpr pointer to the first entry of the array.
*/
constexpr GKO_ATTRIBUTES operator ValueType *() const noexcept
constexpr GKO_ATTRIBUTES operator const ValueType *() const noexcept
{
return &(*this)[0];
}
Expand All @@ -70,7 +70,8 @@ public:
*
* @return a reference to the array entry at the given index.
*/
constexpr GKO_ATTRIBUTES ValueType &operator[](size_type pos) const noexcept
constexpr GKO_ATTRIBUTES const ValueType &operator[](size_type pos) const
noexcept
{
return reinterpret_cast<const ValueType *>(data_)[pos];
}
Expand Down
4 changes: 2 additions & 2 deletions common/factorization/par_ilut_filter_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ __device__ void abstract_filter_impl(const IndexType *row_ptrs,
auto end = row_ptrs[row + 1];
begin_cb(row);
auto num_steps = ceildiv(end - begin, subwarp_size);
for (auto step = 0; step < num_steps; ++step) {
for (IndexType step = 0; step < num_steps; ++step) {
auto idx = begin + lane + step * subwarp_size;
auto keep = idx < end && pred(idx, begin, end);
auto mask = subwarp.ballot(keep);
Expand Down Expand Up @@ -189,4 +189,4 @@ __global__ __launch_bounds__(default_block_size) void bucket_filter(
}


} // namespace kernel
} // namespace kernel
Loading

0 comments on commit a131660

Please sign in to comment.