Skip to content

Commit

Permalink
use proper atomics for ParILU(T) and ParIC(T)
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed Nov 3, 2023
1 parent 80613be commit cb5f061
Show file tree
Hide file tree
Showing 12 changed files with 35 additions and 21 deletions.
12 changes: 7 additions & 5 deletions common/cuda_hip/factorization/par_ic_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,18 @@ __global__ __launch_bounds__(default_block_size) void ic_sweep(
auto l_col = l_col_idxs[l_row_begin];
auto lh_row = l_col_idxs[lh_col_begin];
if (l_col == lh_row && l_col < last_entry) {
sum += l_vals[l_row_begin] * conj(l_vals[lh_col_begin]);
sum += load_relaxed(l_vals + l_row_begin) *
conj(load_relaxed(l_vals + lh_col_begin));
}
l_row_begin += l_col <= lh_row;
lh_col_begin += l_col >= lh_row;
}
auto to_write = row == col
? sqrt(a_val - sum)
: (a_val - sum) / l_vals[l_row_ptrs[col + 1] - 1];
auto to_write =
row == col
? sqrt(a_val - sum)
: (a_val - sum) / load_relaxed(l_vals + (l_row_ptrs[col + 1] - 1));
if (is_finite(to_write)) {
l_vals[l_nz] = to_write;
store_relaxed(l_vals + l_nz, to_write);
}
}

Expand Down
13 changes: 7 additions & 6 deletions common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ __global__ __launch_bounds__(default_block_size) void ict_sweep(
// we don't need to use the `bool valid` because last_entry is
// already a smaller sentinel value than the one used in group_merge
if (l_col == lh_row && l_col < last_entry) {
sum += l_vals[l_idx + l_row_begin] *
conj(l_vals[lh_idx + lh_col_begin]);
sum += load_relaxed(l_vals + (l_idx + l_row_begin)) *
conj(load_relaxed(l_vals + (lh_idx + lh_col_begin)));
}
// remember the transposed element
auto found_transp = subwarp.ballot(lh_row == row);
Expand All @@ -90,11 +90,12 @@ __global__ __launch_bounds__(default_block_size) void ict_sweep(
sum = reduce(subwarp, sum, [](ValueType a, ValueType b) { return a + b; });

if (subwarp.thread_rank() == 0) {
auto to_write = row == col
? sqrt(a_val - sum)
: (a_val - sum) / l_vals[l_row_ptrs[col + 1] - 1];
auto to_write =
row == col ? sqrt(a_val - sum)
: (a_val - sum) /
load_relaxed(l_vals + (l_row_ptrs[col + 1] - 1));
if (is_finite(to_write)) {
l_vals[l_nz] = to_write;
store_relaxed(l_vals + l_nz, to_write);
}
}
}
Expand Down
10 changes: 6 additions & 4 deletions common/cuda_hip/factorization/par_ilu_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,24 @@ __global__ __launch_bounds__(default_block_size) void compute_l_u_factors(
const auto u_col = u_col_idxs[u_idx];
last_operation = zero<ValueType>();
if (l_col == u_col) {
last_operation = l_values[l_idx] * u_values[u_idx];
last_operation = load_relaxed(l_values + l_idx) *
load_relaxed(u_values + u_idx);
sum -= last_operation;
}
l_idx += (l_col <= u_col);
u_idx += (u_col <= l_col);
}
sum += last_operation; // undo the last operation
if (row > col) {
auto to_write = sum / u_values[u_row_ptrs[col + 1] - 1];
auto to_write =
sum / load_relaxed(u_values + (u_row_ptrs[col + 1] - 1));
if (is_finite(to_write)) {
l_values[l_idx - 1] = to_write;
store_relaxed(l_values + (l_idx - 1), to_write);
}
} else {
auto to_write = sum;
if (is_finite(to_write)) {
u_values[u_idx - 1] = to_write;
store_relaxed(u_values + (u_idx - 1), to_write);
}
}
}
Expand Down
13 changes: 7 additions & 6 deletions common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ __global__ __launch_bounds__(default_block_size) void sweep(
// we don't need to use the `bool valid` because last_entry is
// already a smaller sentinel value than the one used in group_merge
if (l_col == ut_row && l_col < last_entry) {
sum += l_vals[l_idx + l_row_begin] *
ut_vals[ut_idx + ut_col_begin];
sum += load_relaxed(l_vals + (l_idx + l_row_begin)) *
load_relaxed(ut_vals + (ut_idx + ut_col_begin));
}
// remember the transposed element
auto found_transp = subwarp.ballot(ut_row == row);
Expand All @@ -103,15 +103,16 @@ __global__ __launch_bounds__(default_block_size) void sweep(

if (subwarp.thread_rank() == 0) {
if (lower) {
auto to_write = (a_val - sum) / ut_vals[ut_col_ptrs[col + 1] - 1];
auto to_write = (a_val - sum) /
load_relaxed(ut_vals + (ut_col_ptrs[col + 1] - 1));
if (is_finite(to_write)) {
l_vals[l_nz] = to_write;
store_relaxed(l_vals + l_nz, to_write);
}
} else {
auto to_write = a_val - sum;
if (is_finite(to_write)) {
u_vals[u_nz] = to_write;
ut_vals[ut_nz] = to_write;
store_relaxed(u_vals + u_nz, to_write);
store_relaxed(ut_vals + ut_nz, to_write);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions cuda/factorization/par_ic_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "cuda/base/math.hpp"
#include "cuda/base/types.hpp"
#include "cuda/components/memory.cuh"
#include "cuda/components/thread_ids.cuh"


Expand Down
1 change: 1 addition & 0 deletions cuda/factorization/par_ict_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "core/synthesizer/implementation_selection.hpp"
#include "cuda/base/math.hpp"
#include "cuda/components/intrinsics.cuh"
#include "cuda/components/memory.cuh"
#include "cuda/components/merging.cuh"
#include "cuda/components/prefix_sum.cuh"
#include "cuda/components/reduction.cuh"
Expand Down
1 change: 1 addition & 0 deletions cuda/factorization/par_ilu_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "cuda/base/math.hpp"
#include "cuda/base/types.hpp"
#include "cuda/components/memory.cuh"
#include "cuda/components/thread_ids.cuh"


Expand Down
1 change: 1 addition & 0 deletions cuda/factorization/par_ilut_sweep_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "core/synthesizer/implementation_selection.hpp"
#include "cuda/base/math.hpp"
#include "cuda/components/intrinsics.cuh"
#include "cuda/components/memory.cuh"
#include "cuda/components/merging.cuh"
#include "cuda/components/prefix_sum.cuh"
#include "cuda/components/reduction.cuh"
Expand Down
1 change: 1 addition & 0 deletions hip/factorization/par_ic_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "hip/base/math.hip.hpp"
#include "hip/base/types.hip.hpp"
#include "hip/components/memory.hip.hpp"
#include "hip/components/thread_ids.hip.hpp"


Expand Down
1 change: 1 addition & 0 deletions hip/factorization/par_ict_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "core/synthesizer/implementation_selection.hpp"
#include "hip/base/math.hip.hpp"
#include "hip/components/intrinsics.hip.hpp"
#include "hip/components/memory.hip.hpp"
#include "hip/components/merging.hip.hpp"
#include "hip/components/prefix_sum.hip.hpp"
#include "hip/components/reduction.hip.hpp"
Expand Down
1 change: 1 addition & 0 deletions hip/factorization/par_ilu_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "hip/base/math.hip.hpp"
#include "hip/base/types.hip.hpp"
#include "hip/components/memory.hip.hpp"
#include "hip/components/thread_ids.hip.hpp"


Expand Down
1 change: 1 addition & 0 deletions hip/factorization/par_ilut_sweep_kernel.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "core/synthesizer/implementation_selection.hpp"
#include "hip/base/math.hip.hpp"
#include "hip/components/intrinsics.hip.hpp"
#include "hip/components/memory.hip.hpp"
#include "hip/components/merging.hip.hpp"
#include "hip/components/prefix_sum.hip.hpp"
#include "hip/components/reduction.hip.hpp"
Expand Down

0 comments on commit cb5f061

Please sign in to comment.