Skip to content

Commit

Permalink
Use simple kernels for scalar apply, dense convert
Browse files Browse the repository at this point in the history
  • Loading branch information
pratikvn committed Aug 6, 2021
1 parent ffbf037 commit 0c45763
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 284 deletions.
77 changes: 76 additions & 1 deletion common/preconditioner/jacobi_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/math.hpp>


#include "common/base/kernel_launch_solver.hpp"
#include "common/base/kernel_launch.hpp"


namespace gko {
Expand Down Expand Up @@ -82,6 +82,81 @@ void invert_diagonal(std::shared_ptr<const DefaultExecutor> exec,
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL);


template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *alpha,
const matrix::Dense<ValueType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Dense<ValueType> *x)
{
if (alpha->get_size()[1] > 1) {
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b,
auto beta, auto x) {
x(row, col) = beta[col] * x(row, col) +
alpha[col] * b(row, col) * diag[row];
},
x->get_size(), diag.get_const_data(), alpha->get_const_values(), b,
beta->get_const_values(), x);
} else {
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b,
auto beta, auto x) {
x(row, col) =
beta[0] * x(row, col) + alpha[0] * b(row, col) * diag[row];
},
x->get_size(), diag.get_const_data(), alpha->get_const_values(), b,
beta->get_const_values(), x);
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *b,
matrix::Dense<ValueType> *x)
{
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto diag, auto b, auto x) {
x(row, col) = b(row, col) * diag[row];
},
x->get_size(), diag.get_const_data(), b, x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
ValueType *result_values,
const gko::dim<2> &matrix_size,
size_type result_stride)
{
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto stride, auto diag,
auto result_values) {
result_values[row * stride + col] = zero<ValueType>();
if (row == col) {
result_values[row * stride + col] = diag[row];
}
},
matrix_size, result_stride, blocks.get_const_data(), result_values);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


} // namespace jacobi
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
Expand Down
56 changes: 0 additions & 56 deletions common/preconditioner/jacobi_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -213,59 +213,3 @@ __launch_bounds__(warps_per_block *config::warp_size) adaptive_transpose_jacobi(
});
}
}


namespace kernel {


template <typename ValueType>
__global__ __launch_bounds__(default_block_size) void scalar_apply(
size_type num_rows, size_type num_cols, const ValueType *__restrict__ diag,
const ValueType *__restrict__ alpha, size_type source_stride,
const ValueType *__restrict__ source_values,
const ValueType *__restrict__ beta, size_type result_stride,
ValueType *__restrict__ result_values)
{
const auto tidx = thread::get_thread_id_flat();
const auto row = tidx / num_cols;
const auto col = tidx % num_cols;

if (row < num_rows) {
result_values[row * result_stride + col] =
result_values[row * result_stride + col] * beta[0] +
alpha[0] * source_values[row * source_stride + col] * diag[row];
}
}


template <typename ValueType>
__global__ __launch_bounds__(default_block_size) void invert_diagonal(
size_type num_elems, const ValueType *__restrict__ diag,
ValueType *__restrict__ inv_diag)
{
const auto tidx = thread::get_thread_id_flat();

if (tidx < num_elems) {
inv_diag[tidx] = ValueType(1.0) / diag[tidx];
}
}


template <typename ValueType>
__global__ __launch_bounds__(default_block_size) void simple_scalar_apply(
size_type num_rows, size_type num_cols, const ValueType *__restrict__ diag,
size_type source_stride, const ValueType *__restrict__ source_values,
size_type result_stride, ValueType *__restrict__ result_values)
{
const auto tidx = thread::get_thread_id_flat();
const auto row = tidx / num_cols;
const auto col = tidx % num_cols;

if (row < num_rows) {
result_values[row * result_stride + col] =
source_values[row * source_stride + col] * diag[row];
}
}


} // namespace kernel
66 changes: 0 additions & 66 deletions cuda/preconditioner/jacobi_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -230,17 +230,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);


template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
ValueType *result_values,
const gko::dim<2> &mat_size,
size_type result_stride) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_dense(
std::shared_ptr<const DefaultExecutor> exec, size_type num_blocks,
Expand All @@ -254,61 +243,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *alpha,
const matrix::Dense<ValueType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Dense<ValueType> *x)
{
const auto b_size = b->get_size();
const auto num_rows = b_size[0];
const auto num_cols = b_size[1];
const auto b_stride = b->get_stride();
const auto x_stride = x->get_stride();
const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

const auto b_values = b->get_const_values();
const auto diag_values = diag.get_const_data();
auto x_values = x->get_values();

kernel::scalar_apply<<<grid_dim, default_block_size>>>(
num_rows, num_cols, as_cuda_type(diag_values),
as_cuda_type(alpha->get_const_values()), b_stride,
as_cuda_type(b_values), as_cuda_type(beta->get_const_values()),
x_stride, as_cuda_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *b,
matrix::Dense<ValueType> *x)
{
const auto b_size = b->get_size();
const auto num_rows = b_size[0];
const auto num_cols = b_size[1];
const auto b_stride = b->get_stride();
const auto x_stride = x->get_stride();
const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

const auto b_values = b->get_const_values();
const auto diag_values = diag.get_const_data();
auto x_values = x->get_values();

kernel::simple_scalar_apply<<<grid_dim, default_block_size>>>(
num_rows, num_cols, as_cuda_type(diag_values), b_stride,
as_cuda_type(b_values), x_stride, as_cuda_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


} // namespace jacobi
} // namespace cuda
} // namespace kernels
Expand Down
32 changes: 0 additions & 32 deletions dpcpp/preconditioner/jacobi_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,17 +267,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);


template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
ValueType *result_values,
const gko::dim<2> &mat_size,
size_type result_stride) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_dense(
std::shared_ptr<const DpcppExecutor> exec, size_type num_blocks,
Expand All @@ -291,27 +280,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *alpha,
const matrix::Dense<ValueType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Dense<ValueType> *x) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *b,
matrix::Dense<ValueType> *x) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


} // namespace jacobi
} // namespace dpcpp
} // namespace kernels
Expand Down
67 changes: 0 additions & 67 deletions hip/preconditioner/jacobi_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,17 +244,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);


template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
ValueType *result_values,
const gko::dim<2> &mat_size,
size_type result_stride) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_dense(
std::shared_ptr<const HipExecutor> exec, size_type num_blocks,
Expand All @@ -268,62 +257,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *alpha,
const matrix::Dense<ValueType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Dense<ValueType> *x)
{
const auto b_size = b->get_size();
const auto num_rows = b_size[0];
const auto num_cols = b_size[1];
const auto b_stride = b->get_stride();
const auto x_stride = x->get_stride();
const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

const auto b_values = b->get_const_values();
const auto diag_values = diag.get_const_data();
auto x_values = x->get_values();

hipLaunchKernelGGL(
kernel::scalar_apply, dim3(grid_dim), dim3(default_block_size), 0, 0,
num_rows, num_cols, as_hip_type(diag_values),
as_hip_type(alpha->get_const_values()), b_stride, as_hip_type(b_values),
as_hip_type(beta->get_const_values()), x_stride, as_hip_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *b,
matrix::Dense<ValueType> *x)
{
const auto b_size = b->get_size();
const auto num_rows = b_size[0];
const auto num_cols = b_size[1];
const auto b_stride = b->get_stride();
const auto x_stride = x->get_stride();
const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

const auto b_values = b->get_const_values();
const auto diag_values = diag.get_const_data();
auto x_values = x->get_values();

hipLaunchKernelGGL(kernel::simple_scalar_apply, dim3(grid_dim),
dim3(default_block_size), 0, 0, num_rows, num_cols,
as_hip_type(diag_values), b_stride,
as_hip_type(b_values), x_stride, as_hip_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


} // namespace jacobi
} // namespace hip
} // namespace kernels
Expand Down
Loading

0 comments on commit 0c45763

Please sign in to comment.