From 7b1a52c15a6a9813e3d36286090abec63e9fe995 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Aug 2021 10:49:20 +0200 Subject: [PATCH] Use simple kernels for scalar apply, dense convert --- common/preconditioner/jacobi_kernels.cpp | 75 ++++++++++++++++++++++ cuda/preconditioner/jacobi_kernels.cu | 66 ------------------- dpcpp/preconditioner/jacobi_kernels.dp.cpp | 32 --------- hip/preconditioner/jacobi_kernels.hip.cpp | 67 ------------------- omp/preconditioner/jacobi_kernels.cpp | 62 ------------------ 5 files changed, 75 insertions(+), 227 deletions(-) diff --git a/common/preconditioner/jacobi_kernels.cpp b/common/preconditioner/jacobi_kernels.cpp index f4a2b1a3320..493b685fc0f 100644 --- a/common/preconditioner/jacobi_kernels.cpp +++ b/common/preconditioner/jacobi_kernels.cpp @@ -82,6 +82,81 @@ void invert_diagonal(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL); +template +void scalar_apply(std::shared_ptr exec, + const Array &diag, + const matrix::Dense *alpha, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *x) +{ + if (alpha->get_size()[1] > 1) { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b, + auto beta, auto x) { + x(row, col) = beta[col] * x(row, col) + + alpha[col] * b(row, col) * diag[row]; + }, + x->get_size(), diag.get_const_data(), alpha->get_const_values(), b, + beta->get_const_values(), x); + } else { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b, + auto beta, auto x) { + x(row, col) = + beta[0] * x(row, col) + alpha[0] * b(row, col) * diag[row]; + }, + x->get_size(), diag.get_const_data(), alpha->get_const_values(), b, + beta->get_const_values(), x); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); + + +template +void simple_scalar_apply(std::shared_ptr exec, + const Array &diag, + const matrix::Dense *b, + matrix::Dense *x) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto b, auto x) { + x(row, col) = b(row, col) * diag[row]; + }, + x->get_size(), diag.get_const_data(), b, x); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL); + + +template +void scalar_convert_to_dense(std::shared_ptr exec, + const Array &blocks, + ValueType *result_values, + const gko::dim<2> &matrix_size, + size_type result_stride) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto stride, auto diag, + auto result_values) { + result_values[row * stride + col] = zero(); + if (row == col) { + result_values[row * stride + col] = diag[row]; + } + }, + matrix_size, result_stride, blocks.get_const_data(), result_values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL); + + } // namespace jacobi } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/cuda/preconditioner/jacobi_kernels.cu b/cuda/preconditioner/jacobi_kernels.cu index 75b5f797d4e..1ff609791c7 100644 --- a/cuda/preconditioner/jacobi_kernels.cu +++ b/cuda/preconditioner/jacobi_kernels.cu @@ -230,17 +230,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL); -template -void scalar_convert_to_dense(std::shared_ptr exec, - const Array &blocks, - ValueType *result_values, - const gko::dim<2> &mat_size, - size_type result_stride) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL); - - template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, @@ -254,61 +243,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); -template -void scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *x) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto x_stride = x->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto b_values = b->get_const_values(); - const auto diag_values = diag.get_const_data(); - auto x_values = x->get_values(); - - kernel::scalar_apply<<>>( - num_rows, num_cols, as_cuda_type(diag_values), - as_cuda_type(alpha->get_const_values()), b_stride, - as_cuda_type(b_values), as_cuda_type(beta->get_const_values()), - x_stride, as_cuda_type(x_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); - - -template -void simple_scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *b, - matrix::Dense *x) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto x_stride = x->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto b_values = b->get_const_values(); - const auto diag_values = diag.get_const_data(); - auto x_values = x->get_values(); - - kernel::simple_scalar_apply<<>>( - num_rows, num_cols, as_cuda_type(diag_values), b_stride, - as_cuda_type(b_values), x_stride, as_cuda_type(x_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL); - - } // namespace jacobi } // namespace cuda } // namespace kernels diff --git a/dpcpp/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/preconditioner/jacobi_kernels.dp.cpp index 4f1c9c6cc0f..623e2c121ec 100644 --- a/dpcpp/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/preconditioner/jacobi_kernels.dp.cpp @@ -267,17 +267,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL); -template -void scalar_convert_to_dense(std::shared_ptr exec, - const Array &blocks, - ValueType *result_values, - const gko::dim<2> &mat_size, - size_type result_stride) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL); - - template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, @@ -291,27 +280,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); -template -void scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *x) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); - - -template -void simple_scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *b, - matrix::Dense *x) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL); - - } // namespace jacobi } // namespace dpcpp } // namespace kernels diff --git a/hip/preconditioner/jacobi_kernels.hip.cpp b/hip/preconditioner/jacobi_kernels.hip.cpp index 0be2a3db67b..425820ead4e 100644 --- a/hip/preconditioner/jacobi_kernels.hip.cpp +++ b/hip/preconditioner/jacobi_kernels.hip.cpp @@ -244,17 +244,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL); -template -void scalar_convert_to_dense(std::shared_ptr exec, - const Array &blocks, - ValueType *result_values, - const gko::dim<2> &mat_size, - size_type result_stride) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL); - - template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, @@ -268,62 +257,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); -template -void scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *x) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto x_stride = x->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto b_values = b->get_const_values(); - const auto diag_values = diag.get_const_data(); - auto x_values = x->get_values(); - - hipLaunchKernelGGL( - kernel::scalar_apply, dim3(grid_dim), dim3(default_block_size), 0, 0, - num_rows, num_cols, as_hip_type(diag_values), - as_hip_type(alpha->get_const_values()), b_stride, as_hip_type(b_values), - as_hip_type(beta->get_const_values()), x_stride, as_hip_type(x_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); - - -template -void simple_scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *b, - matrix::Dense *x) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto x_stride = x->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto b_values = b->get_const_values(); - const auto diag_values = diag.get_const_data(); - auto x_values = x->get_values(); - - hipLaunchKernelGGL(kernel::simple_scalar_apply, dim3(grid_dim), - dim3(default_block_size), 0, 0, num_rows, num_cols, - as_hip_type(diag_values), b_stride, - as_hip_type(b_values), x_stride, as_hip_type(x_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL); - - } // namespace jacobi } // namespace hip } // namespace kernels diff --git a/omp/preconditioner/jacobi_kernels.cpp b/omp/preconditioner/jacobi_kernels.cpp index a49ffe8aa49..1b0c0c6c1a5 100644 --- a/omp/preconditioner/jacobi_kernels.cpp +++ b/omp/preconditioner/jacobi_kernels.cpp @@ -696,68 +696,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); -template -void scalar_convert_to_dense(std::shared_ptr exec, - const Array &blocks, - ValueType *result_values, - const gko::dim<2> &matrix_size, - size_type result_stride) -{ -#pragma omp parallel for - for (size_type i = 0; i < matrix_size[0]; ++i) { - for (size_type j = 0; j < matrix_size[1]; ++j) { - result_values[i * result_stride + j] = zero(); - if (i == j) { - result_values[i * result_stride + j] = - blocks.get_const_data()[i]; - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL); - - -template -void scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *x) -{ -#pragma omp parallel for - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - x->at(i, j) = beta->at(0) * x->at(i, j) + - alpha->at(0) * b->at(i, j) * diag.get_const_data()[i]; - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); - - -template -void simple_scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *b, - matrix::Dense *x) -{ -#pragma omp parallel for - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - auto diag_val = diag.get_const_data()[i]; - x->at(i, j) = b->at(i, j) * diag_val; - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL); - - } // namespace jacobi } // namespace omp } // namespace kernels