diff --git a/cuda/matrix/sparsity_csr_kernels.cu b/cuda/matrix/sparsity_csr_kernels.cu index 454a2f412e1..71ac5d9c664 100644 --- a/cuda/matrix/sparsity_csr_kernels.cu +++ b/cuda/matrix/sparsity_csr_kernels.cu @@ -118,25 +118,16 @@ void classical_spmv(syn::value_list, return; } if (alpha == nullptr && beta == nullptr) { - if (grid.x > 0 && grid.y > 0) { - kernel::abstract_classical_spmv - <<>>( - a->get_size()[0], as_cuda_type(a->get_const_value()), - a->get_const_col_idxs(), - as_cuda_type(a->get_const_row_ptrs()), - acc::as_cuda_range(b_vals), acc::as_cuda_range(c_vals)); - } + kernel::abstract_classical_spmv<<>>( + a->get_size()[0], as_cuda_type(a->get_const_value()), + a->get_const_col_idxs(), as_cuda_type(a->get_const_row_ptrs()), + acc::as_cuda_range(b_vals), acc::as_cuda_range(c_vals)); } else if (alpha != nullptr && beta != nullptr) { - if (grid.x > 0 && grid.y > 0) { - kernel::abstract_classical_spmv - <<>>( - a->get_size()[0], as_cuda_type(alpha->get_const_values()), - as_cuda_type(a->get_const_value()), a->get_const_col_idxs(), - as_cuda_type(a->get_const_row_ptrs()), - acc::as_cuda_range(b_vals), - as_cuda_type(beta->get_const_values()), - acc::as_cuda_range(c_vals)); - } + kernel::abstract_classical_spmv<<>>( + a->get_size()[0], as_cuda_type(alpha->get_const_values()), + as_cuda_type(a->get_const_value()), a->get_const_col_idxs(), + as_cuda_type(a->get_const_row_ptrs()), acc::as_cuda_range(b_vals), + as_cuda_type(beta->get_const_values()), acc::as_cuda_range(c_vals)); } else { GKO_KERNEL_NOT_FOUND; } diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index 5210338a1fe..61610538a1c 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -235,20 +235,16 @@ void classical_spmv(syn::value_list, return; } if (alpha == nullptr && beta == nullptr) { - if (grid.x > 0 && grid.y > 0) { - kernel::abstract_classical_spmv( - grid, block, 0, exec->get_queue(), a->get_size()[0], - a->get_const_value(), a->get_const_col_idxs(), - a->get_const_row_ptrs(), b_vals, c_vals); - } + kernel::abstract_classical_spmv( + grid, block, 0, exec->get_queue(), a->get_size()[0], + a->get_const_value(), a->get_const_col_idxs(), + a->get_const_row_ptrs(), b_vals, c_vals); } else if (alpha != nullptr && beta != nullptr) { - if (grid.x > 0 && grid.y > 0) { - kernel::abstract_classical_spmv( - grid, block, 0, exec->get_queue(), a->get_size()[0], - alpha->get_const_values(), a->get_const_value(), - a->get_const_col_idxs(), a->get_const_row_ptrs(), b_vals, - beta->get_const_values(), c_vals); - } + kernel::abstract_classical_spmv( + grid, block, 0, exec->get_queue(), a->get_size()[0], + alpha->get_const_values(), a->get_const_value(), + a->get_const_col_idxs(), a->get_const_row_ptrs(), b_vals, + beta->get_const_values(), c_vals); } else { GKO_KERNEL_NOT_FOUND; } diff --git a/hip/matrix/sparsity_csr_kernels.hip.cpp b/hip/matrix/sparsity_csr_kernels.hip.cpp index 34b5729bed6..68daf706fbc 100644 --- a/hip/matrix/sparsity_csr_kernels.hip.cpp +++ b/hip/matrix/sparsity_csr_kernels.hip.cpp @@ -121,25 +121,20 @@ void classical_spmv(syn::value_list, return; } if (alpha == nullptr && beta == nullptr) { - if (grid.x > 0 && grid.y > 0) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel::abstract_classical_spmv), - grid, block, 0, 0, a->get_size()[0], - as_hip_type(a->get_const_value()), a->get_const_col_idxs(), - as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals), - acc::as_hip_range(c_vals)); - } + hipLaunchKernelGGL( + HIP_KERNEL_NAME(kernel::abstract_classical_spmv), + grid, block, 0, 0, a->get_size()[0], + as_hip_type(a->get_const_value()), a->get_const_col_idxs(), + as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals), + acc::as_hip_range(c_vals)); } else if (alpha != nullptr && beta != nullptr) { - if (grid.x > 0 && grid.y > 0) { - hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel::abstract_classical_spmv), - grid, block, 0, 0, a->get_size()[0], - as_hip_type(alpha->get_const_values()), - as_hip_type(a->get_const_value()), a->get_const_col_idxs(), - as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals), - as_hip_type(beta->get_const_values()), - acc::as_hip_range(c_vals)); - } + hipLaunchKernelGGL( + HIP_KERNEL_NAME(kernel::abstract_classical_spmv), + grid, block, 0, 0, a->get_size()[0], + as_hip_type(alpha->get_const_values()), + as_hip_type(a->get_const_value()), a->get_const_col_idxs(), + as_hip_type(a->get_const_row_ptrs()), acc::as_hip_range(b_vals), + as_hip_type(beta->get_const_values()), acc::as_hip_range(c_vals)); } else { GKO_KERNEL_NOT_FOUND; } diff --git a/omp/matrix/sparsity_csr_kernels.cpp b/omp/matrix/sparsity_csr_kernels.cpp index 0a883f61c84..92db204f534 100644 --- a/omp/matrix/sparsity_csr_kernels.cpp +++ b/omp/matrix/sparsity_csr_kernels.cpp @@ -70,22 +70,22 @@ void spmv(std::shared_ptr exec, const matrix::Dense* b, matrix::Dense* c) { + using arithmetic_type = + highest_precision; auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); - const auto val = static_cast(a->get_const_value()[0]); + const auto val = static_cast(a->get_const_value()[0]); #pragma omp parallel for for (size_type row = 0; row < a->get_size()[0]; ++row) { for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) = zero(); - } - for (size_type k = row_ptrs[row]; - k < static_cast(row_ptrs[row + 1]); ++k) { - auto col = col_idxs[k]; - for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) += - val * static_cast(b->at(col, j)); + auto temp_val = gko::zero(); + for (size_type k = row_ptrs[row]; + k < static_cast(row_ptrs[row + 1]); ++k) { + temp_val += + val * static_cast(b->at(col_idxs[k], j)); } + c->at(row, j) = static_cast(temp_val); } } } @@ -103,24 +103,26 @@ void advanced_spmv(std::shared_ptr exec, const matrix::Dense* beta, matrix::Dense* c) { + using arithmetic_type = + highest_precision; auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); - const auto valpha = static_cast(alpha->at(0, 0)); - const auto vbeta = beta->at(0, 0); - const auto val = static_cast(a->get_const_value()[0]); + const auto valpha = static_cast(alpha->at(0, 0)); + const auto vbeta = static_cast(beta->at(0, 0)); + const auto val = static_cast(a->get_const_value()[0]); #pragma omp parallel for for (size_type row = 0; row < a->get_size()[0]; ++row) { for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) *= vbeta; - } - for (size_type k = row_ptrs[row]; - k < static_cast(row_ptrs[row + 1]); ++k) { - auto col = col_idxs[k]; - for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) += - valpha * val * static_cast(b->at(col, j)); + auto temp_val = gko::zero(); + for (size_type k = row_ptrs[row]; + k < static_cast(row_ptrs[row + 1]); ++k) { + temp_val += + val * static_cast(b->at(col_idxs[k], j)); } + c->at(row, j) = static_cast( + vbeta * static_cast(c->at(row, j)) + + valpha * temp_val); } } } diff --git a/reference/matrix/sparsity_csr_kernels.cpp b/reference/matrix/sparsity_csr_kernels.cpp index adcb9212506..dfb7bc38bfc 100644 --- a/reference/matrix/sparsity_csr_kernels.cpp +++ b/reference/matrix/sparsity_csr_kernels.cpp @@ -75,15 +75,13 @@ void spmv(std::shared_ptr exec, for (size_type row = 0; row < a->get_size()[0]; ++row) { for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) = zero(); - } - for (size_type k = row_ptrs[row]; - k < static_cast(row_ptrs[row + 1]); ++k) { - auto col = col_idxs[k]; - for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) += static_cast( - val * static_cast(b->at(col, j))); + auto temp_val = gko::zero(); + for (size_type k = row_ptrs[row]; + k < static_cast(row_ptrs[row + 1]); ++k) { + temp_val += + val * static_cast(b->at(col_idxs[k], j)); } + c->at(row, j) = static_cast(temp_val); } } } @@ -107,20 +105,20 @@ void advanced_spmv(std::shared_ptr exec, auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); const auto valpha = static_cast(alpha->at(0, 0)); - const auto vbeta = static_cast(beta->at(0, 0)); + const auto vbeta = static_cast(beta->at(0, 0)); const auto val = static_cast(a->get_const_value()[0]); for (size_type row = 0; row < a->get_size()[0]; ++row) { for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) *= vbeta; - } - for (size_type k = row_ptrs[row]; - k < static_cast(row_ptrs[row + 1]); ++k) { - auto col = col_idxs[k]; - for (size_type j = 0; j < c->get_size()[1]; ++j) { - c->at(row, j) += static_cast( - valpha * val * static_cast(b->at(col, j))); + auto temp_val = gko::zero(); + for (size_type k = row_ptrs[row]; + k < static_cast(row_ptrs[row + 1]); ++k) { + temp_val += + val * static_cast(b->at(col_idxs[k], j)); } + c->at(row, j) = static_cast( + vbeta * static_cast(c->at(row, j)) + + valpha * temp_val); } } } diff --git a/test/matrix/matrix.cpp b/test/matrix/matrix.cpp index 694d800010c..f64b4d9673b 100644 --- a/test/matrix/matrix.cpp +++ b/test/matrix/matrix.cpp @@ -361,7 +361,7 @@ struct SparsityCsr using entry_type = gko::matrix_data::nonzero_type; for (auto& entry : data.nonzeros) { - entry = entry_type{entry.row, entry.column, matrix_value_type{1}}; + entry.value = gko::one(); } } };