From 81d7c3db2500758ed3323e02242c3f6910d7021e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 25 Oct 2020 16:32:12 +0100 Subject: [PATCH 1/3] add GPU permutation kernels --- common/matrix/csr_kernels.hpp.inc | 117 +++++++++++++++++ core/device_hooks/common_kernels.inc.cpp | 11 +- core/matrix/csr.cpp | 24 ++-- core/matrix/csr_kernels.hpp | 21 ++- cuda/matrix/csr_kernels.cu | 75 ++++++++--- cuda/test/matrix/csr_kernels.cpp | 73 +++++++++++ hip/matrix/csr_kernels.hip.cpp | 80 +++++++++--- hip/test/matrix/csr_kernels.hip.cpp | 73 +++++++++++ omp/matrix/csr_kernels.cpp | 140 +++++++++----------- omp/test/matrix/csr_kernels.cpp | 55 ++++---- reference/matrix/csr_kernels.cpp | 158 ++++++++++------------- 11 files changed, 577 insertions(+), 250 deletions(-) diff --git a/common/matrix/csr_kernels.hpp.inc b/common/matrix/csr_kernels.hpp.inc index bb7e6b27b57..8f7e1a3f5a2 100644 --- a/common/matrix/csr_kernels.hpp.inc +++ b/common/matrix/csr_kernels.hpp.inc @@ -946,3 +946,120 @@ __global__ __launch_bounds__(default_block_size) void conjugate_kernel( } // namespace + + +template +__global__ __launch_bounds__(default_block_size) void inv_permutation_kernel( + size_type size, const IndexType *__restrict__ permutation, + IndexType *__restrict__ inv_permutation) +{ + auto tid = thread::get_thread_id_flat(); + if (tid >= size) { + return; + } + inv_permutation[permutation[tid]] = tid; +} + + +template +__global__ __launch_bounds__(default_block_size) void col_permute_kernel( + size_type num_rows, size_type num_nonzeros, + const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) +{ + auto tid = thread::get_thread_id_flat(); + if (tid < num_nonzeros) { + out_cols[tid] = permutation[in_cols[tid]]; + out_vals[tid] = in_vals[tid]; + } + if (tid <= num_rows) { + out_row_ptrs[tid] = in_row_ptrs[tid]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void row_ptr_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, IndexType *__restrict__ out_nnz) +{ + auto tid = thread::get_thread_id_flat(); + if (tid >= num_rows) { + return; + } + auto in_row = permutation[tid]; + auto out_row = tid; + out_nnz[out_row] = in_row_ptrs[in_row + 1] - in_row_ptrs[in_row]; +} + + +template +__global__ + __launch_bounds__(default_block_size) void inv_row_ptr_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + IndexType *__restrict__ out_nnz) +{ + auto tid = thread::get_thread_id_flat(); + if (tid >= num_rows) { + return; + } + auto in_row = tid; + auto out_row = permutation[tid]; + out_nnz[out_row] = in_row_ptrs[in_row + 1] - in_row_ptrs[in_row]; +} + + +template +__global__ __launch_bounds__(default_block_size) void row_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = permutation[tid]; + auto out_row = tid; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_row_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} \ No newline at end of file diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 09082c5d507..a981372c630 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -647,10 +647,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); template -GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); template GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) @@ -658,11 +658,10 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); -template -GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +template +GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); template GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 6a31b3bffae..c6d410aee16 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -70,9 +70,9 @@ GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid); GKO_REGISTER_OPERATION(transpose, csr::transpose); GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose); GKO_REGISTER_OPERATION(row_permute, csr::row_permute); -GKO_REGISTER_OPERATION(column_permute, csr::column_permute); GKO_REGISTER_OPERATION(inverse_row_permute, csr::inverse_row_permute); GKO_REGISTER_OPERATION(inverse_column_permute, csr::inverse_column_permute); +GKO_REGISTER_OPERATION(invert_permutation, csr::invert_permutation); GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, csr::calculate_max_nnz_per_row); GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, @@ -412,8 +412,8 @@ std::unique_ptr Csr::row_permute( Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); - exec->run( - csr::make_row_permute(permutation_indices, this, permute_cpy.get())); + exec->run(csr::make_row_permute(permutation_indices->get_const_data(), this, + permute_cpy.get())); permute_cpy->make_srow(); return std::move(permute_cpy); } @@ -428,10 +428,15 @@ std::unique_ptr Csr::column_permute( auto permute_cpy = Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); + Array inv_permutation(exec, this->get_size()[1]); - exec->run( - csr::make_column_permute(permutation_indices, this, permute_cpy.get())); + exec->run(csr::make_invert_permutation( + this->get_size()[1], permutation_indices->get_const_data(), + inv_permutation.get_data())); + exec->run(csr::make_inverse_column_permute(inv_permutation.get_const_data(), + this, permute_cpy.get())); permute_cpy->make_srow(); + permute_cpy->sort_by_column_index(); return std::move(permute_cpy); } @@ -447,8 +452,9 @@ std::unique_ptr Csr::inverse_row_permute( Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); - exec->run(csr::make_inverse_row_permute(inverse_permutation_indices, this, - inverse_permute_cpy.get())); + exec->run(csr::make_inverse_row_permute( + inverse_permutation_indices->get_const_data(), this, + inverse_permute_cpy.get())); inverse_permute_cpy->make_srow(); return std::move(inverse_permute_cpy); } @@ -466,8 +472,10 @@ std::unique_ptr Csr::inverse_column_permute( this->get_strategy()); exec->run(csr::make_inverse_column_permute( - inverse_permutation_indices, this, inverse_permute_cpy.get())); + inverse_permutation_indices->get_const_data(), this, + inverse_permute_cpy.get())); inverse_permute_cpy->make_srow(); + inverse_permute_cpy->sort_by_column_index(); return std::move(inverse_permute_cpy); } diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 92d9f462dfd..ff8c2d9ef26 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -131,29 +131,28 @@ namespace kernels { #define GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void row_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Csr *orig, \ matrix::Csr *row_permuted) -#define GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ - void column_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ - const matrix::Csr *orig, \ - matrix::Csr *column_permuted) - #define GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_row_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Csr *orig, \ matrix::Csr *row_permuted) #define GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_column_permute( \ std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Csr *orig, \ matrix::Csr *column_permuted) +#define GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType) \ + void invert_permutation( \ + std::shared_ptr exec, size_type size, \ + const IndexType *permutation_indices, IndexType *inv_permutation) + #define GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) \ void calculate_max_nnz_per_row( \ std::shared_ptr exec, \ @@ -210,11 +209,11 @@ namespace kernels { template \ GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType); \ template \ GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType); \ template \ diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 7316ecfa890..b90745fc8d9 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1143,34 +1143,66 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +template +void invert_permutation(std::shared_ptr exec, + size_type size, const IndexType *permutation_indices, + IndexType *inv_permutation) +{ + auto num_blocks = ceildiv(size, default_block_size); + inv_permutation_kernel<<>>( + size, permutation_indices, inv_permutation); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); + + template void row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + row_ptr_permute_kernel<<>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + row_permute_kernel + <<>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_cuda_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_cuda_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); - - template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel<<>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_row_permute_kernel + <<>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_cuda_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_cuda_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); @@ -1178,10 +1210,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; +{ + auto num_rows = orig->get_size()[0]; + auto nnz = orig->get_num_stored_elements(); + auto num_blocks = ceildiv(std::max(num_rows, nnz), default_block_size); + col_permute_kernel<<>>( + num_rows, nnz, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_cuda_type(orig->get_const_values()), + column_permuted->get_row_ptrs(), column_permuted->get_col_idxs(), + as_cuda_type(column_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index 9a4a9075933..38af50b1c21 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -60,6 +60,7 @@ namespace { class Csr : public ::testing::Test { protected: + using Arr = gko::Array; using Vec = gko::matrix::Dense<>; using Mtx = gko::matrix::Csr<>; using ComplexVec = gko::matrix::Dense>; @@ -114,6 +115,18 @@ class Csr : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Vec::create(cuda); dbeta->copy_from(beta.get()); + + std::vector tmp(mtx->get_size()[0], 0); + auto rng = std::default_random_engine{}; + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rng); + std::vector tmp2(mtx->get_size()[1], 0); + std::iota(tmp2.begin(), tmp2.end(), 0); + std::shuffle(tmp2.begin(), tmp2.end(), rng); + rpermute_idxs = std::make_unique(ref, tmp.begin(), tmp.end()); + drpermute_idxs = std::make_unique(cuda, tmp.begin(), tmp.end()); + cpermute_idxs = std::make_unique(ref, tmp2.begin(), tmp2.end()); + dcpermute_idxs = std::make_unique(cuda, tmp2.begin(), tmp2.end()); } void set_up_apply_complex_data( @@ -179,6 +192,10 @@ class Csr : public ::testing::Test { std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr rpermute_idxs; + std::unique_ptr drpermute_idxs; + std::unique_ptr cpermute_idxs; + std::unique_ptr dcpermute_idxs; }; @@ -688,6 +705,62 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsRowPermutable) +{ + set_up_apply_data(std::make_shared()); + auto r_permute = gko::share(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::share(dmtx->row_permute(drpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(r_permute), + gko::as(dr_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(r_permute), gko::as(dr_permute), 0); +} + + +TEST_F(Csr, IsColPermutable) +{ + set_up_apply_data(std::make_shared()); + auto c_permute = gko::share(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::share(dmtx->column_permute(dcpermute_idxs.get())); + + ASSERT_TRUE(gko::as(dc_permute)->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(c_permute), + gko::as(dc_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(c_permute), gko::as(dc_permute), 0); +} + + +TEST_F(Csr, IsInverseRowPermutable) +{ + set_up_apply_data(std::make_shared()); + auto inverse_r_permute = + gko::share(mtx->inverse_row_permute(rpermute_idxs.get())); + auto d_inverse_r_permute = + gko::share(dmtx->inverse_row_permute(drpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_r_permute), + gko::as(d_inverse_r_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(inverse_r_permute), + gko::as(d_inverse_r_permute), 0); +} + + +TEST_F(Csr, IsInverseColPermutable) +{ + set_up_apply_data(std::make_shared()); + auto inverse_c_permute = + gko::share(mtx->inverse_column_permute(cpermute_idxs.get())); + auto d_inverse_c_permute = + gko::share(dmtx->inverse_column_permute(dcpermute_idxs.get())); + + ASSERT_TRUE(gko::as(d_inverse_c_permute)->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_c_permute), + gko::as(d_inverse_c_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(inverse_c_permute), + gko::as(d_inverse_c_permute), 0); +} + + TEST_F(Csr, RecognizeSortedMatrixIsEquivalentToRef) { set_up_apply_data(std::make_shared()); diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index f133507092e..6effd739123 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -967,34 +967,68 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); -template -void row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +template +void invert_permutation(std::shared_ptr exec, + size_type size, const IndexType *permutation_indices, + IndexType *inv_permutation) +{ + auto num_blocks = ceildiv(size, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(inv_permutation_kernel), num_blocks, + default_block_size, 0, 0, size, permutation_indices, + inv_permutation); +} -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; +void row_permute(std::shared_ptr exec, const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(row_ptr_permute_kernel), + count_num_blocks, default_block_size, 0, 0, num_rows, + perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(row_permute_kernel), copy_num_blocks, + default_block_size, 0, 0, num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_hip_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_hip_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(inv_row_ptr_permute_kernel), + count_num_blocks, default_block_size, 0, 0, num_rows, + perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(inv_row_permute_kernel), + copy_num_blocks, default_block_size, 0, 0, num_rows, perm, + orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + as_hip_type(orig->get_const_values()), row_permuted->get_row_ptrs(), + row_permuted->get_col_idxs(), as_hip_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); @@ -1002,10 +1036,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; +{ + auto num_rows = orig->get_size()[0]; + auto nnz = orig->get_num_stored_elements(); + auto num_blocks = ceildiv(std::max(num_rows, nnz), default_block_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(col_permute_kernel), num_blocks, default_block_size, 0, + 0, num_rows, nnz, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_hip_type(orig->get_const_values()), + column_permuted->get_row_ptrs(), column_permuted->get_col_idxs(), + as_hip_type(column_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 99e6bca6f2a..510189ad70e 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -60,6 +60,7 @@ namespace { class Csr : public ::testing::Test { protected: + using Arr = gko::Array; using Vec = gko::matrix::Dense<>; using Mtx = gko::matrix::Csr<>; using ComplexVec = gko::matrix::Dense>; @@ -114,6 +115,18 @@ class Csr : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Vec::create(hip); dbeta->copy_from(beta.get()); + + std::vector tmp(mtx->get_size()[0], 0); + auto rng = std::default_random_engine{}; + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rng); + std::vector tmp2(mtx->get_size()[1], 0); + std::iota(tmp2.begin(), tmp2.end(), 0); + std::shuffle(tmp2.begin(), tmp2.end(), rng); + rpermute_idxs = std::make_unique(ref, tmp.begin(), tmp.end()); + drpermute_idxs = std::make_unique(hip, tmp.begin(), tmp.end()); + cpermute_idxs = std::make_unique(ref, tmp2.begin(), tmp2.end()); + dcpermute_idxs = std::make_unique(hip, tmp2.begin(), tmp2.end()); } void set_up_apply_complex_data( @@ -179,6 +192,10 @@ class Csr : public ::testing::Test { std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr rpermute_idxs; + std::unique_ptr drpermute_idxs; + std::unique_ptr cpermute_idxs; + std::unique_ptr dcpermute_idxs; }; @@ -687,6 +704,62 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsRowPermutable) +{ + set_up_apply_data(std::make_shared()); + auto r_permute = gko::share(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::share(dmtx->row_permute(drpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(r_permute), + gko::as(dr_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(r_permute), gko::as(dr_permute), 0); +} + + +TEST_F(Csr, IsColPermutable) +{ + set_up_apply_data(std::make_shared()); + auto c_permute = gko::share(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::share(dmtx->column_permute(dcpermute_idxs.get())); + + ASSERT_TRUE(gko::as(dc_permute)->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(c_permute), + gko::as(dc_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(c_permute), gko::as(dc_permute), 0); +} + + +TEST_F(Csr, IsInverseRowPermutable) +{ + set_up_apply_data(std::make_shared()); + auto inverse_r_permute = + gko::share(mtx->inverse_row_permute(rpermute_idxs.get())); + auto d_inverse_r_permute = + gko::share(dmtx->inverse_row_permute(drpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_r_permute), + gko::as(d_inverse_r_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(inverse_r_permute), + gko::as(d_inverse_r_permute), 0); +} + + +TEST_F(Csr, IsInverseColPermutable) +{ + set_up_apply_data(std::make_shared()); + auto inverse_c_permute = + gko::share(mtx->inverse_column_permute(cpermute_idxs.get())); + auto d_inverse_c_permute = + gko::share(dmtx->inverse_column_permute(dcpermute_idxs.get())); + + ASSERT_TRUE(gko::as(d_inverse_c_permute)->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_c_permute), + gko::as(d_inverse_c_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(inverse_c_permute), + gko::as(d_inverse_c_permute), 0); +} + + TEST_F(Csr, RecognizeSortedMatrixIsEquivalentToRef) { set_up_apply_data(std::make_shared(hip)); diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index a4d467a0693..32007104cc1 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -598,13 +598,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL); +template +void invert_permutation(std::shared_ptr exec, + size_type size, const IndexType *permutation_indices, + IndexType *inv_permutation) +{ +#pragma omp parallel for + for (size_type i = 0; i < size; ++i) { + inv_permutation[permutation_indices[i]] = i; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); + + template -void row_permute_impl(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *row_permuted) +void row_permute(std::shared_ptr exec, const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *row_permuted) { - auto perm = permutation_indices->get_const_data(); auto orig_row_ptrs = orig->get_const_row_ptrs(); auto orig_col_idxs = orig->get_const_col_idxs(); auto orig_vals = orig->get_const_values(); @@ -614,60 +626,65 @@ void row_permute_impl(std::shared_ptr exec, size_type num_rows = orig->get_size()[0]; size_type num_nnz = orig->get_num_stored_elements(); - size_type cur_ptr = 0; - rp_row_ptrs[0] = cur_ptr; - vector orig_num_nnz_per_row(num_rows, 0, exec); #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { - orig_num_nnz_per_row[row] = orig_row_ptrs[row + 1] - orig_row_ptrs[row]; + auto src_row = perm[row]; + auto dst_row = row; + rp_row_ptrs[dst_row] = + orig_row_ptrs[src_row + 1] - orig_row_ptrs[src_row]; } - for (size_type row = 0; row < num_rows; ++row) { - rp_row_ptrs[row + 1] = - rp_row_ptrs[row] + orig_num_nnz_per_row[perm[row]]; - } - rp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; + components::prefix_sum(exec, rp_row_ptrs, num_rows + 1); #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { - auto new_row = perm[row]; - auto new_k = orig_row_ptrs[new_row]; - for (size_type k = rp_row_ptrs[row]; - k < size_type(rp_row_ptrs[row + 1]); ++k) { - rp_col_idxs[k] = orig_col_idxs[new_k]; - rp_vals[k] = orig_vals[new_k]; - new_k++; - } + auto src_row = perm[row]; + auto dst_row = row; + auto src_begin = orig_row_ptrs[src_row]; + auto dst_begin = rp_row_ptrs[dst_row]; + auto row_size = orig_row_ptrs[src_row + 1] - src_begin; + std::copy_n(orig_col_idxs + src_begin, row_size, + rp_col_idxs + dst_begin); + std::copy_n(orig_vals + src_begin, row_size, rp_vals + dst_begin); } } - -template -void row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *row_permuted) -{ - row_permute_impl(exec, permutation_indices, orig, row_permuted); -} - GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *row_permuted) { - auto perm = permutation_indices->get_const_data(); - Array inv_perm(*permutation_indices); - auto iperm = inv_perm.get_data(); + auto orig_row_ptrs = orig->get_const_row_ptrs(); + auto orig_col_idxs = orig->get_const_col_idxs(); + auto orig_vals = orig->get_const_values(); + auto rp_row_ptrs = row_permuted->get_row_ptrs(); + auto rp_col_idxs = row_permuted->get_col_idxs(); + auto rp_vals = row_permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + size_type num_nnz = orig->get_num_stored_elements(); + #pragma omp parallel for - for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { - iperm[perm[ind]] = ind; + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + rp_row_ptrs[dst_row] = + orig_row_ptrs[src_row + 1] - orig_row_ptrs[src_row]; + } + components::prefix_sum(exec, rp_row_ptrs, num_rows + 1); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + auto src_begin = orig_row_ptrs[src_row]; + auto dst_begin = rp_row_ptrs[dst_row]; + auto row_size = orig_row_ptrs[src_row + 1] - src_begin; + std::copy_n(orig_col_idxs + src_begin, row_size, + rp_col_idxs + dst_begin); + std::copy_n(orig_vals + src_begin, row_size, rp_vals + dst_begin); } - - row_permute_impl(exec, &inv_perm, orig, row_permuted); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -675,26 +692,23 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void column_permute_impl(const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) +void inverse_column_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *column_permuted) { - auto perm = permutation_indices->get_const_data(); auto orig_row_ptrs = orig->get_const_row_ptrs(); auto orig_col_idxs = orig->get_const_col_idxs(); auto orig_vals = orig->get_const_values(); auto cp_row_ptrs = column_permuted->get_row_ptrs(); auto cp_col_idxs = column_permuted->get_col_idxs(); auto cp_vals = column_permuted->get_values(); - auto num_nnz = orig->get_num_stored_elements(); size_type num_rows = orig->get_size()[0]; - size_type num_cols = orig->get_size()[1]; #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { cp_row_ptrs[row] = orig_row_ptrs[row]; - for (size_type k = orig_row_ptrs[row]; - k < size_type(orig_row_ptrs[row + 1]); ++k) { + for (auto k = orig_row_ptrs[row]; k < orig_row_ptrs[row + 1]; ++k) { cp_col_idxs[k] = perm[orig_col_idxs[k]]; cp_vals[k] = orig_vals[k]; } @@ -702,36 +716,6 @@ void column_permute_impl(const Array *permutation_indices, cp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; } - -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) -{ - auto perm = permutation_indices->get_const_data(); - Array inv_perm(*permutation_indices); - auto iperm = inv_perm.get_data(); -#pragma omp parallel for - for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { - iperm[perm[ind]] = ind; - } - column_permute_impl(&inv_perm, orig, column_permuted); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); - - -template -void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) -{ - column_permute_impl(permutation_indices, orig, column_permuted); -} - GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 6462720003f..0dcfaac7844 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -126,14 +126,10 @@ class Csr : public ::testing::Test { std::vector tmp2(mtx->get_size()[1], 0); std::iota(tmp2.begin(), tmp2.end(), 0); std::shuffle(tmp2.begin(), tmp2.end(), rng); - rpermute_idxs = - std::unique_ptr(new Arr{ref, tmp.begin(), tmp.end()}); - drpermute_idxs = - std::unique_ptr(new Arr{omp, tmp.begin(), tmp.end()}); - cpermute_idxs = - std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); - dcpermute_idxs = - std::unique_ptr(new Arr{omp, tmp2.begin(), tmp2.end()}); + rpermute_idxs = std::make_unique(ref, tmp.begin(), tmp.end()); + drpermute_idxs = std::make_unique(omp, tmp.begin(), tmp.end()); + cpermute_idxs = std::make_unique(ref, tmp2.begin(), tmp2.end()); + dcpermute_idxs = std::make_unique(omp, tmp2.begin(), tmp2.end()); } struct matrix_pair { @@ -480,45 +476,56 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) TEST_F(Csr, IsRowPermutable) { set_up_apply_data(); - auto r_permute = mtx->row_permute(rpermute_idxs.get()); - auto dr_permute = dmtx->row_permute(drpermute_idxs.get()); + auto r_permute = gko::share(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::share(dmtx->row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_NEAR(static_cast(r_permute.get()), - static_cast(dr_permute.get()), 0); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(r_permute), + gko::as(dr_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(r_permute), gko::as(dr_permute), 0); } TEST_F(Csr, IsColPermutable) { set_up_apply_data(); - auto c_permute = mtx->column_permute(cpermute_idxs.get()); - auto dc_permute = dmtx->column_permute(dcpermute_idxs.get()); + auto c_permute = gko::share(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::share(dmtx->column_permute(dcpermute_idxs.get())); - GKO_ASSERT_MTX_NEAR(static_cast(c_permute.get()), - static_cast(dc_permute.get()), 0); + ASSERT_TRUE(gko::as(dc_permute)->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(c_permute), + gko::as(dc_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(c_permute), gko::as(dc_permute), 0); } TEST_F(Csr, IsInverseRowPermutable) { set_up_apply_data(); - auto inverse_r_permute = mtx->inverse_row_permute(rpermute_idxs.get()); - auto d_inverse_r_permute = dmtx->inverse_row_permute(drpermute_idxs.get()); + auto inverse_r_permute = + gko::share(mtx->inverse_row_permute(rpermute_idxs.get())); + auto d_inverse_r_permute = + gko::share(dmtx->inverse_row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_NEAR(static_cast(inverse_r_permute.get()), - static_cast(d_inverse_r_permute.get()), 0); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_r_permute), + gko::as(d_inverse_r_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(inverse_r_permute), + gko::as(d_inverse_r_permute), 0); } TEST_F(Csr, IsInverseColPermutable) { set_up_apply_data(); - auto inverse_c_permute = mtx->inverse_column_permute(cpermute_idxs.get()); + auto inverse_c_permute = + gko::share(mtx->inverse_column_permute(cpermute_idxs.get())); auto d_inverse_c_permute = - dmtx->inverse_column_permute(dcpermute_idxs.get()); + gko::share(dmtx->inverse_column_permute(dcpermute_idxs.get())); - GKO_ASSERT_MTX_NEAR(static_cast(inverse_c_permute.get()), - static_cast(d_inverse_c_permute.get()), 0); + ASSERT_TRUE(gko::as(d_inverse_c_permute)->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_c_permute), + gko::as(d_inverse_c_permute)); + GKO_ASSERT_MTX_NEAR(gko::as(inverse_c_permute), + gko::as(d_inverse_c_permute), 0); } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index e5d5b7ce151..1be2ae79cb5 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -689,73 +689,85 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL); +template +void invert_permutation(std::shared_ptr exec, + size_type size, const IndexType *permutation_indices, + IndexType *inv_permutation) +{ + for (size_type i = 0; i < size; ++i) { + inv_permutation[permutation_indices[i]] = i; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); + + template -void row_permute_impl(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *row_permuted) +void row_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *row_permuted) { - auto perm = permutation_indices->get_const_data(); - auto orig_row_ptrs = orig->get_const_row_ptrs(); - auto orig_col_idxs = orig->get_const_col_idxs(); - auto orig_vals = orig->get_const_values(); + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); auto rp_row_ptrs = row_permuted->get_row_ptrs(); auto rp_col_idxs = row_permuted->get_col_idxs(); auto rp_vals = row_permuted->get_values(); size_type num_rows = orig->get_size()[0]; size_type num_nnz = orig->get_num_stored_elements(); - size_type cur_ptr = 0; - rp_row_ptrs[0] = cur_ptr; - vector orig_num_nnz_per_row(num_rows, 0, exec); - for (size_type row = 0; row < num_rows; ++row) { - orig_num_nnz_per_row[row] = orig_row_ptrs[row + 1] - orig_row_ptrs[row]; - } for (size_type row = 0; row < num_rows; ++row) { - rp_row_ptrs[row + 1] = - rp_row_ptrs[row] + orig_num_nnz_per_row[perm[row]]; + auto src_row = perm[row]; + auto dst_row = row; + rp_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; } - rp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; + components::prefix_sum(exec, rp_row_ptrs, num_rows + 1); for (size_type row = 0; row < num_rows; ++row) { - auto new_row = perm[row]; - auto new_k = orig_row_ptrs[new_row]; - for (size_type k = rp_row_ptrs[row]; - k < size_type(rp_row_ptrs[row + 1]); ++k) { - rp_col_idxs[k] = orig_col_idxs[new_k]; - rp_vals[k] = orig_vals[new_k]; - new_k++; - } + auto src_row = perm[row]; + auto dst_row = row; + auto src_begin = in_row_ptrs[src_row]; + auto dst_begin = rp_row_ptrs[dst_row]; + auto row_size = in_row_ptrs[src_row + 1] - src_begin; + std::copy_n(in_col_idxs + src_begin, row_size, rp_col_idxs + dst_begin); + std::copy_n(in_vals + src_begin, row_size, rp_vals + dst_begin); } } - -template -void row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *row_permuted) -{ - row_permute_impl(exec, permutation_indices, orig, row_permuted); -} - GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *row_permuted) { - auto perm = permutation_indices->get_const_data(); - Array inv_perm(*permutation_indices); - auto iperm = inv_perm.get_data(); - for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { - iperm[perm[ind]] = ind; - } + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto rp_row_ptrs = row_permuted->get_row_ptrs(); + auto rp_col_idxs = row_permuted->get_col_idxs(); + auto rp_vals = row_permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + size_type num_nnz = orig->get_num_stored_elements(); - row_permute_impl(exec, &inv_perm, orig, row_permuted); + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + rp_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; + } + components::prefix_sum(exec, rp_row_ptrs, num_rows + 1); + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + auto src_begin = in_row_ptrs[src_row]; + auto dst_begin = rp_row_ptrs[dst_row]; + auto row_size = in_row_ptrs[src_row + 1] - src_begin; + std::copy_n(in_col_idxs + src_begin, row_size, rp_col_idxs + dst_begin); + std::copy_n(in_vals + src_begin, row_size, rp_vals + dst_begin); + } } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -763,59 +775,29 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void column_permute_impl(const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) +void inverse_column_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *column_permuted) { - auto perm = permutation_indices->get_const_data(); - auto orig_row_ptrs = orig->get_const_row_ptrs(); - auto orig_col_idxs = orig->get_const_col_idxs(); - auto orig_vals = orig->get_const_values(); + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); auto cp_row_ptrs = column_permuted->get_row_ptrs(); auto cp_col_idxs = column_permuted->get_col_idxs(); auto cp_vals = column_permuted->get_values(); - auto num_nnz = orig->get_num_stored_elements(); - size_type num_rows = orig->get_size()[0]; - size_type num_cols = orig->get_size()[1]; + auto num_rows = orig->get_size()[0]; for (size_type row = 0; row < num_rows; ++row) { - cp_row_ptrs[row] = orig_row_ptrs[row]; - for (size_type k = orig_row_ptrs[row]; - k < size_type(orig_row_ptrs[row + 1]); ++k) { - cp_col_idxs[k] = perm[orig_col_idxs[k]]; - cp_vals[k] = orig_vals[k]; + auto row_begin = in_row_ptrs[row]; + auto row_end = in_row_ptrs[row + 1]; + cp_row_ptrs[row] = in_row_ptrs[row]; + for (auto k = row_begin; k < row_end; ++k) { + cp_col_idxs[k] = perm[in_col_idxs[k]]; + cp_vals[k] = in_vals[k]; } } - cp_row_ptrs[num_rows] = orig_row_ptrs[num_rows]; -} - - -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) -{ - auto perm = permutation_indices->get_const_data(); - Array inv_perm(*permutation_indices); - auto iperm = inv_perm.get_data(); - for (size_type ind = 0; ind < inv_perm.get_num_elems(); ++ind) { - iperm[perm[ind]] = ind; - } - column_permute_impl(&inv_perm, orig, column_permuted); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); - - -template -void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) -{ - column_permute_impl(permutation_indices, orig, column_permuted); + cp_row_ptrs[num_rows] = in_row_ptrs[num_rows]; } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( From 165f65f94a909f6fd11b6e017b1043edc0b3d23a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 26 Oct 2020 12:20:05 +0100 Subject: [PATCH 2/3] improve code style and fix sonarqube warnings --- cuda/test/matrix/csr_kernels.cpp | 65 +++++++++++++-------------- hip/test/matrix/csr_kernels.hip.cpp | 65 +++++++++++++-------------- omp/matrix/csr_kernels.cpp | 4 +- omp/test/matrix/csr_kernels.cpp | 65 +++++++++++++-------------- reference/matrix/csr_kernels.cpp | 4 +- reference/test/matrix/csr_kernels.cpp | 43 +++++++----------- 6 files changed, 107 insertions(+), 139 deletions(-) diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index 38af50b1c21..f0a1416b6bd 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -463,12 +463,11 @@ TEST_F(Csr, TransposeIsEquivalentToRef) { set_up_apply_data(std::make_shared(cuda)); - auto trans = mtx->transpose(); - auto d_trans = dmtx->transpose(); + auto trans = gko::as(mtx->transpose()); + auto d_trans = gko::as(dmtx->transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -476,13 +475,11 @@ TEST_F(Csr, ConjugateTransposeIsEquivalentToRef) { set_up_apply_complex_data(std::make_shared(cuda)); - auto trans = complex_mtx->conj_transpose(); - auto d_trans = complex_dmtx->conj_transpose(); + auto trans = gko::as(complex_mtx->conj_transpose()); + auto d_trans = gko::as(complex_dmtx->conj_transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE( - static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -708,56 +705,54 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) TEST_F(Csr, IsRowPermutable) { set_up_apply_data(std::make_shared()); - auto r_permute = gko::share(mtx->row_permute(rpermute_idxs.get())); - auto dr_permute = gko::share(dmtx->row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(r_permute), - gko::as(dr_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(r_permute), gko::as(dr_permute), 0); + auto r_permute = gko::as(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::as(dmtx->row_permute(drpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(r_permute, dr_permute); + GKO_ASSERT_MTX_NEAR(r_permute, dr_permute, 0); } TEST_F(Csr, IsColPermutable) { set_up_apply_data(std::make_shared()); - auto c_permute = gko::share(mtx->column_permute(cpermute_idxs.get())); - auto dc_permute = gko::share(dmtx->column_permute(dcpermute_idxs.get())); - ASSERT_TRUE(gko::as(dc_permute)->is_sorted_by_column_index()); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(c_permute), - gko::as(dc_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(c_permute), gko::as(dc_permute), 0); + auto c_permute = gko::as(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::as(dmtx->column_permute(dcpermute_idxs.get())); + + ASSERT_TRUE(dc_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(c_permute, dc_permute); + GKO_ASSERT_MTX_NEAR(c_permute, dc_permute, 0); } TEST_F(Csr, IsInverseRowPermutable) { set_up_apply_data(std::make_shared()); + auto inverse_r_permute = - gko::share(mtx->inverse_row_permute(rpermute_idxs.get())); + gko::as(mtx->inverse_row_permute(rpermute_idxs.get())); auto d_inverse_r_permute = - gko::share(dmtx->inverse_row_permute(drpermute_idxs.get())); + gko::as(dmtx->inverse_row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_r_permute), - gko::as(d_inverse_r_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(inverse_r_permute), - gko::as(d_inverse_r_permute), 0); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_r_permute, d_inverse_r_permute); + GKO_ASSERT_MTX_NEAR(inverse_r_permute, d_inverse_r_permute, 0); } TEST_F(Csr, IsInverseColPermutable) { set_up_apply_data(std::make_shared()); + auto inverse_c_permute = - gko::share(mtx->inverse_column_permute(cpermute_idxs.get())); + gko::as(mtx->inverse_column_permute(cpermute_idxs.get())); auto d_inverse_c_permute = - gko::share(dmtx->inverse_column_permute(dcpermute_idxs.get())); + gko::as(dmtx->inverse_column_permute(dcpermute_idxs.get())); - ASSERT_TRUE(gko::as(d_inverse_c_permute)->is_sorted_by_column_index()); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_c_permute), - gko::as(d_inverse_c_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(inverse_c_permute), - gko::as(d_inverse_c_permute), 0); + ASSERT_TRUE(d_inverse_c_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_c_permute, d_inverse_c_permute); + GKO_ASSERT_MTX_NEAR(inverse_c_permute, d_inverse_c_permute, 0); } diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 510189ad70e..1fb01640dfa 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -462,12 +462,11 @@ TEST_F(Csr, TransposeIsEquivalentToRef) { set_up_apply_data(std::make_shared(hip)); - auto trans = mtx->transpose(); - auto d_trans = dmtx->transpose(); + auto trans = gko::as(mtx->transpose()); + auto d_trans = gko::as(dmtx->transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -475,13 +474,11 @@ TEST_F(Csr, ConjugateTransposeIsEquivalentToRef) { set_up_apply_data(std::make_shared(hip)); - auto ctrans = mtx->conj_transpose(); - auto d_ctrans = dmtx->conj_transpose(); + auto trans = gko::as(mtx->conj_transpose()); + auto d_trans = gko::as(dmtx->conj_transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_ctrans.get()), - static_cast(ctrans.get()), 0.0); - ASSERT_TRUE( - static_cast(d_ctrans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -707,56 +704,54 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) TEST_F(Csr, IsRowPermutable) { set_up_apply_data(std::make_shared()); - auto r_permute = gko::share(mtx->row_permute(rpermute_idxs.get())); - auto dr_permute = gko::share(dmtx->row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(r_permute), - gko::as(dr_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(r_permute), gko::as(dr_permute), 0); + auto r_permute = gko::as(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::as(dmtx->row_permute(drpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(r_permute, dr_permute); + GKO_ASSERT_MTX_NEAR(r_permute, dr_permute, 0); } TEST_F(Csr, IsColPermutable) { set_up_apply_data(std::make_shared()); - auto c_permute = gko::share(mtx->column_permute(cpermute_idxs.get())); - auto dc_permute = gko::share(dmtx->column_permute(dcpermute_idxs.get())); - ASSERT_TRUE(gko::as(dc_permute)->is_sorted_by_column_index()); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(c_permute), - gko::as(dc_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(c_permute), gko::as(dc_permute), 0); + auto c_permute = gko::as(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::as(dmtx->column_permute(dcpermute_idxs.get())); + + ASSERT_TRUE(dc_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(c_permute, dc_permute); + GKO_ASSERT_MTX_NEAR(c_permute, dc_permute, 0); } TEST_F(Csr, IsInverseRowPermutable) { set_up_apply_data(std::make_shared()); + auto inverse_r_permute = - gko::share(mtx->inverse_row_permute(rpermute_idxs.get())); + gko::as(mtx->inverse_row_permute(rpermute_idxs.get())); auto d_inverse_r_permute = - gko::share(dmtx->inverse_row_permute(drpermute_idxs.get())); + gko::as(dmtx->inverse_row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_r_permute), - gko::as(d_inverse_r_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(inverse_r_permute), - gko::as(d_inverse_r_permute), 0); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_r_permute, d_inverse_r_permute); + GKO_ASSERT_MTX_NEAR(inverse_r_permute, d_inverse_r_permute, 0); } TEST_F(Csr, IsInverseColPermutable) { set_up_apply_data(std::make_shared()); + auto inverse_c_permute = - gko::share(mtx->inverse_column_permute(cpermute_idxs.get())); + gko::as(mtx->inverse_column_permute(cpermute_idxs.get())); auto d_inverse_c_permute = - gko::share(dmtx->inverse_column_permute(dcpermute_idxs.get())); + gko::as(dmtx->inverse_column_permute(dcpermute_idxs.get())); - ASSERT_TRUE(gko::as(d_inverse_c_permute)->is_sorted_by_column_index()); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_c_permute), - gko::as(d_inverse_c_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(inverse_c_permute), - gko::as(d_inverse_c_permute), 0); + ASSERT_TRUE(d_inverse_c_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_c_permute, d_inverse_c_permute); + GKO_ASSERT_MTX_NEAR(inverse_c_permute, d_inverse_c_permute, 0); } diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 32007104cc1..0f681fa409b 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -604,7 +604,7 @@ void invert_permutation(std::shared_ptr exec, IndexType *inv_permutation) { #pragma omp parallel for - for (size_type i = 0; i < size; ++i) { + for (IndexType i = 0; i < static_cast(size); ++i) { inv_permutation[permutation_indices[i]] = i; } } @@ -624,7 +624,6 @@ void row_permute(std::shared_ptr exec, const IndexType *perm, auto rp_col_idxs = row_permuted->get_col_idxs(); auto rp_vals = row_permuted->get_values(); size_type num_rows = orig->get_size()[0]; - size_type num_nnz = orig->get_num_stored_elements(); #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { @@ -664,7 +663,6 @@ void inverse_row_permute(std::shared_ptr exec, auto rp_col_idxs = row_permuted->get_col_idxs(); auto rp_vals = row_permuted->get_values(); size_type num_rows = orig->get_size()[0]; - size_type num_nnz = orig->get_num_stored_elements(); #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 0dcfaac7844..064a129b9eb 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -325,12 +325,11 @@ TEST_F(Csr, TransposeIsEquivalentToRef) { set_up_apply_data(); - auto trans = mtx->transpose(); - auto d_trans = dmtx->transpose(); + auto trans = gko::as(mtx->transpose()); + auto d_trans = gko::as(dmtx->transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -338,13 +337,11 @@ TEST_F(Csr, ConjugateTransposeIsEquivalentToRef) { set_up_apply_data(); - auto trans = complex_mtx->conj_transpose(); - auto d_trans = complex_dmtx->conj_transpose(); + auto trans = gko::as(complex_mtx->conj_transpose()); + auto d_trans = gko::as(complex_dmtx->conj_transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE( - static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -476,56 +473,54 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) TEST_F(Csr, IsRowPermutable) { set_up_apply_data(); - auto r_permute = gko::share(mtx->row_permute(rpermute_idxs.get())); - auto dr_permute = gko::share(dmtx->row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(r_permute), - gko::as(dr_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(r_permute), gko::as(dr_permute), 0); + auto r_permute = gko::as(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::as(dmtx->row_permute(drpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(r_permute, dr_permute); + GKO_ASSERT_MTX_NEAR(r_permute, dr_permute, 0); } TEST_F(Csr, IsColPermutable) { set_up_apply_data(); - auto c_permute = gko::share(mtx->column_permute(cpermute_idxs.get())); - auto dc_permute = gko::share(dmtx->column_permute(dcpermute_idxs.get())); - ASSERT_TRUE(gko::as(dc_permute)->is_sorted_by_column_index()); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(c_permute), - gko::as(dc_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(c_permute), gko::as(dc_permute), 0); + auto c_permute = gko::as(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::as(dmtx->column_permute(dcpermute_idxs.get())); + + ASSERT_TRUE(dc_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(c_permute, dc_permute); + GKO_ASSERT_MTX_NEAR(c_permute, dc_permute, 0); } TEST_F(Csr, IsInverseRowPermutable) { set_up_apply_data(); + auto inverse_r_permute = - gko::share(mtx->inverse_row_permute(rpermute_idxs.get())); + gko::as(mtx->inverse_row_permute(rpermute_idxs.get())); auto d_inverse_r_permute = - gko::share(dmtx->inverse_row_permute(drpermute_idxs.get())); + gko::as(dmtx->inverse_row_permute(drpermute_idxs.get())); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_r_permute), - gko::as(d_inverse_r_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(inverse_r_permute), - gko::as(d_inverse_r_permute), 0); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_r_permute, d_inverse_r_permute); + GKO_ASSERT_MTX_NEAR(inverse_r_permute, d_inverse_r_permute, 0); } TEST_F(Csr, IsInverseColPermutable) { set_up_apply_data(); + auto inverse_c_permute = - gko::share(mtx->inverse_column_permute(cpermute_idxs.get())); + gko::as(mtx->inverse_column_permute(cpermute_idxs.get())); auto d_inverse_c_permute = - gko::share(dmtx->inverse_column_permute(dcpermute_idxs.get())); + gko::as(dmtx->inverse_column_permute(dcpermute_idxs.get())); - ASSERT_TRUE(gko::as(d_inverse_c_permute)->is_sorted_by_column_index()); - GKO_ASSERT_MTX_EQ_SPARSITY(gko::as(inverse_c_permute), - gko::as(d_inverse_c_permute)); - GKO_ASSERT_MTX_NEAR(gko::as(inverse_c_permute), - gko::as(d_inverse_c_permute), 0); + ASSERT_TRUE(d_inverse_c_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_c_permute, d_inverse_c_permute); + GKO_ASSERT_MTX_NEAR(inverse_c_permute, d_inverse_c_permute, 0); } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 1be2ae79cb5..b22d7e3d66e 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -694,7 +694,7 @@ void invert_permutation(std::shared_ptr exec, size_type size, const IndexType *permutation_indices, IndexType *inv_permutation) { - for (size_type i = 0; i < size; ++i) { + for (IndexType i = 0; i < static_cast(size); ++i) { inv_permutation[permutation_indices[i]] = i; } } @@ -715,7 +715,6 @@ void row_permute(std::shared_ptr exec, auto rp_col_idxs = row_permuted->get_col_idxs(); auto rp_vals = row_permuted->get_values(); size_type num_rows = orig->get_size()[0]; - size_type num_nnz = orig->get_num_stored_elements(); for (size_type row = 0; row < num_rows; ++row) { auto src_row = perm[row]; @@ -751,7 +750,6 @@ void inverse_row_permute(std::shared_ptr exec, auto rp_col_idxs = row_permuted->get_col_idxs(); auto rp_vals = row_permuted->get_values(); size_type num_rows = orig->get_size()[0]; - size_type num_nnz = orig->get_num_stored_elements(); for (size_type row = 0; row < num_rows; ++row) { auto src_row = row; diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 3cedeba21d4..dfeb1b7ace3 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1042,8 +1042,7 @@ TYPED_TEST(Csr, SquareMtxIsTransposable) {0.0, 1.5, 2.0}}, this->exec); // clang-format on - auto trans = mtx2->transpose(); - auto trans_as_csr = static_cast(trans.get()); + auto trans_as_csr = gko::as(mtx2->transpose()); // clang-format off GKO_ASSERT_MTX_NEAR(trans_as_csr, @@ -1057,8 +1056,7 @@ TYPED_TEST(Csr, SquareMtxIsTransposable) TYPED_TEST(Csr, NonSquareMtxIsTransposable) { using Csr = typename TestFixture::Mtx; - auto trans = this->mtx->transpose(); - auto trans_as_csr = static_cast(trans.get()); + auto trans_as_csr = gko::as(this->mtx->transpose()); // clang-format off GKO_ASSERT_MTX_NEAR(trans_as_csr, @@ -1080,9 +1078,8 @@ TYPED_TEST(Csr, SquareMatrixIsRowPermutable) // clang-format on gko::Array permute_idxs{this->exec, {1, 2, 0}}; - auto row_permute = p_mtx->row_permute(&permute_idxs); + auto row_permute_csr = gko::as(p_mtx->row_permute(&permute_idxs)); - auto row_permute_csr = static_cast(row_permute.get()); // clang-format off GKO_ASSERT_MTX_NEAR(row_permute_csr, l({{0.0, 5.0, 0.0}, @@ -1103,9 +1100,8 @@ TYPED_TEST(Csr, NonSquareMatrixIsRowPermutable) // clang-format on gko::Array permute_idxs{this->exec, {1, 0}}; - auto row_permute = p_mtx->row_permute(&permute_idxs); + auto row_permute_csr = gko::as(p_mtx->row_permute(&permute_idxs)); - auto row_permute_csr = static_cast(row_permute.get()); // clang-format off GKO_ASSERT_MTX_NEAR(row_permute_csr, l({{0.0, 5.0, 0.0}, @@ -1126,9 +1122,8 @@ TYPED_TEST(Csr, SquareMatrixIsColPermutable) // clang-format on gko::Array permute_idxs{this->exec, {1, 2, 0}}; - auto c_permute = p_mtx->column_permute(&permute_idxs); + auto c_permute_csr = gko::as(p_mtx->column_permute(&permute_idxs)); - auto c_permute_csr = static_cast(c_permute.get()); // clang-format off GKO_ASSERT_MTX_NEAR(c_permute_csr, l({{3.0, 2.0, 1.0}, @@ -1149,9 +1144,8 @@ TYPED_TEST(Csr, NonSquareMatrixIsColPermutable) // clang-format on gko::Array permute_idxs{this->exec, {1, 2, 0}}; - auto c_permute = p_mtx->column_permute(&permute_idxs); + auto c_permute_csr = gko::as(p_mtx->column_permute(&permute_idxs)); - auto c_permute_csr = static_cast(c_permute.get()); // clang-format off GKO_ASSERT_MTX_NEAR(c_permute_csr, l({{0.0, 2.0, 1.0}, @@ -1172,11 +1166,9 @@ TYPED_TEST(Csr, SquareMatrixIsInverseRowPermutable) // clang-format on gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; - auto inverse_row_permute = - inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs); - auto inverse_row_permute_csr = - static_cast(inverse_row_permute.get()); + gko::as(inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs)); + // clang-format off GKO_ASSERT_MTX_NEAR(inverse_row_permute_csr, l({{0.0, 1.5, 2.0}, @@ -1197,11 +1189,9 @@ TYPED_TEST(Csr, NonSquareMatrixIsInverseRowPermutable) // clang-format on gko::Array inverse_permute_idxs{this->exec, {1, 0}}; - auto inverse_row_permute = - inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs); - auto inverse_row_permute_csr = - static_cast(inverse_row_permute.get()); + gko::as(inverse_p_mtx->inverse_row_permute(&inverse_permute_idxs)); + // clang-format off GKO_ASSERT_MTX_NEAR(inverse_row_permute_csr, l({{0.0, 5.0, 0.0}, @@ -1222,10 +1212,9 @@ TYPED_TEST(Csr, SquareMatrixIsInverseColPermutable) // clang-format on gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; - auto inverse_c_permute = - inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs); + auto inverse_c_permute_csr = gko::as( + inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs)); - auto inverse_c_permute_csr = static_cast(inverse_c_permute.get()); // clang-format off GKO_ASSERT_MTX_NEAR(inverse_c_permute_csr, l({{2.0, 1.0, 3.0}, @@ -1246,10 +1235,9 @@ TYPED_TEST(Csr, NonSquareMatrixIsInverseColPermutable) // clang-format on gko::Array inverse_permute_idxs{this->exec, {1, 2, 0}}; - auto inverse_c_permute = - inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs); + auto inverse_c_permute_csr = gko::as( + inverse_p_mtx->inverse_column_permute(&inverse_permute_idxs)); - auto inverse_c_permute_csr = static_cast(inverse_c_permute.get()); // clang-format off GKO_ASSERT_MTX_NEAR(inverse_c_permute_csr, l({{2.0, 1.0, 3.0}, @@ -1418,8 +1406,7 @@ TYPED_TEST(CsrComplex, MtxIsConjugateTransposable) {T{0.0, 0.0}, T{0.0, 1.5}, T{2.0,0.0}}}, exec); // clang-format on - auto trans = mtx2->conj_transpose(); - auto trans_as_csr = static_cast(trans.get()); + auto trans_as_csr = gko::as(mtx2->conj_transpose()); // clang-format off GKO_ASSERT_MTX_NEAR(trans_as_csr, From b59015b3dd8800aa2ba1ed04a08177d24706910c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 3 Nov 2020 13:56:52 +0100 Subject: [PATCH 3/3] hacky fix for rocThrust compilation issues --- benchmark/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index f3a05ab5c9e..62cfb1b2538 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -25,6 +25,8 @@ function(ginkgo_benchmark_hipsp_linops name) target_compile_definitions("${name}" PRIVATE HAS_HIP=1) EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) set_target_properties("${name}" PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS}) + # use Thrust C++ device just for compilation, we don't use thrust::complex in the benchmarks + target_compile_definitions("${name}" PUBLIC -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP) # for some reason, HIP creates a dependency on Threads::Threads here, so we # need to find it find_package(Threads REQUIRED)