Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create submatrix for sparse matrix formats #885

Merged
Merged 14 commits on Nov 12, 2021
57 changes: 57 additions & 0 deletions common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1046,3 +1046,60 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel(
out_vals[out_begin + i] = in_vals[in_begin + i];
}
}


namespace kernel {


// Fills the column indices and values of the submatrix `result` with the
// entries of `source` that fall inside the selected row/column window.
// One thread handles one result row; `result_row_ptrs` must already contain
// valid (prefix-summed) row pointers.
template <typename ValueType, typename IndexType>
__global__
__launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(
    const size_type num_rows, const size_type num_cols,
    const size_type num_nnz, const size_type row_offset,
    const size_type col_offset,
    const IndexType* __restrict__ source_row_ptrs,
    const IndexType* __restrict__ source_col_idxs,
    const ValueType* __restrict__ source_values,
    const IndexType* __restrict__ result_row_ptrs,
    IndexType* __restrict__ result_col_idxs,
    ValueType* __restrict__ result_values)
{
    const auto res_row = thread::get_thread_id_flat();
    if (res_row < num_rows) {
        const auto src_row = res_row + row_offset;
        // Next free slot in the result arrays for this row.
        auto res_nnz = result_row_ptrs[res_row];
        for (auto nnz = source_row_ptrs[src_row];
             nnz < source_row_ptrs[src_row + 1]; ++nnz) {
            // Cast the offset to IndexType so the subtraction stays signed;
            // otherwise res_col deduces to the unsigned size_type and the
            // `res_col >= 0` check below would be vacuously true.
            const auto res_col =
                source_col_idxs[nnz] - static_cast<IndexType>(col_offset);
            if (res_col >= 0 && res_col < static_cast<IndexType>(num_cols)) {
                result_col_idxs[res_nnz] = res_col;
                result_values[res_nnz] = source_values[nnz];
                res_nnz++;
            }
        }
    }
}


// Counts, for each row in `row_span`, how many stored entries have a column
// index inside `col_span`, writing the count to `nnz_per_row` (indexed
// relative to the start of the span). One thread handles one row.
template <typename IndexType>
__global__
__launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span(
    const span row_span, const span col_span,
    const IndexType* __restrict__ row_ptrs,
    const IndexType* __restrict__ col_idxs,
    IndexType* __restrict__ nnz_per_row)
{
    const auto row = thread::get_thread_id_flat() + row_span.begin;
    if (row < row_span.end) {
        const auto begin = row_ptrs[row];
        const auto end = row_ptrs[row + 1];
        IndexType count{};
        for (auto idx = begin; idx < end; ++idx) {
            const auto col = col_idxs[idx];
            if (col >= col_span.begin && col < col_span.end) {
                ++count;
            }
        }
        nnz_per_row[row - row_span.begin] = count;
    }
}


} // namespace kernel
2 changes: 2 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType)
Expand Down
30 changes: 30 additions & 0 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "core/components/absolute_array.hpp"
#include "core/components/fill_array.hpp"
#include "core/components/prefix_sum.hpp"
#include "core/matrix/csr_kernels.hpp"


Expand All @@ -69,6 +70,9 @@ GKO_REGISTER_OPERATION(convert_to_sellp, csr::convert_to_sellp);
GKO_REGISTER_OPERATION(calculate_total_cols, csr::calculate_total_cols);
GKO_REGISTER_OPERATION(convert_to_ell, csr::convert_to_ell);
GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid);
GKO_REGISTER_OPERATION(calculate_nonzeros_per_row_in_span,
csr::calculate_nonzeros_per_row_in_span);
GKO_REGISTER_OPERATION(compute_submatrix, csr::compute_submatrix);
GKO_REGISTER_OPERATION(transpose, csr::transpose);
GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose);
GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute);
Expand All @@ -85,6 +89,7 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index,
csr::is_sorted_by_column_index);
GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal);
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(inplace_absolute_array,
components::inplace_absolute_array);
GKO_REGISTER_OPERATION(outplace_absolute_array,
Expand Down Expand Up @@ -536,6 +541,31 @@ bool Csr<ValueType, IndexType>::is_sorted_by_column_index() const
}


// Extracts the submatrix spanned by `row_span` x `column_span` as a new Csr
// matrix on the same executor, inheriting this matrix's strategy.
template <typename ValueType, typename IndexType>
std::unique_ptr<Csr<ValueType, IndexType>>
Csr<ValueType, IndexType>::create_submatrix(const gko::span& row_span,
                                            const gko::span& column_span) const
{
    using Mat = Csr<ValueType, IndexType>;
    auto exec = this->get_executor();
    auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length());
    // Count the in-span nonzeros of each row, then prefix-sum the counts to
    // obtain the row pointers of the submatrix.
    Array<IndexType> row_ptrs(exec, sub_mat_size[0] + 1);
    exec->run(csr::make_calculate_nonzeros_per_row_in_span(
        this, row_span, column_span, &row_ptrs));
    exec->run(csr::make_prefix_sum(row_ptrs.get_data(), sub_mat_size[0] + 1));
    auto num_nnz =
        exec->copy_val_to_host(row_ptrs.get_data() + sub_mat_size[0]);
    // Temporaries already bind to the rvalue overload; wrapping them in
    // std::move is redundant.
    auto sub_mat =
        Mat::create(exec, sub_mat_size, Array<ValueType>(exec, num_nnz),
                    Array<IndexType>(exec, num_nnz), std::move(row_ptrs),
                    this->get_strategy());
    exec->run(csr::make_compute_submatrix(this, row_span, column_span,
                                          sub_mat.get()));
    sub_mat->make_srow();
    return sub_mat;
}


template <typename ValueType, typename IndexType>
std::unique_ptr<Diagonal<ValueType>>
Csr<ValueType, IndexType>::extract_diagonal() const
Expand Down
16 changes: 16 additions & 0 deletions core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,18 @@ namespace kernels {
const matrix::Csr<ValueType, IndexType>* source, \
Array<size_type>* result)

// Declares the kernel that counts, for each row in row_span, the stored
// entries whose column index lies inside col_span (result goes to row_nnz).
#define GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType) \
    void calculate_nonzeros_per_row_in_span( \
        std::shared_ptr<const DefaultExecutor> exec, \
        const matrix::Csr<ValueType, IndexType>* source, const span& row_span, \
        const span& col_span, Array<IndexType>* row_nnz)

// Declares the kernel that copies the in-span entries of `source` into
// `result`; `result` must already hold valid row pointers.
#define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \
    void compute_submatrix(std::shared_ptr<const DefaultExecutor> exec, \
                           const matrix::Csr<ValueType, IndexType>* source, \
                           gko::span row_span, gko::span col_span, \
                           matrix::Csr<ValueType, IndexType>* result)

#define GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \
void sort_by_column_index(std::shared_ptr<const DefaultExecutor> exec, \
matrix::Csr<ValueType, IndexType>* to_sort)
Expand Down Expand Up @@ -240,6 +252,10 @@ namespace kernels {
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \
Expand Down
49 changes: 49 additions & 0 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,55 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL);


// CUDA dispatch for counting in-span nonzeros per row: launches one thread
// per row of `row_span`.
template <typename ValueType, typename IndexType>
void calculate_nonzeros_per_row_in_span(
    std::shared_ptr<const DefaultExecutor> exec,
    const matrix::Csr<ValueType, IndexType>* source, const span& row_span,
    const span& col_span, Array<IndexType>* row_nnz)
{
    auto row_ptrs = source->get_const_row_ptrs();
    auto col_idxs = source->get_const_col_idxs();
    auto grid_dim = ceildiv(row_span.length(), default_block_size);

    // An empty span would give a zero-sized grid, which is an invalid CUDA
    // launch configuration — skip the launch entirely in that case.
    if (grid_dim > 0) {
        kernel::
            calculate_nnz_per_row_in_span<<<grid_dim, default_block_size>>>(
                row_span, col_span, as_cuda_type(row_ptrs),
                as_cuda_type(col_idxs), as_cuda_type(row_nnz->get_data()));
    }
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL);


// CUDA dispatch for extracting the submatrix entries: one thread per result
// row (the kernel iterates over that row's source entries itself).
template <typename ValueType, typename IndexType>
void compute_submatrix(std::shared_ptr<const DefaultExecutor> exec,
                       const matrix::Csr<ValueType, IndexType>* source,
                       gko::span row_span, gko::span col_span,
                       matrix::Csr<ValueType, IndexType>* result)
{
    const auto row_offset = row_span.begin;
    const auto col_offset = col_span.begin;
    const auto num_rows = result->get_size()[0];
    const auto num_cols = result->get_size()[1];
    const auto num_nnz = source->get_num_stored_elements();
    // The kernel assigns one thread per result row, so size the grid by the
    // number of rows; the previous nnz-based grid launched one thread per
    // source nonzero, far more than needed.
    const auto grid_dim = ceildiv(num_rows, default_block_size);
    // Guard against a zero-sized grid (invalid launch configuration) for
    // empty row spans.
    if (grid_dim > 0) {
        kernel::compute_submatrix_idxs_and_vals<<<grid_dim,
                                                  default_block_size>>>(
            num_rows, num_cols, num_nnz, row_offset, col_offset,
            as_cuda_type(source->get_const_row_ptrs()),
            as_cuda_type(source->get_const_col_idxs()),
            as_cuda_type(source->get_const_values()),
            as_cuda_type(result->get_const_row_ptrs()),
            as_cuda_type(result->get_col_idxs()),
            as_cuda_type(result->get_values()));
    }
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_hybrid(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType>* source,
Expand Down
82 changes: 82 additions & 0 deletions cuda/test/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/matrix/sparsity_csr.hpp>


#include "core/components/prefix_sum.hpp"
#include "core/matrix/csr_kernels.hpp"
#include "core/test/utils/unsort_matrix.hpp"
#include "cuda/test/utils.hpp"
Expand Down Expand Up @@ -100,6 +101,14 @@ class Csr : public ::testing::Test {
std::normal_distribution<>(-1.0, 1.0), rand_engine, ref);
}

// Generates a random host matrix (~5 nonzeros per row) and mirrors it on
// the CUDA executor; used by the submatrix tests (mtx2/dmtx2 pair).
void set_up_apply_data()
{
    mtx2 = Mtx::create(ref);
    mtx2->copy_from(gen_mtx<Mtx>(mtx_size[0], mtx_size[1], 5));
    dmtx2 = Mtx::create(cuda);
    dmtx2->copy_from(mtx2.get());
}

void set_up_apply_data(std::shared_ptr<Mtx::strategy_type> strategy,
int num_vectors = 1)
{
Expand Down Expand Up @@ -147,13 +156,15 @@ class Csr : public ::testing::Test {
dmtx->copy_from(mtx.get());
}


std::shared_ptr<gko::ReferenceExecutor> ref;
std::shared_ptr<const gko::CudaExecutor> cuda;

const gko::dim<2> mtx_size;
std::ranlux48 rand_engine;

std::unique_ptr<Mtx> mtx;
std::unique_ptr<Mtx> mtx2;
std::unique_ptr<ComplexMtx> complex_mtx;
std::unique_ptr<Mtx> square_mtx;
std::unique_ptr<Vec> expected;
Expand All @@ -162,6 +173,7 @@ class Csr : public ::testing::Test {
std::unique_ptr<Vec> beta;

std::unique_ptr<Mtx> dmtx;
std::unique_ptr<Mtx> dmtx2;
std::unique_ptr<ComplexMtx> complex_dmtx;
std::unique_ptr<Mtx> square_dmtx;
std::unique_ptr<Vec> dresult;
Expand Down Expand Up @@ -926,4 +938,74 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef)
}


// Verifies that the CUDA per-row in-span nonzero count matches the
// reference kernel on the same matrix and spans.
TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef)
{
    set_up_apply_data();
    gko::span rspan{7, 51};
    gko::span cspan{22, 88};
    auto row_nnz = gko::Array<int>(this->ref, rspan.length() + 1);
    auto drow_nnz = gko::Array<int>(this->cuda, row_nnz);

    gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span(
        this->ref, this->mtx2.get(), rspan, cspan, &row_nnz);
    gko::kernels::cuda::csr::calculate_nonzeros_per_row_in_span(
        this->cuda, this->dmtx2.get(), rspan, cspan, &drow_nnz);

    GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz);
}


// Verifies that the CUDA compute_submatrix kernel produces the same
// submatrix as the reference kernel, given identical precomputed row
// pointers.
TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef)
{
    using Mtx = gko::matrix::Csr<>;
    using IndexType = int;
    using ValueType = double;
    set_up_apply_data();
    gko::span rspan{7, 51};
    gko::span cspan{22, 88};
    // Build the submatrix row pointers on the reference executor (count,
    // then prefix-sum) and copy them to the device.
    auto row_nnz = gko::Array<int>(this->ref, rspan.length() + 1);
    gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span(
        this->ref, this->mtx2.get(), rspan, cspan, &row_nnz);
    gko::kernels::reference::components::prefix_sum(
        this->ref, row_nnz.get_data(), row_nnz.get_num_elems());
    auto num_nnz = row_nnz.get_data()[rspan.length()];
    auto drow_nnz = gko::Array<int>(this->cuda, row_nnz);
    auto smat1 =
        Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()),
                    std::move(gko::Array<ValueType>(this->ref, num_nnz)),
                    std::move(gko::Array<IndexType>(this->ref, num_nnz)),
                    std::move(row_nnz));
    auto sdmat1 =
        Mtx::create(this->cuda, gko::dim<2>(rspan.length(), cspan.length()),
                    std::move(gko::Array<ValueType>(this->cuda, num_nnz)),
                    std::move(gko::Array<IndexType>(this->cuda, num_nnz)),
                    std::move(drow_nnz));

    gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(),
                                                    rspan, cspan, smat1.get());
    gko::kernels::cuda::csr::compute_submatrix(this->cuda, this->dmtx2.get(),
                                               rspan, cspan, sdmat1.get());

    GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0);
}


// Verifies the full create_submatrix entry point end-to-end: the CUDA
// result must match the reference result exactly.
TEST_F(Csr, CreateSubMatrixIsEquivalentToRef)
{
    set_up_apply_data();
    gko::span rspan{47, 81};
    gko::span cspan{2, 31};

    auto smat1 = this->mtx2->create_submatrix(rspan, cspan);
    auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan);

    GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0);
}


} // namespace
Loading