Skip to content

Commit

Permalink
Merge: Create submatrix for CSR
Browse files Browse the repository at this point in the history
This PR adds support for creating submatrices from spans for the CSR matrix format. Dense already offers similar functionality, but the two differ: for CSR a new matrix is created, whereas Dense returns a view into the existing matrix.

+ Only CSR is supported for now, but other formats could be easily added.
+ All backend kernels + tests have been added.

Related PR: #885
  • Loading branch information
pratikvn authored Nov 12, 2021
2 parents f61e7a7 + 7494456 commit d9dcea6
Show file tree
Hide file tree
Showing 15 changed files with 853 additions and 0 deletions.
57 changes: 57 additions & 0 deletions common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1046,3 +1046,60 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel(
out_vals[out_begin + i] = in_vals[in_begin + i];
}
}


namespace kernel {


// Gathers the column indices and values of a CSR submatrix.
//
// One thread per result row: each thread re-scans its source row
// (src_row = res_row + row_offset) and copies every entry whose column
// lies inside [col_offset, col_offset + num_cols) into the slot range
// given by the precomputed result row pointers.
//
// NOTE(review): `res_col` mixes IndexType and size_type arithmetic;
// assuming size_type is unsigned (TODO confirm), columns left of the
// span wrap around to a huge value and are rejected by the
// `res_col < num_cols` test, so the `res_col >= 0` check is redundant
// but harmless.
template <typename ValueType, typename IndexType>
__global__
__launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals(
    const size_type num_rows, const size_type num_cols,
    const size_type num_nnz, const size_type row_offset,
    const size_type col_offset,
    const IndexType* __restrict__ source_row_ptrs,
    const IndexType* __restrict__ source_col_idxs,
    const ValueType* __restrict__ source_values,
    const IndexType* __restrict__ result_row_ptrs,
    IndexType* __restrict__ result_col_idxs,
    ValueType* __restrict__ result_values)
{
    const auto res_row = thread::get_thread_id_flat();
    if (res_row < num_rows) {
        const auto src_row = res_row + row_offset;
        // Next free slot of this result row, taken from the row pointers.
        auto res_nnz = result_row_ptrs[res_row];
        for (auto nnz = source_row_ptrs[src_row];
             nnz < source_row_ptrs[src_row + 1]; ++nnz) {
            const auto res_col = source_col_idxs[nnz] - col_offset;
            if (res_col < num_cols && res_col >= 0) {
                result_col_idxs[res_nnz] = res_col;
                result_values[res_nnz] = source_values[nnz];
                res_nnz++;
            }
        }
    }
}


// Counts, for every row in row_span, the stored entries whose column
// lies in col_span, and writes the count to nnz_per_row (indexed
// relative to the start of the row span). One thread per row.
template <typename IndexType>
__global__
__launch_bounds__(default_block_size) void calculate_nnz_per_row_in_span(
    const span row_span, const span col_span,
    const IndexType* __restrict__ row_ptrs,
    const IndexType* __restrict__ col_idxs,
    IndexType* __restrict__ nnz_per_row)
{
    const auto src_row = thread::get_thread_id_flat() + row_span.begin;
    if (src_row < row_span.end) {
        IndexType nnz{};
        for (auto i = row_ptrs[src_row]; i < row_ptrs[src_row + 1]; ++i) {
            // Half-open column span: [col_span.begin, col_span.end).
            if (col_idxs[i] >= col_span.begin && col_idxs[i] < col_span.end) {
                nnz++;
            }
        }
        nnz_per_row[src_row - row_span.begin] = nnz;
    }
}


} // namespace kernel
2 changes: 2 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,8 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType)
Expand Down
30 changes: 30 additions & 0 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "core/components/absolute_array.hpp"
#include "core/components/fill_array.hpp"
#include "core/components/prefix_sum.hpp"
#include "core/matrix/csr_kernels.hpp"


Expand All @@ -69,6 +70,9 @@ GKO_REGISTER_OPERATION(convert_to_sellp, csr::convert_to_sellp);
GKO_REGISTER_OPERATION(calculate_total_cols, csr::calculate_total_cols);
GKO_REGISTER_OPERATION(convert_to_ell, csr::convert_to_ell);
GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid);
GKO_REGISTER_OPERATION(calculate_nonzeros_per_row_in_span,
csr::calculate_nonzeros_per_row_in_span);
GKO_REGISTER_OPERATION(compute_submatrix, csr::compute_submatrix);
GKO_REGISTER_OPERATION(transpose, csr::transpose);
GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose);
GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute);
Expand All @@ -85,6 +89,7 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index,
csr::is_sorted_by_column_index);
GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal);
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(inplace_absolute_array,
components::inplace_absolute_array);
GKO_REGISTER_OPERATION(outplace_absolute_array,
Expand Down Expand Up @@ -536,6 +541,31 @@ bool Csr<ValueType, IndexType>::is_sorted_by_column_index() const
}


/**
 * Creates a new CSR matrix containing the entries of this matrix that
 * fall inside the given row and column spans.
 *
 * Unlike Dense::create_submatrix, which returns a view, this allocates
 * and fills a new matrix (inheriting this matrix's strategy).
 *
 * @param row_span     half-open range of rows to extract
 * @param column_span  half-open range of columns to extract
 * @return a newly allocated submatrix of size row_span x column_span
 */
template <typename ValueType, typename IndexType>
std::unique_ptr<Csr<ValueType, IndexType>>
Csr<ValueType, IndexType>::create_submatrix(const gko::span& row_span,
                                            const gko::span& column_span) const
{
    using Mat = Csr<ValueType, IndexType>;
    auto exec = this->get_executor();
    auto sub_mat_size = gko::dim<2>(row_span.length(), column_span.length());
    // Count the nonzeros of each submatrix row...
    Array<IndexType> row_ptrs(exec, row_span.length() + 1);
    exec->run(csr::make_calculate_nonzeros_per_row_in_span(
        this, row_span, column_span, &row_ptrs));
    // ...and turn the counts into row pointers via an exclusive prefix
    // sum; the last row pointer is the total number of stored elements.
    exec->run(csr::make_prefix_sum(row_ptrs.get_data(), row_span.length() + 1));
    auto num_nnz =
        exec->copy_val_to_host(row_ptrs.get_data() + sub_mat_size[0]);
    // The temporary arrays bind to the rvalue overloads directly;
    // std::move on a prvalue would be redundant.
    auto sub_mat = Mat::create(exec, sub_mat_size,
                               Array<ValueType>(exec, num_nnz),
                               Array<IndexType>(exec, num_nnz),
                               std::move(row_ptrs), this->get_strategy());
    exec->run(csr::make_compute_submatrix(this, row_span, column_span,
                                          sub_mat.get()));
    sub_mat->make_srow();
    return sub_mat;
}


template <typename ValueType, typename IndexType>
std::unique_ptr<Diagonal<ValueType>>
Csr<ValueType, IndexType>::extract_diagonal() const
Expand Down
16 changes: 16 additions & 0 deletions core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,18 @@ namespace kernels {
const matrix::Csr<ValueType, IndexType>* source, \
Array<size_type>* result)

#define GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType) \
void calculate_nonzeros_per_row_in_span( \
std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType>* source, const span& row_span, \
const span& col_span, Array<IndexType>* row_nnz)

#define GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType) \
void compute_submatrix(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType>* source, \
gko::span row_span, gko::span col_span, \
matrix::Csr<ValueType, IndexType>* result)

#define GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \
void sort_by_column_index(std::shared_ptr<const DefaultExecutor> exec, \
matrix::Csr<ValueType, IndexType>* to_sort)
Expand Down Expand Up @@ -240,6 +252,10 @@ namespace kernels {
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \
Expand Down
49 changes: 49 additions & 0 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,55 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL);


/**
 * CUDA implementation: counts the nonzeros per row of the selected
 * submatrix by launching one thread per row of the row span.
 *
 * @param exec      the CUDA executor the kernel runs on
 * @param source    the source CSR matrix
 * @param row_span  half-open range of rows to inspect
 * @param col_span  half-open range of columns to count
 * @param row_nnz   output array receiving one count per row of the span
 */
template <typename ValueType, typename IndexType>
void calculate_nonzeros_per_row_in_span(
    std::shared_ptr<const DefaultExecutor> exec,
    const matrix::Csr<ValueType, IndexType>* source, const span& row_span,
    const span& col_span, Array<IndexType>* row_nnz)
{
    const auto row_ptrs = source->get_const_row_ptrs();
    const auto col_idxs = source->get_const_col_idxs();
    const auto grid_dim = ceildiv(row_span.length(), default_block_size);

    // An empty span needs no work, and a zero-sized grid would be an
    // invalid launch configuration in CUDA.
    if (grid_dim > 0) {
        kernel::calculate_nnz_per_row_in_span<<<grid_dim,
                                                default_block_size>>>(
            row_span, col_span, as_cuda_type(row_ptrs),
            as_cuda_type(col_idxs), as_cuda_type(row_nnz->get_data()));
    }
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
    GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL);


/**
 * CUDA implementation: fills the column indices and values of the
 * submatrix, assuming result's row pointers were already computed.
 *
 * @param exec      the CUDA executor the kernel runs on
 * @param source    the source CSR matrix
 * @param row_span  half-open range of extracted rows
 * @param col_span  half-open range of extracted columns
 * @param result    the submatrix whose col_idxs/values are filled
 */
template <typename ValueType, typename IndexType>
void compute_submatrix(std::shared_ptr<const DefaultExecutor> exec,
                       const matrix::Csr<ValueType, IndexType>* source,
                       gko::span row_span, gko::span col_span,
                       matrix::Csr<ValueType, IndexType>* result)
{
    const auto row_offset = row_span.begin;
    const auto col_offset = col_span.begin;
    const auto num_rows = result->get_size()[0];
    const auto num_cols = result->get_size()[1];
    const auto num_nnz = source->get_num_stored_elements();
    // One thread per result row: the kernel iterates over that row's
    // entries itself. Sizing the grid by num_nnz (as a previous version
    // did) launches too few threads whenever the source stores fewer
    // elements than the submatrix has rows, leaving those rows unfilled.
    const auto grid_dim = ceildiv(num_rows, default_block_size);

    if (grid_dim > 0) {
        kernel::compute_submatrix_idxs_and_vals<<<grid_dim,
                                                 default_block_size>>>(
            num_rows, num_cols, num_nnz, row_offset, col_offset,
            as_cuda_type(source->get_const_row_ptrs()),
            as_cuda_type(source->get_const_col_idxs()),
            as_cuda_type(source->get_const_values()),
            as_cuda_type(result->get_const_row_ptrs()),
            as_cuda_type(result->get_col_idxs()),
            as_cuda_type(result->get_values()));
    }
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
    GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_hybrid(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType>* source,
Expand Down
82 changes: 82 additions & 0 deletions cuda/test/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/matrix/sparsity_csr.hpp>


#include "core/components/prefix_sum.hpp"
#include "core/matrix/csr_kernels.hpp"
#include "core/test/utils/unsort_matrix.hpp"
#include "cuda/test/utils.hpp"
Expand Down Expand Up @@ -100,6 +101,14 @@ class Csr : public ::testing::Test {
std::normal_distribution<>(-1.0, 1.0), rand_engine, ref);
}

// Generates a random 5-nnz-per-row test matrix mtx2 on the reference
// executor and mirrors it into dmtx2 on the CUDA executor.
void set_up_apply_data()
{
    mtx2 = Mtx::create(ref);
    mtx2->copy_from(gen_mtx<Mtx>(mtx_size[0], mtx_size[1], 5));
    dmtx2 = Mtx::create(cuda);
    dmtx2->copy_from(mtx2.get());
}

void set_up_apply_data(std::shared_ptr<Mtx::strategy_type> strategy,
int num_vectors = 1)
{
Expand Down Expand Up @@ -147,13 +156,15 @@ class Csr : public ::testing::Test {
dmtx->copy_from(mtx.get());
}


std::shared_ptr<gko::ReferenceExecutor> ref;
std::shared_ptr<const gko::CudaExecutor> cuda;

const gko::dim<2> mtx_size;
std::ranlux48 rand_engine;

std::unique_ptr<Mtx> mtx;
std::unique_ptr<Mtx> mtx2;
std::unique_ptr<ComplexMtx> complex_mtx;
std::unique_ptr<Mtx> square_mtx;
std::unique_ptr<Vec> expected;
Expand All @@ -162,6 +173,7 @@ class Csr : public ::testing::Test {
std::unique_ptr<Vec> beta;

std::unique_ptr<Mtx> dmtx;
std::unique_ptr<Mtx> dmtx2;
std::unique_ptr<ComplexMtx> complex_dmtx;
std::unique_ptr<Mtx> square_dmtx;
std::unique_ptr<Vec> dresult;
Expand Down Expand Up @@ -926,4 +938,74 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef)
}


// Checks the CUDA span-nnz-count kernel against the reference kernel.
TEST_F(Csr, CalculateNnzPerRowInSpanIsEquivalentToRef)
{
    set_up_apply_data();
    gko::span rspan{7, 51};
    gko::span cspan{22, 88};
    // +1 entry so the array can later serve as row pointers.
    auto row_nnz = gko::Array<int>(this->ref, rspan.length() + 1);
    auto drow_nnz = gko::Array<int>(this->cuda, row_nnz);

    gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span(
        this->ref, this->mtx2.get(), rspan, cspan, &row_nnz);
    gko::kernels::cuda::csr::calculate_nonzeros_per_row_in_span(
        this->cuda, this->dmtx2.get(), rspan, cspan, &drow_nnz);

    GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz);
}


// Checks the CUDA submatrix-fill kernel against the reference kernel:
// row pointers are precomputed on the host, then both kernels fill the
// column indices and values of identically-sized result matrices.
TEST_F(Csr, ComputeSubmatrixIsEquivalentToRef)
{
    using Mtx = gko::matrix::Csr<>;
    using IndexType = int;
    using ValueType = double;
    set_up_apply_data();
    gko::span rspan{7, 51};
    gko::span cspan{22, 88};
    auto row_nnz = gko::Array<int>(this->ref, rspan.length() + 1);
    gko::kernels::reference::csr::calculate_nonzeros_per_row_in_span(
        this->ref, this->mtx2.get(), rspan, cspan, &row_nnz);
    // Exclusive prefix sum turns the per-row counts into row pointers;
    // the last entry is the total nnz of the submatrix.
    gko::kernels::reference::components::prefix_sum(
        this->ref, row_nnz.get_data(), row_nnz.get_num_elems());
    auto num_nnz = row_nnz.get_data()[rspan.length()];
    auto drow_nnz = gko::Array<int>(this->cuda, row_nnz);
    auto smat1 =
        Mtx::create(this->ref, gko::dim<2>(rspan.length(), cspan.length()),
                    std::move(gko::Array<ValueType>(this->ref, num_nnz)),
                    std::move(gko::Array<IndexType>(this->ref, num_nnz)),
                    std::move(row_nnz));
    auto sdmat1 =
        Mtx::create(this->cuda, gko::dim<2>(rspan.length(), cspan.length()),
                    std::move(gko::Array<ValueType>(this->cuda, num_nnz)),
                    std::move(gko::Array<IndexType>(this->cuda, num_nnz)),
                    std::move(drow_nnz));

    gko::kernels::reference::csr::compute_submatrix(this->ref, this->mtx2.get(),
                                                    rspan, cspan, smat1.get());
    gko::kernels::cuda::csr::compute_submatrix(this->cuda, this->dmtx2.get(),
                                               rspan, cspan, sdmat1.get());

    GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0);
}


// End-to-end check of Csr::create_submatrix on CUDA against reference.
TEST_F(Csr, CreateSubMatrixIsEquivalentToRef)
{
    set_up_apply_data();
    gko::span rspan{47, 81};
    gko::span cspan{2, 31};

    auto smat1 = this->mtx2->create_submatrix(rspan, cspan);
    auto sdmat1 = this->dmtx2->create_submatrix(rspan, cspan);

    GKO_ASSERT_MTX_NEAR(sdmat1, smat1, 0.0);
}


} // namespace
Loading

0 comments on commit d9dcea6

Please sign in to comment.