Skip to content

Commit

Permalink
fix Hybrid conversions and read
Browse files Browse the repository at this point in the history
This also involves a few bugfixes for edge cases
with ELL matrices that have fewer columns than nonzeros per row.
This can be reverted once ELL gets actual symbolic zeros.
  • Loading branch information
upsj committed Feb 2, 2022
1 parent 55a8023 commit caf04d2
Show file tree
Hide file tree
Showing 18 changed files with 168 additions and 396 deletions.
97 changes: 0 additions & 97 deletions common/cuda_hip/matrix/hybrid_kernels.hpp.inc

This file was deleted.

44 changes: 44 additions & 0 deletions common/unified/matrix/hybrid_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,50 @@ void compute_row_nnz(std::shared_ptr<const DefaultExecutor> exec,
}


/**
 * Distributes the entries of `data` over the ELL and COO parts of `result`.
 *
 * The kernel is launched through run_kernel with data.size[0] as its size
 * argument, so the body presumably runs once per matrix row. For a row, the
 * entries data.nonzeros[row_ptrs[row]] .. data.nonzeros[row_ptrs[row + 1] - 1]
 * are handled as follows:
 *  - the first ell_max_nnz of them go into the ELL arrays at position
 *    row + ell_stride * slot; ELL slots beyond the end of the row are filled
 *    with column index 0 and a zero value,
 *  - all remaining entries are written consecutively to the COO arrays,
 *    starting at coo_row_ptrs[row].
 *
 * @param exec  the executor handed to run_kernel
 * @param data  the matrix data whose nonzeros are read
 * @param row_ptrs  offset of each row's first entry within data.nonzeros
 * @param coo_row_ptrs  offset of each row's first overflow entry within the
 *                      COO arrays of result
 * @param result  the Hybrid matrix whose ELL and COO arrays are written; it
 *                is not resized here, so it is presumably sized by the caller
 */
template <typename ValueType, typename IndexType>
void fill_in_matrix_data(std::shared_ptr<const DefaultExecutor> exec,
                         const device_matrix_data<ValueType, IndexType>& data,
                         const int64* row_ptrs, const int64* coo_row_ptrs,
                         matrix::Hybrid<ValueType, IndexType>* result)
{
    using device_value_type = device_type<ValueType>;
    run_kernel(
        exec,
        [] GKO_KERNEL(auto row, auto row_ptrs, auto data, auto ell_stride,
                      auto ell_max_nnz, auto ell_cols, auto ell_vals,
                      auto coo_row_ptrs, auto coo_row_idxs, auto coo_col_idxs,
                      auto coo_vals) {
            const auto first_entry = row_ptrs[row];
            const auto entries_in_row = row_ptrs[row + 1] - first_entry;
            const auto ell_width = static_cast<int64>(ell_max_nnz);

            // ELL part: copy what the row provides, pad the rest.
            for (int64 slot = 0; slot < ell_width; slot++) {
                const auto ell_pos = row + ell_stride * slot;
                if (slot < entries_in_row) {
                    const auto src = first_entry + slot;
                    ell_cols[ell_pos] = data[src].column;
                    ell_vals[ell_pos] = unpack_member(data[src].value);
                } else {
                    ell_cols[ell_pos] = 0;
                    ell_vals[ell_pos] = zero<device_value_type>();
                }
            }

            // COO part: everything that did not fit into the ELL width.
            // `overflow` is negative or zero for short rows, in which case
            // the loop body never runs.
            const auto overflow = entries_in_row - ell_width;
            const auto coo_first = coo_row_ptrs[row];
            for (int64 k = 0; k < overflow; k++) {
                const auto src = first_entry + ell_width + k;
                const auto dst = coo_first + k;
                coo_row_idxs[dst] = row;
                coo_col_idxs[dst] = data[src].column;
                coo_vals[dst] = unpack_member(data[src].value);
            }
        },
        data.size[0], row_ptrs, data.nonzeros, result->get_ell_stride(),
        result->get_ell_num_stored_elements_per_row(),
        result->get_ell_col_idxs(), result->get_ell_values(), coo_row_ptrs,
        result->get_coo_row_idxs(), result->get_coo_col_idxs(),
        result->get_coo_values());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_HYBRID_FILL_IN_MATRIX_DATA_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_csr(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Hybrid<ValueType, IndexType>* source,
Expand Down
2 changes: 1 addition & 1 deletion core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ namespace hybrid {

GKO_STUB(GKO_DECLARE_HYBRID_COMPUTE_COO_ROW_PTRS_KERNEL);
GKO_STUB(GKO_DECLARE_HYBRID_COMPUTE_ROW_NNZ);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_SPLIT_MATRIX_DATA_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_FILL_IN_MATRIX_DATA_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_CONVERT_TO_CSR_KERNEL);


Expand Down
15 changes: 11 additions & 4 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,22 @@ void Csr<ValueType, IndexType>::convert_to(
Hybrid<ValueType, IndexType>* result) const
{
auto exec = this->get_executor();
Array<size_type> row_nnz{exec, this->get_size()[0]};
Array<int64> coo_row_ptrs{exec, this->get_size()[0] + 1};
exec->run(csr::make_convert_ptrs_to_sizes(
this->get_const_row_ptrs(), this->get_size()[0], row_nnz.get_data()));
const auto num_rows = this->get_size()[0];
const auto num_cols = this->get_size()[1];
Array<size_type> row_nnz{exec, num_rows};
Array<int64> coo_row_ptrs{exec, num_rows + 1};
exec->run(csr::make_convert_ptrs_to_sizes(this->get_const_row_ptrs(),
num_rows, row_nnz.get_data()));
size_type ell_lim{};
size_type coo_nnz{};
result->get_strategy()->compute_hybrid_config(row_nnz, &ell_lim, &coo_nnz);
if (ell_lim > num_cols) {
// TODO remove temporary fix after ELL gains true structural zeros
ell_lim = num_cols;
}
exec->run(csr::make_compute_hybrid_coo_row_ptrs(row_nnz, ell_lim,
coo_row_ptrs.get_data()));
coo_nnz = exec->copy_val_to_host(coo_row_ptrs.get_const_data() + num_rows);
auto tmp = make_temporary_clone(exec, result);
tmp->resize(this->get_size(), ell_lim, coo_nnz);
exec->run(csr::make_convert_to_hybrid(this, coo_row_ptrs.get_const_data(),
Expand Down
15 changes: 10 additions & 5 deletions core/matrix/dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,18 +557,23 @@ template <typename IndexType>
void Dense<ValueType>::convert_impl(Hybrid<ValueType, IndexType>* result) const
{
    // Converts this dense matrix into `result`: counts the nonzeros per row,
    // lets the strategy of `result` pick the ELL width, derives the COO row
    // pointers from that width, resizes a temporary clone of `result` on this
    // matrix's executor and runs the conversion kernel into the clone.
    auto exec = this->get_executor();
    const auto num_rows = this->get_size()[0];
    const auto num_cols = this->get_size()[1];
    Array<size_type> row_nnz{exec, num_rows};
    Array<int64> coo_row_ptrs{exec, num_rows + 1};
    exec->run(dense::make_count_nonzeros_per_row(this, row_nnz.get_data()));
    size_type ell_lim{};
    size_type coo_nnz{};
    result->get_strategy()->compute_hybrid_config(row_nnz, &ell_lim, &coo_nnz);
    if (ell_lim > num_cols) {
        // TODO remove temporary fix after ELL gains true structural zeros
        ell_lim = num_cols;
    }
    exec->run(dense::make_compute_hybrid_coo_row_ptrs(row_nnz, ell_lim,
                                                      coo_row_ptrs.get_data()));
    // The strategy's coo_nnz was computed for the unclamped ELL width, so it
    // is overwritten with the last entry of the freshly built row pointers.
    coo_nnz = exec->copy_val_to_host(coo_row_ptrs.get_const_data() + num_rows);
    auto tmp = make_temporary_clone(exec, result);
    // Single resize call; it stands in for the separate ell_/coo_ resizes and
    // set_size of the previous version.
    tmp->resize(this->get_size(), ell_lim, coo_nnz);
    exec->run(dense::make_convert_to_hybrid(this, coo_row_ptrs.get_const_data(),
                                            tmp.get()));
}
Expand Down
29 changes: 17 additions & 12 deletions core/matrix/hybrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@ namespace {

GKO_REGISTER_OPERATION(build_row_ptrs, components::build_row_ptrs);
GKO_REGISTER_OPERATION(compute_row_nnz, hybrid::compute_row_nnz);
GKO_REGISTER_OPERATION(split_matrix_data, hybrid::split_matrix_data);
GKO_REGISTER_OPERATION(fill_in_matrix_data, hybrid::fill_in_matrix_data);
GKO_REGISTER_OPERATION(ell_fill_in_dense, ell::fill_in_dense);
GKO_REGISTER_OPERATION(coo_fill_in_dense, coo::fill_in_dense);
GKO_REGISTER_OPERATION(ell_extract_diagonal, ell::extract_diagonal);
GKO_REGISTER_OPERATION(coo_extract_diagonal, coo::extract_diagonal);
GKO_REGISTER_OPERATION(ell_count_nonzeros_per_row, ell::count_nonzeros_per_row);
GKO_REGISTER_OPERATION(compute_coo_row_ptrs, hybrid::compute_coo_row_ptrs);
GKO_REGISTER_OPERATION(convert_idxs_to_ptrs, components::convert_idxs_to_ptrs);
GKO_REGISTER_OPERATION(convert_to_csr, hybrid::convert_to_csr);
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
Expand Down Expand Up @@ -209,25 +210,29 @@ template <typename ValueType, typename IndexType>
void Hybrid<ValueType, IndexType>::read(const device_mat_data& data)
{
    // Rebuilds this matrix from `data`: builds row pointers over the input
    // entries, counts the entries per row, lets the strategy choose the ELL
    // width, derives the COO row pointers from that width, resizes this
    // matrix and fills the ELL and COO storage with one kernel.
    auto exec = this->get_executor();
    const auto num_rows = data.size[0];
    const auto num_cols = data.size[1];
    auto local_data = make_temporary_clone(exec, &data.nonzeros);
    Array<int64> row_ptrs{exec, num_rows + 1};
    exec->run(hybrid::make_build_row_ptrs(*local_data, num_rows,
                                          row_ptrs.get_data()));
    Array<size_type> row_nnz{exec, num_rows};
    exec->run(hybrid::make_compute_row_nnz(row_ptrs, row_nnz.get_data()));
    size_type ell_max_nnz{};
    size_type coo_nnz{};
    this->get_strategy()->compute_hybrid_config(row_nnz, &ell_max_nnz,
                                                &coo_nnz);
    if (ell_max_nnz > num_cols) {
        // TODO remove temporary fix after ELL gains true structural zeros
        ell_max_nnz = num_cols;
    }
    Array<int64> coo_row_ptrs{exec, num_rows + 1};
    exec->run(hybrid::make_compute_coo_row_ptrs(row_nnz, ell_max_nnz,
                                                coo_row_ptrs.get_data()));
    // The strategy's coo_nnz was computed for the unclamped ELL width, so it
    // is overwritten with the last entry of the freshly built row pointers.
    coo_nnz = exec->copy_val_to_host(coo_row_ptrs.get_const_data() + num_rows);
    // Size the matrix from the input, not from its previous state: `this` is
    // the destination here, so this->get_size() would keep whatever
    // dimensions the object had before the read.
    this->resize(data.size, ell_max_nnz, coo_nnz);
    // NOTE(review): `local_data` is a clone of data.nonzeros on `exec`, but
    // the fill kernel receives `data` itself - confirm that `data` is always
    // resident on this matrix's executor.
    exec->run(hybrid::make_fill_in_matrix_data(
        data, row_ptrs.get_const_data(), coo_row_ptrs.get_const_data(), this));
}


Expand Down
25 changes: 12 additions & 13 deletions core/matrix/hybrid_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,12 @@ namespace kernels {
const Array<size_type>& row_nnz, \
size_type ell_lim, int64* coo_row_ptrs)

#define GKO_DECLARE_HYBRID_SPLIT_MATRIX_DATA_KERNEL(ValueType, IndexType) \
void split_matrix_data( \
std::shared_ptr<const DefaultExecutor> exec, \
const Array<matrix_data_entry<ValueType, IndexType>>& data, \
const int64* row_ptrs, size_type num_rows, size_type ell_limit, \
Array<matrix_data_entry<ValueType, IndexType>>& ell_data, \
Array<matrix_data_entry<ValueType, IndexType>>& coo_data)
// Expands to the declaration of the fill_in_matrix_data kernel for the given
// ValueType/IndexType pair. The expansion has no trailing semicolon, so the
// user of the macro supplies it (or a function body).
#define GKO_DECLARE_HYBRID_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType) \
void fill_in_matrix_data( \
std::shared_ptr<const DefaultExecutor> exec, \
const device_matrix_data<ValueType, IndexType>& data, \
const int64* row_ptrs, const int64* coo_row_ptrs, \
matrix::Hybrid<ValueType, IndexType>* result)

#define GKO_DECLARE_HYBRID_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \
void convert_to_csr(std::shared_ptr<const DefaultExecutor> exec, \
Expand All @@ -72,12 +71,12 @@ namespace kernels {
matrix::Csr<ValueType, IndexType>* result)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
GKO_DECLARE_HYBRID_COMPUTE_ROW_NNZ; \
GKO_DECLARE_HYBRID_COMPUTE_COO_ROW_PTRS_KERNEL; \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_HYBRID_SPLIT_MATRIX_DATA_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
#define GKO_DECLARE_ALL_AS_TEMPLATES \
GKO_DECLARE_HYBRID_COMPUTE_ROW_NNZ; \
GKO_DECLARE_HYBRID_COMPUTE_COO_ROW_PTRS_KERNEL; \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_HYBRID_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_HYBRID_CONVERT_TO_CSR_KERNEL(ValueType, IndexType)


Expand Down
1 change: 0 additions & 1 deletion cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ target_sources(ginkgo_cuda
matrix/ell_kernels.cu
matrix/fbcsr_kernels.cu
matrix/fft_kernels.cu
matrix/hybrid_kernels.cu
matrix/sellp_kernels.cu
matrix/sparsity_csr_kernels.cu
multigrid/amgx_pgm_kernels.cu
Expand Down
64 changes: 0 additions & 64 deletions cuda/matrix/hybrid_kernels.cu

This file was deleted.

1 change: 0 additions & 1 deletion dpcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ target_sources(ginkgo_dpcpp
matrix/diagonal_kernels.dp.cpp
matrix/ell_kernels.dp.cpp
matrix/fft_kernels.dp.cpp
matrix/hybrid_kernels.dp.cpp
matrix/sellp_kernels.dp.cpp
matrix/sparsity_csr_kernels.dp.cpp
multigrid/amgx_pgm_kernels.dp.cpp
Expand Down
Loading

0 comments on commit caf04d2

Please sign in to comment.