Skip to content

Commit

Permalink
Merge generic matrix format test
Browse files Browse the repository at this point in the history
Adds a test that runs for all matrix formats and fixes a few small issues that were found on the way,
mostly related to edge cases with zero matrix dimensions.

Related PR: #904
  • Loading branch information
upsj authored Feb 3, 2022
2 parents 0436f3e + c6d4152 commit 083fb87
Show file tree
Hide file tree
Showing 33 changed files with 1,265 additions and 837 deletions.
3 changes: 2 additions & 1 deletion cmake/create_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,14 @@ function(ginkgo_create_common_test test_name)
string(SUBSTRING ${exec} 0 1 exec_initial)
string(SUBSTRING ${exec} 1 -1 exec_tail)
string(TOUPPER ${exec_initial} exec_initial)
string(TOUPPER ${exec} exec_upper)
set(exec_type ${exec_initial}${exec_tail}Executor)
# set up actual test
set(test_target_name ${test_target_name}_${exec})
add_executable(${test_target_name} ${test_name}.cpp)
target_compile_features(${test_target_name} PUBLIC cxx_std_14)
target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS})
target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec})
target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec} GKO_COMPILING_${exec_upper})
target_link_libraries(${test_target_name} PRIVATE ${common_test_ADDITIONAL_LIBRARIES})
# use float for DPC++ if necessary
if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE)
Expand Down
29 changes: 29 additions & 0 deletions common/cuda_hip/matrix/dense_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,35 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr(
}


template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void fill_in_sparsity_csr(
size_type num_rows, size_type num_cols, size_type stride,
const ValueType* __restrict__ source, IndexType* __restrict__ row_ptrs,
IndexType* __restrict__ col_idxs)
{
const auto row = thread::get_subwarp_id_flat<config::warp_size>();

if (row < num_rows) {
auto warp = group::tiled_partition<config::warp_size>(
group::this_thread_block());
auto lane_prefix_mask =
(config::lane_mask_type(1) << warp.thread_rank()) - 1;
auto base_out_idx = row_ptrs[row];
for (size_type i = 0; i < num_cols; i += config::warp_size) {
const auto col = i + warp.thread_rank();
const auto pred =
col < num_cols ? is_nonzero(source[stride * row + col]) : false;
const auto mask = warp.ballot(pred);
const auto out_idx = base_out_idx + popcnt(mask & lane_prefix_mask);
if (pred) {
col_idxs[out_idx] = col;
}
base_out_idx += popcnt(mask);
}
}
}


template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void fill_in_ell(
size_type num_rows, size_type num_cols, size_type source_stride,
Expand Down
97 changes: 0 additions & 97 deletions common/cuda_hip/matrix/hybrid_kernels.hpp.inc

This file was deleted.

44 changes: 44 additions & 0 deletions common/unified/matrix/hybrid_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,50 @@ void compute_row_nnz(std::shared_ptr<const DefaultExecutor> exec,
}


template <typename ValueType, typename IndexType>
void fill_in_matrix_data(std::shared_ptr<const DefaultExecutor> exec,
const device_matrix_data<ValueType, IndexType>& data,
const int64* row_ptrs, const int64* coo_row_ptrs,
matrix::Hybrid<ValueType, IndexType>* result)
{
using device_value_type = device_type<ValueType>;
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto row_ptrs, auto data, auto ell_stride,
auto ell_max_nnz, auto ell_cols, auto ell_vals,
auto coo_row_ptrs, auto coo_row_idxs, auto coo_col_idxs,
auto coo_vals) {
const auto row_begin = row_ptrs[row];
const auto row_size = row_ptrs[row + 1] - row_begin;
for (int64 i = 0; i < ell_max_nnz; i++) {
const auto out_idx = row + ell_stride * i;
const auto in_idx = i + row_begin;
const bool use = i < row_size;
ell_cols[out_idx] = use ? data[in_idx].column : 0;
ell_vals[out_idx] = use ? unpack_member(data[in_idx].value)
: zero<device_value_type>();
}
const auto coo_begin = coo_row_ptrs[row];
for (int64 i = ell_max_nnz; i < row_size; i++) {
const auto in_idx = i + row_begin;
const auto out_idx =
coo_begin + i - static_cast<int64>(ell_max_nnz);
coo_row_idxs[out_idx] = row;
coo_col_idxs[out_idx] = data[in_idx].column;
coo_vals[out_idx] = unpack_member(data[in_idx].value);
}
},
data.size[0], row_ptrs, data.nonzeros, result->get_ell_stride(),
result->get_ell_num_stored_elements_per_row(),
result->get_ell_col_idxs(), result->get_ell_values(), coo_row_ptrs,
result->get_coo_row_idxs(), result->get_coo_col_idxs(),
result->get_coo_values());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_HYBRID_FILL_IN_MATRIX_DATA_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_csr(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Hybrid<ValueType, IndexType>* source,
Expand Down
2 changes: 1 addition & 1 deletion core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ namespace hybrid {

GKO_STUB(GKO_DECLARE_HYBRID_COMPUTE_COO_ROW_PTRS_KERNEL);
GKO_STUB(GKO_DECLARE_HYBRID_COMPUTE_ROW_NNZ);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_SPLIT_MATRIX_DATA_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_FILL_IN_MATRIX_DATA_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_CONVERT_TO_CSR_KERNEL);


Expand Down
8 changes: 4 additions & 4 deletions core/matrix/coo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,10 @@ template <typename ValueType, typename IndexType>
void Coo<ValueType, IndexType>::convert_to(Dense<ValueType>* result) const
{
auto exec = this->get_executor();
result->resize(this->get_size());
result->fill(zero<ValueType>());
exec->run(coo::make_fill_in_dense(
this, make_temporary_output_clone(exec, result).get()));
auto tmp_result = make_temporary_output_clone(exec, result);
tmp_result->resize(this->get_size());
tmp_result->fill(zero<ValueType>());
exec->run(coo::make_fill_in_dense(this, tmp_result.get()));
}


Expand Down
23 changes: 15 additions & 8 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,10 @@ template <typename ValueType, typename IndexType>
void Csr<ValueType, IndexType>::convert_to(Dense<ValueType>* result) const
{
auto exec = this->get_executor();
result->resize(this->get_size());
result->fill(zero<ValueType>());
exec->run(csr::make_fill_in_dense(
this, make_temporary_output_clone(exec, result).get()));
auto tmp_result = make_temporary_output_clone(exec, result);
tmp_result->resize(this->get_size());
tmp_result->fill(zero<ValueType>());
exec->run(csr::make_fill_in_dense(this, tmp_result.get()));
}


Expand All @@ -230,15 +230,22 @@ void Csr<ValueType, IndexType>::convert_to(
Hybrid<ValueType, IndexType>* result) const
{
auto exec = this->get_executor();
Array<size_type> row_nnz{exec, this->get_size()[0]};
Array<int64> coo_row_ptrs{exec, this->get_size()[0] + 1};
exec->run(csr::make_convert_ptrs_to_sizes(
this->get_const_row_ptrs(), this->get_size()[0], row_nnz.get_data()));
const auto num_rows = this->get_size()[0];
const auto num_cols = this->get_size()[1];
Array<size_type> row_nnz{exec, num_rows};
Array<int64> coo_row_ptrs{exec, num_rows + 1};
exec->run(csr::make_convert_ptrs_to_sizes(this->get_const_row_ptrs(),
num_rows, row_nnz.get_data()));
size_type ell_lim{};
size_type coo_nnz{};
result->get_strategy()->compute_hybrid_config(row_nnz, &ell_lim, &coo_nnz);
if (ell_lim > num_cols) {
// TODO remove temporary fix after ELL gains true structural zeros
ell_lim = num_cols;
}
exec->run(csr::make_compute_hybrid_coo_row_ptrs(row_nnz, ell_lim,
coo_row_ptrs.get_data()));
coo_nnz = exec->copy_val_to_host(coo_row_ptrs.get_const_data() + num_rows);
auto tmp = make_temporary_clone(exec, result);
tmp->resize(this->get_size(), ell_lim, coo_nnz);
exec->run(csr::make_convert_to_hybrid(this, coo_row_ptrs.get_const_data(),
Expand Down
15 changes: 10 additions & 5 deletions core/matrix/dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,18 +557,23 @@ template <typename IndexType>
void Dense<ValueType>::convert_impl(Hybrid<ValueType, IndexType>* result) const
{
auto exec = this->get_executor();
Array<size_type> row_nnz{exec, this->get_size()[0]};
Array<int64> coo_row_ptrs{exec, this->get_size()[0] + 1};
const auto num_rows = this->get_size()[0];
const auto num_cols = this->get_size()[1];
Array<size_type> row_nnz{exec, num_rows};
Array<int64> coo_row_ptrs{exec, num_rows + 1};
exec->run(dense::make_count_nonzeros_per_row(this, row_nnz.get_data()));
size_type ell_lim{};
size_type coo_nnz{};
result->get_strategy()->compute_hybrid_config(row_nnz, &ell_lim, &coo_nnz);
if (ell_lim > num_cols) {
// TODO remove temporary fix after ELL gains true structural zeros
ell_lim = num_cols;
}
exec->run(dense::make_compute_hybrid_coo_row_ptrs(row_nnz, ell_lim,
coo_row_ptrs.get_data()));
coo_nnz = exec->copy_val_to_host(coo_row_ptrs.get_const_data() + num_rows);
auto tmp = make_temporary_clone(exec, result);
tmp->ell_->resize(this->get_size(), ell_lim);
tmp->coo_->resize(this->get_size(), coo_nnz);
tmp->set_size(this->get_size());
tmp->resize(this->get_size(), ell_lim, coo_nnz);
exec->run(dense::make_convert_to_hybrid(this, coo_row_ptrs.get_const_data(),
tmp.get()));
}
Expand Down
8 changes: 4 additions & 4 deletions core/matrix/ell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ template <typename ValueType, typename IndexType>
void Ell<ValueType, IndexType>::convert_to(Dense<ValueType>* result) const
{
auto exec = this->get_executor();
result->resize(this->get_size());
result->fill(zero<ValueType>());
exec->run(ell::make_fill_in_dense(
this, make_temporary_output_clone(exec, result).get()));
auto tmp_result = make_temporary_output_clone(exec, result);
tmp_result->resize(this->get_size());
tmp_result->fill(zero<ValueType>());
exec->run(ell::make_fill_in_dense(this, tmp_result.get()));
}


Expand Down
8 changes: 4 additions & 4 deletions core/matrix/fbcsr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,10 @@ void Fbcsr<ValueType, IndexType>::convert_to(
Dense<ValueType>* const result) const
{
auto exec = this->get_executor();
result->resize(this->get_size());
result->fill(zero<ValueType>());
exec->run(fbcsr::make_fill_in_dense(
this, make_temporary_output_clone(exec, result).get()));
auto tmp_result = make_temporary_output_clone(exec, result);
tmp_result->resize(this->get_size());
tmp_result->fill(zero<ValueType>());
exec->run(fbcsr::make_fill_in_dense(this, tmp_result.get()));
}


Expand Down
29 changes: 17 additions & 12 deletions core/matrix/hybrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@ namespace {

GKO_REGISTER_OPERATION(build_row_ptrs, components::build_row_ptrs);
GKO_REGISTER_OPERATION(compute_row_nnz, hybrid::compute_row_nnz);
GKO_REGISTER_OPERATION(split_matrix_data, hybrid::split_matrix_data);
GKO_REGISTER_OPERATION(fill_in_matrix_data, hybrid::fill_in_matrix_data);
GKO_REGISTER_OPERATION(ell_fill_in_dense, ell::fill_in_dense);
GKO_REGISTER_OPERATION(coo_fill_in_dense, coo::fill_in_dense);
GKO_REGISTER_OPERATION(ell_extract_diagonal, ell::extract_diagonal);
GKO_REGISTER_OPERATION(coo_extract_diagonal, coo::extract_diagonal);
GKO_REGISTER_OPERATION(ell_count_nonzeros_per_row, ell::count_nonzeros_per_row);
GKO_REGISTER_OPERATION(compute_coo_row_ptrs, hybrid::compute_coo_row_ptrs);
GKO_REGISTER_OPERATION(convert_idxs_to_ptrs, components::convert_idxs_to_ptrs);
GKO_REGISTER_OPERATION(convert_to_csr, hybrid::convert_to_csr);
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
Expand Down Expand Up @@ -209,25 +210,29 @@ template <typename ValueType, typename IndexType>
void Hybrid<ValueType, IndexType>::read(const device_mat_data& data)
{
auto exec = this->get_executor();
const auto num_rows = data.size[0];
const auto num_cols = data.size[1];
auto local_data = make_temporary_clone(exec, &data.nonzeros);
Array<int64> row_ptrs{exec, data.size[0] + 1};
exec->run(hybrid::make_build_row_ptrs(*local_data, data.size[0],
Array<int64> row_ptrs{exec, num_rows + 1};
exec->run(hybrid::make_build_row_ptrs(*local_data, num_rows,
row_ptrs.get_data()));
Array<size_type> row_nnz{exec, data.size[0]};
exec->run(hybrid::make_compute_row_nnz(row_ptrs, row_nnz.get_data()));
size_type ell_max_nnz{};
size_type coo_nnz{};
this->get_strategy()->compute_hybrid_config(row_nnz, &ell_max_nnz,
&coo_nnz);
auto ell_nnz = data.nonzeros.get_num_elems() - coo_nnz;
device_mat_data ell_data{exec, data.size, ell_nnz};
device_mat_data coo_data{exec, data.size, coo_nnz};
exec->run(hybrid::make_split_matrix_data(
data.nonzeros, row_ptrs.get_const_data(), ell_max_nnz, data.size[0],
ell_data.nonzeros, coo_data.nonzeros));
this->set_size(data.size);
ell_->read(ell_data);
coo_->read(coo_data);
if (ell_max_nnz > num_cols) {
// TODO remove temporary fix after ELL gains true structural zeros
ell_max_nnz = num_cols;
}
Array<int64> coo_row_ptrs{exec, num_rows + 1};
exec->run(hybrid::make_compute_coo_row_ptrs(row_nnz, ell_max_nnz,
coo_row_ptrs.get_data()));
coo_nnz = exec->copy_val_to_host(coo_row_ptrs.get_const_data() + num_rows);
this->resize(data.size, ell_max_nnz, coo_nnz);
exec->run(hybrid::make_fill_in_matrix_data(
data, row_ptrs.get_const_data(), coo_row_ptrs.get_const_data(), this));
}


Expand Down
Loading

0 comments on commit 083fb87

Please sign in to comment.