From a9759c1859bdd86f48ad84abb35372887e86ce14 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 30 Nov 2021 17:22:03 +0100 Subject: [PATCH 01/38] add kernels for intermediate norm computation Co-authored-by: Tobias Ribizel Co-authored-by: Pratik Nayak --- common/unified/matrix/dense_kernels.cpp | 31 ++++++++++++++++++++++ core/device_hooks/common_kernels.inc.cpp | 2 ++ core/matrix/dense.cpp | 2 ++ core/matrix/dense_kernels.hpp | 13 ++++++++++ reference/matrix/dense_kernels.cpp | 33 ++++++++++++++++++++++++ 5 files changed, 81 insertions(+) diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.cpp index a328c5b56ad..40122219e31 100644 --- a/common/unified/matrix/dense_kernels.cpp +++ b/common/unified/matrix/dense_kernels.cpp @@ -380,6 +380,37 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL_SIZE_T); +template +void compute_norm2_sqr(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + run_kernel_col_reduction( + exec, + [] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); }, + [] GKO_KERNEL(auto a, auto b) { return a + b; }, + [] GKO_KERNEL(auto a) { return a; }, remove_complex{}, + result->get_values(), x->get_size(), x); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL); + + +template +void compute_sqrt(std::shared_ptr exec, + matrix::Dense* x) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto x) { + x(row, col) = sqrt(x(row, col)); + }, + x->get_size(), x); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); + + template void symm_permute(std::shared_ptr exec, const Array* permutation_indices, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 00acc7a9cea..1ae87c15ec4 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -262,6 +262,8 @@ 
GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index b2a92cbbe95..354e7220c7c 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -80,6 +80,8 @@ GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot_dispatch); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot_dispatch); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2_dispatch); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); +GKO_REGISTER_OPERATION(compute_norm2_sqr, dense::compute_norm2_sqr); +GKO_REGISTER_OPERATION(compute_sqrt, dense::compute_sqrt); GKO_REGISTER_OPERATION(compute_max_nnz_per_row, dense::compute_max_nnz_per_row); GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs, hybrid::compute_coo_row_ptrs); diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 3cf15052826..f8c7fcb8aa0 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -151,6 +151,15 @@ namespace kernels { const device_matrix_data<_type, _prec>& data, \ matrix::Dense<_type>* output) +#define GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL(_type) \ + void compute_norm2_sqr(std::shared_ptr exec, \ + const matrix::Dense<_type>* x, \ + matrix::Dense>* result) + +#define GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(_type) \ + void compute_sqrt(std::shared_ptr exec, \ + matrix::Dense<_type>* data) + #define 
GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(_type, _prec) \ void convert_to_coo(std::shared_ptr exec, \ const matrix::Dense<_type>* source, \ @@ -341,6 +350,10 @@ namespace kernels { GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ template \ diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 317b3599ae3..309fa473521 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -412,6 +412,39 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); +template +void compute_norm2_sqr(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) = zero>(); + } + for (size_type i = 0; i < x->get_size()[0]; ++i) { + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) += squared_norm(x->at(i, j)); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL); + + +template +void compute_sqrt(std::shared_ptr exec, + matrix::Dense* data) +{ + for (size_type i = 0; i < data->get_size()[0]; ++i) { + for (size_type j = 0; j < data->get_size()[1]; ++j) { + data->at(i, j) = sqrt(data->at(i, j)); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); + + template void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, const int64*, From be017904440a705d907a8e14413dedeeda58fc3e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 30 Nov 2021 17:24:09 +0100 Subject: [PATCH 02/38] add distributed vector class Co-authored-by: Tobias Ribizel Co-authored-by: Pratik Nayak --- 
core/CMakeLists.txt | 4 +- core/device_hooks/common_kernels.inc.cpp | 18 + core/distributed/vector.cpp | 391 ++++++++++++++++++ core/distributed/vector_kernels.hpp | 82 ++++ core/test/utils.hpp | 24 ++ cuda/CMakeLists.txt | 1 + cuda/distributed/vector_kernels.cu | 61 +++ dpcpp/CMakeLists.txt | 1 + dpcpp/distributed/vector_kernels.dp.cpp | 61 +++ hip/CMakeLists.txt | 1 + hip/distributed/vector_kernels.hip.cpp | 61 +++ include/ginkgo/core/base/types.hpp | 71 +++- include/ginkgo/core/distributed/base.hpp | 74 ++++ include/ginkgo/core/distributed/partition.hpp | 2 + include/ginkgo/core/distributed/vector.hpp | 334 +++++++++++++++ include/ginkgo/core/matrix/dense.hpp | 13 + include/ginkgo/ginkgo.hpp | 4 + omp/CMakeLists.txt | 1 + omp/distributed/vector_kernels.cpp | 150 +++++++ reference/CMakeLists.txt | 1 + reference/distributed/vector_kernels.cpp | 130 ++++++ reference/test/distributed/CMakeLists.txt | 1 + reference/test/distributed/vector_kernels.cpp | 165 ++++++++ 23 files changed, 1647 insertions(+), 4 deletions(-) create mode 100644 core/distributed/vector.cpp create mode 100644 core/distributed/vector_kernels.hpp create mode 100644 cuda/distributed/vector_kernels.cu create mode 100644 dpcpp/distributed/vector_kernels.dp.cpp create mode 100644 hip/distributed/vector_kernels.hip.cpp create mode 100644 include/ginkgo/core/distributed/base.hpp create mode 100644 include/ginkgo/core/distributed/vector.hpp create mode 100644 omp/distributed/vector_kernels.cpp create mode 100644 reference/distributed/vector_kernels.cpp create mode 100644 reference/test/distributed/vector_kernels.cpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 19d76968425..d2e27b340df 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -65,7 +65,9 @@ endif() if (GINKGO_BUILD_MPI) target_sources(ginkgo - PRIVATE mpi/exception.cpp) + PRIVATE + mpi/exception.cpp + distributed/vector.cpp) endif() ginkgo_compile_features(ginkgo) diff --git a/core/device_hooks/common_kernels.inc.cpp 
b/core/device_hooks/common_kernels.inc.cpp index 1ae87c15ec4..4a861aa3356 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum_kernels.hpp" #include "core/components/reduce_array_kernels.hpp" #include "core/distributed/partition_kernels.hpp" +#include "core/distributed/vector_kernels.hpp" #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/ic_kernels.hpp" #include "core/factorization/ilu_kernels.hpp" @@ -142,6 +143,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(_macro) +#define GKO_STUB_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ + template \ + _macro(ValueType, LocalIndexType, GlobalIndexType) \ + GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ + GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) + #define GKO_STUB_TEMPLATE_TYPE(_macro) \ template \ _macro(IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ @@ -241,6 +249,16 @@ GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition +namespace distributed_vector { + + +GKO_STUB_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} + + namespace dense { diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp new file mode 100644 index 00000000000..8bc4054d53c --- /dev/null +++ b/core/distributed/vector.cpp @@ -0,0 +1,391 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include "core/distributed/vector_kernels.hpp" +#include "core/matrix/dense_kernels.hpp" + + +namespace gko { +namespace distributed { +namespace vector { + + +GKO_REGISTER_OPERATION(compute_norm2_sqr, dense::compute_norm2_sqr); +GKO_REGISTER_OPERATION(compute_sqrt, dense::compute_sqrt); +GKO_REGISTER_OPERATION(outplace_absolute_dense, dense::outplace_absolute_dense); +GKO_REGISTER_OPERATION(build_local, distributed_vector::build_local); + + +} // namespace vector + + +template +void Vector::apply_impl( + const LinOp* b, LinOp* x) const +{ + GKO_NOT_SUPPORTED(this); +} + + +template +void Vector::apply_impl( + const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const +{ + GKO_NOT_SUPPORTED(this); +} + + +template +void Vector::fill( + const ValueType value) +{ + this->get_local()->fill(value); +} + + +template +void Vector::convert_to( + Vector, LocalIndexType, GlobalIndexType>* result) + const +{ + result->set_size(this->get_size()); + result->set_communicator(this->get_communicator()); + result->partition_ = this->partition_; + this->get_local()->convert_to(result->get_local()); +} + + +template +void Vector::move_to( + Vector, LocalIndexType, GlobalIndexType>* result) +{ + this->convert_to(result); +} + + +template +std::unique_ptr< + typename Vector::absolute_type> +Vector::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto result = absolute_type::create(exec, this->get_communicator(), + this->get_partition(), this->get_size(), + this->get_local()->get_size()); + + exec->run(vector::make_outplace_absolute_dense(this->get_local(), + result->get_local())); + + return result; +} + + +template +void Vector::compute_absolute_inplace() +{ + this->get_local()->compute_absolute_inplace(); +} + + +template +const typename Vector::local_mtx_type* +Vector::get_local() const +{ + return &local_; +} + + +template +typename Vector::local_mtx_type* 
+Vector::get_local() +{ + return &local_; +} + + +template +Vector::Vector( + std::shared_ptr exec, mpi::communicator comm, + std::shared_ptr> partition, + dim<2> global_size, dim<2> local_size, size_type stride) + : EnableLinOp< + Vector>{exec, + global_size}, + DistributedBase{comm}, + partition_{ + partition + ? std::move(partition) + : gko::share( + Partition::create(exec))}, + local_{exec, local_size, + stride != invalid_index() ? stride : local_size[1]} +{} + + +template +void read_local_impl( + std::shared_ptr exec, mpi::communicator comm, + const Partition* partition, + const size_type num_cols, + const Array>& global_data, + LocalMtxType* local_mtx) +{ + auto rank = comm.rank(); + + Array> local_data{exec}; + exec->run(vector::make_build_local(global_data, partition, rank, local_data, + ValueType{})); + + auto local_rows = static_cast(partition->get_part_size(rank)); + dim<2> local_size{local_rows, num_cols}; + local_mtx->read({local_size, local_data}); +} + + +template +void Vector::read_distributed( + const matrix_data& data, + std::shared_ptr> partition) +{ + this->partition_ = std::move(partition); + + auto exec = this->get_executor(); + Array> global_data{ + exec, data.nonzeros.begin(), data.nonzeros.end()}; + read_local_impl(exec, this->get_communicator(), this->get_partition().get(), + data.size[1], global_data, this->get_local()); + + auto global_rows = static_cast(this->partition_->get_size()); + this->set_size({global_rows, data.size[1]}); +} + + +template +void Vector::read_distributed( + const device_matrix_data& data, + std::shared_ptr> partition) +{ + this->partition_ = std::move(partition); + + read_local_impl(this->get_executor(), this->get_communicator(), + this->get_partition().get(), data.size[1], data.nonzeros, + this->get_local()); + + auto global_rows = static_cast(this->partition_->get_size()); + this->set_size({global_rows, data.size[1]}); +} + + +template +std::unique_ptr< + typename Vector::complex_type> +Vector::make_complex() 
const +{ + auto result = complex_type::create( + this->get_executor(), this->get_communicator(), this->get_partition(), + this->get_size(), this->get_const_local()->get_size(), + this->get_const_local()->get_stride()); + this->make_complex(result.get()); + return result; +} + + +template +void Vector::make_complex( + Vector::complex_type* result) const +{ + this->get_const_local()->make_complex(result->get_local()); +} + + +template +std::unique_ptr< + typename Vector::real_type> +Vector::get_real() const +{ + auto result = real_type::create( + this->get_executor(), this->get_communicator(), this->get_partition(), + this->get_size(), this->get_const_local()->get_size(), + this->get_const_local()->get_stride()); + this->get_real(result.get()); + return result; +} + + +template +void Vector::get_real( + Vector::real_type* result) const +{ + this->get_const_local()->get_real(result->get_local()); +} + + +template +std::unique_ptr< + typename Vector::real_type> +Vector::get_imag() const +{ + auto result = real_type::create( + this->get_executor(), this->get_communicator(), this->get_partition(), + this->get_size(), this->get_const_local()->get_size(), + this->get_const_local()->get_stride()); + this->get_imag(result.get()); + return result; +} + + +template +void Vector::get_imag( + Vector::real_type* result) const +{ + this->get_const_local()->get_imag(result->get_local()); +} + + +template +void Vector::scale( + const LinOp* alpha) +{ + this->get_local()->scale(alpha); +} + + +template +void Vector::inv_scale( + const LinOp* alpha) +{ + this->get_local()->inv_scale(alpha); +} + + +template +void Vector::add_scaled( + const LinOp* alpha, const LinOp* b) +{ + auto dense_b = as>(b); + this->get_local()->add_scaled(alpha, dense_b->get_local()); +} + + +template +void Vector::sub_scaled( + const LinOp* alpha, const LinOp* b) +{ + auto dense_b = as>(b); + this->get_local()->sub_scaled(alpha, dense_b->get_const_local()); +} + + +template +void Vector::compute_dot( + const 
LinOp* b, LinOp* result) const +{ + auto exec = this->get_executor(); + auto dense_res = + make_temporary_clone(exec, as>(result)); + this->get_local()->compute_dot(as(b)->get_local(), dense_res.get()); + exec->synchronize(); + auto dense_res_host = + make_temporary_clone(exec->get_master(), dense_res.get()); + this->get_communicator().all_reduce(dense_res_host->get_values(), + static_cast(this->get_size()[1]), + MPI_SUM); + dense_res->copy_from(dense_res_host.get()); +} + + +template +void Vector::compute_conj_dot( + const LinOp* b, LinOp* result) const +{ + auto exec = this->get_executor(); + auto dense_res = + make_temporary_clone(exec, as>(result)); + this->get_local()->compute_conj_dot(as(b)->get_local(), + dense_res.get()); + exec->synchronize(); + this->get_communicator().all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), + MPI_SUM); +} + + +template +void Vector::compute_norm2( + LinOp* result) const +{ + using NormVector = matrix::Dense>; + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + auto exec = this->get_executor(); + auto dense_res = make_temporary_clone(exec, as(result)); + exec->run( + vector::make_compute_norm2_sqr(this->get_local(), dense_res.get())); + exec->synchronize(); + this->get_communicator().all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), + MPI_SUM); + exec->run(vector::make_compute_sqrt(dense_res.get())); +} + + +template +void Vector::compute_norm1( + LinOp* result) const +{ + using NormVector = typename local_mtx_type::absolute_type; + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + auto exec = this->get_executor(); + auto dense_res = make_temporary_clone(exec, as(result)); + this->get_const_local()->compute_norm1(dense_res.get()); + exec->synchronize(); + this->get_communicator().all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), + MPI_SUM); +} + + +#define GKO_DECLARE_DISTRIBUTED_VECTOR(ValueType, LocalIndexType, \ + 
GlobalIndexType) \ + class Vector +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR); + + +} // namespace distributed +} // namespace gko diff --git a/core/distributed/vector_kernels.hpp b/core/distributed/vector_kernels.hpp new file mode 100644 index 00000000000..dbdaeb9c101 --- /dev/null +++ b/core/distributed/vector_kernels.hpp @@ -0,0 +1,82 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_DISTRIBUTED_VECTOR_KERNELS_HPP_ +#define GKO_CORE_DISTRIBUTED_VECTOR_KERNELS_HPP_ + + +// can't include ginkgo/core/distributed/vector.hpp since that requires linking +// against MPI +#include +#include +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL(ValueType, LocalIndexType, \ + GlobalIndexType) \ + void build_local( \ + std::shared_ptr exec, \ + const Array>& input, \ + const distributed::Partition* \ + partition, \ + comm_index_type local_part, \ + Array>& local_data, \ + ValueType deduction_help) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + using comm_index_type = distributed::comm_index_type; \ + template \ + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL(ValueType, LocalIndexType, \ + GlobalIndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(distributed_vector, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_DISTRIBUTED_VECTOR_KERNELS_HPP_ diff --git a/core/test/utils.hpp b/core/test/utils.hpp index b37f6981e7c..3508ae9f2ee 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -184,6 +184,30 @@ using TwoValueIndexType = #endif +using ValueLocalGlobalIndexTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types, + std::tuple, + std::tuple, + std::tuple, gko::int32, int32>, + std::tuple, gko::int32, int64>, + std::tuple, gko::int64, int64>>; +#else + ::testing::Types, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, gko::int32, int32>, + std::tuple, gko::int32, int64>, + std::tuple, gko::int64, int64>, + std::tuple, gko::int32, int32>, + std::tuple, gko::int32, int64>, + std::tuple, gko::int64, int64>>; +#endif + + template struct reduction_factor { using nc_output = remove_complex; diff --git a/cuda/CMakeLists.txt 
b/cuda/CMakeLists.txt index a76d0323f73..b8406d25875 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -76,6 +76,7 @@ target_sources(ginkgo_cuda base/version.cpp components/prefix_sum_kernels.cu distributed/partition_kernels.cu + distributed/vector_kernels.cu factorization/factorization_kernels.cu factorization/ic_kernels.cu factorization/ilu_kernels.cu diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu new file mode 100644 index 00000000000..23288e6774a --- /dev/null +++ b/cuda/distributed/vector_kernels.cu @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace cuda { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const Array>& input, + const distributed::Partition* partition, + comm_index_type local_part, + Array>& local_data, + ValueType deduction_help) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 8fa9e8d2e0d..4e35d01ccff 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -21,6 +21,7 @@ target_sources(ginkgo_dpcpp base/version.dp.cpp components/prefix_sum_kernels.dp.cpp distributed/partition_kernels.dp.cpp + distributed/vector_kernels.dp.cpp factorization/ic_kernels.dp.cpp factorization/ilu_kernels.dp.cpp factorization/factorization_kernels.dp.cpp diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp new file mode 100644 index 00000000000..dfe29d50e43 --- /dev/null +++ b/dpcpp/distributed/vector_kernels.dp.cpp @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const Array>& input, + const distributed::Partition* partition, + comm_index_type local_part, + Array>& local_data, + ValueType deduction_help) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 9cc9cc67898..ab6258dc99b 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -165,6 +165,7 @@ set(GINKGO_HIP_SOURCES base/version.hip.cpp components/prefix_sum_kernels.hip.cpp distributed/partition_kernels.hip.cpp + distributed/vector_kernels.hip.cpp factorization/factorization_kernels.hip.cpp factorization/ic_kernels.hip.cpp factorization/ilu_kernels.hip.cpp diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp new file mode 100644 index 00000000000..aa79659d99c --- /dev/null +++ b/hip/distributed/vector_kernels.hip.cpp @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace hip { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const Array>& input, + const distributed::Partition* partition, + comm_index_type local_part, + Array>& local_data, + ValueType deduction_help) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 009c2e182cd..3189a98830d 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -559,6 +559,73 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, #endif +/** + * Instantiates a template for each non-complex value, local and global index + * 
type compiled by Ginkgo. + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take three arguments, which are replaced by the + * value, the local and the global index types. + */ +#if GINKGO_DPCPP_SINGLE_MODE +#define GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro) \ + template _macro(float, int32, int32); \ + template _macro(float, int32, int64); \ + template _macro(float, int64, int64); \ + template <> \ + _macro(double, int32, int32) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(double, int32, int64) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(double, int64, int64) GKO_NOT_IMPLEMENTED +#else +#define GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro) \ + template _macro(float, int32, int32); \ + template _macro(float, int32, int64); \ + template _macro(float, int64, int64); \ + template _macro(double, int32, int32); \ + template _macro(double, int32, int64); \ + template _macro(double, int64, int64) +#endif + + +/** + * Instantiates a template for each value and index type compiled by Ginkgo. + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take three arguments, which are replaced by the + * value, the local and the global index types. 
+ */ +#if GINKGO_DPCPP_SINGLE_MODE +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ + GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro); \ + template _macro(std::complex, int32, int32); \ + template _macro(std::complex, int32, int64); \ + template _macro(std::complex, int64, int64); \ + template <> \ + _macro(std::complex, int32, int32) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(std::complex, int32, int64) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(std::complex, int64, int64) GKO_NOT_IMPLEMENTED +#else +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ + GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro); \ + template _macro(std::complex, int32, int32); \ + template _macro(std::complex, int32, int64); \ + template _macro(std::complex, int64, int64); \ + template _macro(std::complex, int32, int32); \ + template _macro(std::complex, int32, int64); \ + template _macro(std::complex, int64, int64) +#endif + + #if GINKGO_DPCPP_SINGLE_MODE #define GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(_macro) \ template <> \ @@ -580,8 +647,6 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, template <> \ _macro(std::complex, std::complex) GKO_NOT_IMPLEMENTED #else - - /** * Instantiates a template for each value type conversion pair compiled by * Ginkgo. 
@@ -693,7 +758,7 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, template inline constexpr GKO_ATTRIBUTES IndexType invalid_index() { - static_assert(std::is_signed::value, - "IndexType needs to be signed"); + static_assert(std::is_integral::value, + "IndexType needs to be an integral type"); return static_cast(-1); } diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp new file mode 100644 index 00000000000..c588ba7e343 --- /dev/null +++ b/include/ginkgo/core/distributed/base.hpp @@ -0,0 +1,74 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ +#define GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ + + +#include +#include + + +namespace gko { +namespace distributed { + + +/** + * A base class for distributed objects. + * + * This class gives access to the used mpi::communicator object. + * + * @ingroup distributed + */ +class DistributedBase { +public: + virtual ~DistributedBase() = default; + + mpi::communicator get_communicator() const { return comm_; } + + explicit DistributedBase( + mpi::communicator comm = mpi::communicator(MPI_COMM_NULL)) + : comm_{comm} + {} + +protected: + void set_communicator(mpi::communicator comm) { comm_ = comm; } + +private: + mpi::communicator comm_; +}; + + +} // namespace distributed +} // namespace gko + +#endif // GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 3046bec36f4..c410257823d 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -93,6 +93,8 @@ namespace distributed { * exceed this index type's maximum value. * @tparam GlobalIndexType The index type used for the global indices. Needs * to be at least as large a type as LocalIndexType. 
+ * + * @ingroup distributed */ template class Partition diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp new file mode 100644 index 00000000000..cce2571419e --- /dev/null +++ b/include/ginkgo/core/distributed/vector.hpp @@ -0,0 +1,334 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ +#define GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ + + +#include +#include +#include +#include +#include + + +namespace gko { +namespace distributed { + + +/** + * Vector is a format which explicitly stores (multiple) distributed column + * vectors in a dense storage format. + * + * The (multi-)vector is distributed by row, which is described by a @see + * Partition. The local vectors are stored using the @see Dense format. The + * vector should be filled using the read_distributed method, e.g. + * ``` + * auto part = Partition<...>::build_from_mapping(...); + * auto vector = Vector<...>::create(exec, comm); + * vector->read_distributed(matrix_data, part); + * ``` + * Using this approach the size of the global vectors, as well as the size of + * the local vectors, will be automatically inferred. It is possible to create a + * vector with specified global and local sizes and fill the local vectors using + * the accessor get_local. + * + * @tparam ValueType The precision of vector elements. + * @tparam LocalIndexType The index type for local indices used by the + * partition. + * @tparam GlobalIndexType The index type for the global indices used by the + * partition. 
+ * + * @ingroup dist_vector + * @ingroup distributed + */ +template +class Vector + : public EnableLinOp>, + public EnableCreateMethod< + Vector>, + public ConvertibleTo< + Vector, LocalIndexType, GlobalIndexType>>, + public EnableAbsoluteComputation< + remove_complex>>, + public DistributedBase { + friend class EnableCreateMethod< + Vector>; + friend class EnablePolymorphicObject< + Vector, LinOp>; + friend class Vector, LocalIndexType, GlobalIndexType>; + friend class Vector, LocalIndexType, + GlobalIndexType>; + +public: + using EnableLinOp::convert_to; + using EnableLinOp::move_to; + + using value_type = ValueType; + using index_type = GlobalIndexType; + using local_index_type = LocalIndexType; + using global_index_type = GlobalIndexType; + using absolute_type = remove_complex; + using real_type = absolute_type; + using complex_type = + Vector, local_index_type, global_index_type>; + using local_mtx_type = gko::matrix::Dense; + + /** + * Reads a vector from the matrix_data structure and a global row partition. + * + * The number of rows of the matrix data is ignored, only its number of + * columns is relevant. The number of rows is inferred from the partition. + * + * @note The matrix data can contain entries for rows other than those owned + * by the process. Entries for those rows are discarded. + * + * @param data The matrix_data structure + * @param partition The global row partition + */ + void read_distributed( + const matrix_data& data, + std::shared_ptr> + partition); + + /** + * Reads a vector from the device_matrix_data structure and a global row + * partition. 
+ * + * @see read_distributed + */ + void read_distributed( + const device_matrix_data& data, + std::shared_ptr> + partition); + + void convert_to(Vector, LocalIndexType, + GlobalIndexType>* result) const override; + + void move_to(Vector, LocalIndexType, + GlobalIndexType>* result) override; + + std::unique_ptr compute_absolute() const override; + + void compute_absolute_inplace() override; + + /** + * Creates a complex copy of the original vectors. If the original vectors + * were real, the imaginary part of the result will be zero. + */ + std::unique_ptr make_complex() const; + + /** + * Writes a complex copy of the original vectors to given complex vectors. + * If the original vectors were real, the imaginary part of the result will + * be zero. + */ + void make_complex(complex_type* result) const; + + /** + * Creates new real vectors and extracts the real part of the original + * vectors into that. + */ + std::unique_ptr get_real() const; + + /** + * Extracts the real part of the original vectors into given real vectors. + */ + void get_real(real_type* result) const; + + /** + * Creates new real vectors and extracts the imaginary part of the + * original vectors into that. + */ + std::unique_ptr get_imag() const; + + /** + * Extracts the imaginary part of the original vectors into given real + * vectors. + */ + void get_imag(real_type* result) const; + + /** + * Fill the distributed vectors with a given value. + * + * @param value the value to be filled + */ + void fill(ValueType value); + + /** + * Scales the vectors with a scalar (aka: BLAS scal). + * + * @param alpha If alpha is 1x1 Dense matrix, then all vectors are scaled + * by alpha. If it is a Dense row vector of values, + * then i-th column vector is scaled with the i-th + * element of alpha (the number of columns of alpha has to + * match the number of vectors). + */ + void scale(const LinOp* alpha); + + /** + * Scales the vectors with the inverse of a scalar. 
+ * + * @param alpha If alpha is 1x1 Dense matrix, then all vectors are scaled + * by 1 / alpha. If it is a Dense row vector of values, + * then i-th column vector is scaled with the inverse + * of the i-th element of alpha (the number of columns of + * alpha has to match the number of vectors). + */ + void inv_scale(const LinOp* alpha); + + /** + * Adds `b` scaled by `alpha` to the vectors (aka: BLAS axpy). + * + * @param alpha If alpha is 1x1 Dense matrix, then all vectors of b are + * scaled by alpha. If it is a Dense row vector of values, then i-th column + * vector of b is scaled with the i-th element of alpha (the number of + * columns of alpha has to match the number of vectors). + * @param b a (multi-)vector of the same dimension as this + */ + void add_scaled(const LinOp* alpha, const LinOp* b); + + /** + * Subtracts `b` scaled by `alpha` from the vectors (aka: BLAS axpy). + * + * @param alpha If alpha is 1x1 Dense matrix, then all vectors of b are + * scaled by alpha. If it is a Dense row vector of values, then i-th column + * vector of b is scaled with the i-th element of alpha (one per vector). + * @param b a (multi-)vector of the same dimension as this + */ + void sub_scaled(const LinOp* alpha, const LinOp* b); + + /** + * Computes the column-wise dot product of this (multi-)vector and `b` using + * a global reduction. + * + * @param b a (multi-)vector of same dimension as this + * @param result a Dense row matrix, used to store the dot product + * (the number of columns in result must match the number + * of columns of this) + */ + void compute_dot(const LinOp* b, LinOp* result) const; + + /** + * Computes the column-wise dot product of this (multi-)vector and `conj(b)` + * using a global reduction. 
+ * + * @param b a (multi-)vector of same dimension as this + * @param result a Dense row matrix, used to store the dot product + * (the number of columns in result must match the number + * of columns of this) + */ + void compute_conj_dot(const LinOp* b, LinOp* result) const; + + /** + * Computes the Euclidean (L^2) norm of this (multi-)vector using a global + * reduction. + * + * @param result a Dense row matrix, used to store the norm + * (the number of columns in result must match the number + * of columns of this) + */ + void compute_norm2(LinOp* result) const; + + /** + * Computes the column-wise (L^1) norm of this (multi-)vector. + * + * @param result a Dense row matrix, used to store the norm + * (the number of columns in result must match the number + * of columns of this) + */ + void compute_norm1(LinOp* result) const; + + /** + * Direct (read) access to the underlying local_mtx_type vectors. + * + * @return a constant pointer to the underlying local_mtx_type vectors + */ + const local_mtx_type* get_const_local() const; + + /** + * Direct (read/write) access to the underlying local_mtx_type Dense + * vectors. + * + * @return a pointer to the underlying local_mtx_type vectors + */ + local_mtx_type* get_local(); + + /** + * Access to the partition that defines these global vectors. + * + * @return a shared_ptr to the global row partition + */ + std::shared_ptr> + get_partition() const + { + return partition_; + } + +protected: + /** + * Creates an empty distributed vector with a specified size + * @param exec Executor associated with vector + * @param comm Communicator associated with vector, the default is + * MPI_COMM_WORLD + * @param partition Partition of global rows + * @param global_size Global size of the vector + * @param local_size Processor-local size of the vector + * @param stride Stride of the local vector. 
If not specified, it defaults + * to local_size[1] + */ + explicit Vector( + std::shared_ptr exec, + mpi::communicator comm = mpi::communicator(MPI_COMM_WORLD), + std::shared_ptr> + partition = nullptr, + dim<2> global_size = {}, dim<2> local_size = {}, + size_type stride = invalid_index()); + + void apply_impl(const LinOp*, LinOp*) const override; + + void apply_impl(const LinOp*, const LinOp*, const LinOp*, + LinOp*) const override; + +private: + std::shared_ptr> + partition_; + local_mtx_type local_; +}; + + +} // namespace distributed +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index bcaba840fdf..63afd572083 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -47,6 +47,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { +namespace distributed { + + +template +class Vector; + + +} + + namespace matrix { @@ -137,6 +147,9 @@ class Dense friend class SparsityCsr; friend class SparsityCsr; friend class Dense>; + friend class distributed::Vector; + friend class distributed::Vector; + friend class distributed::Vector; public: using ReadableFromMatrixData::read; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index cb4eeebe268..76f4122ab0f 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -70,6 +70,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#if GINKGO_BUILD_MPI +#include +#include +#endif #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 38c42d1d5e1..d9fa42a4547 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -8,6 +8,7 @@ target_sources(ginkgo_omp base/version.cpp components/prefix_sum_kernels.cpp distributed/partition_kernels.cpp + distributed/vector_kernels.cpp factorization/factorization_kernels.cpp factorization/ic_kernels.cpp factorization/ilu_kernels.cpp diff --git a/omp/distributed/vector_kernels.cpp b/omp/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..f6a35cbcb06 --- /dev/null +++ b/omp/distributed/vector_kernels.cpp @@ -0,0 +1,150 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include + + +#include "core/base/allocator.hpp" +#include "core/components/prefix_sum_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const Array>& input, + const distributed::Partition* partition, + comm_index_type local_part, + Array>& local_data, + ValueType deduction_help) +{ + using range_index_type = GlobalIndexType; + using part_index_type = comm_index_type; + auto input_data = input.get_const_data(); + auto range_bounds = partition->get_range_bounds(); + auto range_parts = partition->get_part_ids(); + auto range_starting_indices = partition->get_range_starting_indices(); + auto num_ranges = partition->get_num_ranges(); + + // helpers for retrieving range info + struct range_info { + range_index_type index{}; + GlobalIndexType begin{}; + GlobalIndexType end{}; + LocalIndexType base_rank{}; + part_index_type part{}; + }; + auto find_range = [&](GlobalIndexType idx) { + auto it = std::upper_bound(range_bounds + 1, + range_bounds + num_ranges + 1, idx); + return std::distance(range_bounds + 1, it); + }; + auto update_range = [&](GlobalIndexType idx, range_info& info) { + if (idx < info.begin || idx >= info.end) { + info.index = find_range(idx); + info.begin = range_bounds[info.index]; + info.end = 
range_bounds[info.index + 1]; + info.base_rank = range_starting_indices[info.index]; + info.part = range_parts[info.index]; + // assert(info.index < num_ranges); + } + // assert(idx >= info.begin && idx < info.end); + }; + auto map_to_local = [&](GlobalIndexType idx, + range_info info) -> LocalIndexType { + return static_cast(idx - info.begin) + info.base_rank; + }; + + range_info row_range{}; + auto num_threads = omp_get_max_threads(); + vector partial_counts(num_threads + 1, exec); + auto work_per_thread = + static_cast(ceildiv(input.get_num_elems(), num_threads)); +#pragma omp parallel firstprivate(row_range) + { + auto thread_id = omp_get_thread_num(); + auto begin = static_cast(work_per_thread * thread_id); + auto end = std::min(begin + work_per_thread, input.get_num_elems()); + size_type count{}; + for (auto i = begin; i < end; i++) { + auto entry = input_data[i]; + update_range(entry.row, row_range); + // skip non-local rows + if (row_range.part != local_part) { + continue; + } + count++; + } + partial_counts[thread_id] = count; + } + + components::prefix_sum(exec, partial_counts.data(), num_threads + 1); + + local_data.resize_and_reset(partial_counts.back()); +#pragma omp parallel firstprivate(row_range) + { + auto thread_id = omp_get_thread_num(); + auto begin = static_cast(work_per_thread * thread_id); + auto end = std::min(begin + work_per_thread, input.get_num_elems()); + auto idx = partial_counts[thread_id]; + for (auto i = begin; i < end; i++) { + auto entry = input_data[i]; + update_range(entry.row, row_range); + // skip non-local rows + if (row_range.part != local_part) { + continue; + } + local_data.get_data()[idx] = { + // map global row idx to local row idx + map_to_local(entry.row, row_range), + static_cast(entry.column), entry.value}; + idx++; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace omp +} // namespace kernels +} // 
namespace gko diff --git 
a/reference/CMakeLists.txt b/reference/CMakeLists.txt index f408fd0c7ef..bbfb7e0b76d 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources(ginkgo_reference components/precision_conversion_kernels.cpp components/prefix_sum_kernels.cpp distributed/partition_kernels.cpp + distributed/vector_kernels.cpp factorization/factorization_kernels.cpp factorization/ic_kernels.cpp factorization/ilu_kernels.cpp diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..5b144c604e4 --- /dev/null +++ b/reference/distributed/vector_kernels.cpp @@ -0,0 +1,130 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include "core/base/allocator.hpp" +#include "core/components/prefix_sum_kernels.hpp" + +namespace gko { +namespace kernels { +namespace reference { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const Array>& input, + const distributed::Partition* partition, + comm_index_type local_part, + Array>& local_data, + ValueType deduction_help) +{ + using range_index_type = GlobalIndexType; + using part_index_type = comm_index_type; + using global_nonzero = matrix_data_entry; + using local_nonzero = matrix_data_entry; + auto input_data = input.get_const_data(); + auto range_bounds = partition->get_range_bounds(); + auto range_parts = partition->get_part_ids(); + auto range_starting_indices = partition->get_range_starting_indices(); + auto num_parts = partition->get_num_parts(); + auto num_ranges = partition->get_num_ranges(); + + // helpers for retrieving range info + struct range_info { + range_index_type index{}; + GlobalIndexType begin{}; + GlobalIndexType end{}; + LocalIndexType base_rank{}; + part_index_type part{}; + }; + auto find_range = [&](GlobalIndexType idx) { + auto it = std::upper_bound(range_bounds + 1, + range_bounds + num_ranges + 1, idx); + return std::distance(range_bounds + 1, it); + }; + auto update_range = [&](GlobalIndexType idx, range_info& info) { + if (idx < info.begin || 
idx >= info.end) { + info.index = find_range(idx); + info.begin = range_bounds[info.index]; + info.end = range_bounds[info.index + 1]; + info.base_rank = range_starting_indices[info.index]; + info.part = range_parts[info.index]; + // assert(info.index < num_ranges); + } + // assert(idx >= info.begin && idx < info.end); + }; + auto map_to_local = [&](GlobalIndexType idx, + range_info info) -> LocalIndexType { + return static_cast(idx - info.begin) + info.base_rank; + }; + + range_info row_range{}; + size_type count{}; + for (size_type i = 0; i < input.get_num_elems(); ++i) { + auto entry = input_data[i]; + update_range(entry.row, row_range); + // skip non-local rows + if (row_range.part != local_part) { + continue; + } + count++; + } + + local_data.resize_and_reset(count); + size_type idx{}; + for (size_type i = 0; i < input.get_num_elems(); ++i) { + auto entry = input_data[i]; + update_range(entry.row, row_range); + // skip non-local rows + if (row_range.part != local_part) { + continue; + } + local_data.get_data()[idx] = {// map global row idx to local row idx + map_to_local(entry.row, row_range), + static_cast(entry.column), + entry.value}; + idx++; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/test/distributed/CMakeLists.txt b/reference/test/distributed/CMakeLists.txt index 78a626512af..9dfe653db9c 100644 --- a/reference/test/distributed/CMakeLists.txt +++ b/reference/test/distributed/CMakeLists.txt @@ -1 +1,2 @@ ginkgo_create_test(partition_kernels) +ginkgo_create_test(vector_kernels) diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..7b47ed38031 --- /dev/null +++ b/reference/test/distributed/vector_kernels.cpp @@ -0,0 +1,165 @@ 
+/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include +#include +#include + + +#include +#include + + +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Vector : public ::testing::Test { +protected: + using value_type = typename std::tuple_element< + 0, decltype(ValueLocalGlobalIndexType())>::type; + using local_index_type = typename std::tuple_element< + 1, decltype(ValueLocalGlobalIndexType())>::type; + using global_index_type = typename std::tuple_element< + 2, decltype(ValueLocalGlobalIndexType())>::type; + using local_entry = gko::matrix_data_entry; + using global_entry = gko::matrix_data_entry; + + Vector() + : ref(gko::ReferenceExecutor::create()), + mapping{ref}, + input{ref}, + output{ref} + {} + + void validate(const gko::distributed::Partition< + local_index_type, global_index_type>* partition, + std::initializer_list input_entries, + std::initializer_list> + output_entries) + { + std::vector> ref_outputs; + + input = gko::Array{ref, input_entries}; + for (auto entry : output_entries) { + ref_outputs.push_back(gko::Array{ref, entry}); + } + + for (comm_index_type part = 0; part < partition->get_num_parts(); + ++part) { + gko::kernels::reference::distributed_vector::build_local( + ref, input, partition, part, output, value_type{}); + + GKO_ASSERT_ARRAY_EQ(output, ref_outputs[part]); + } + } + + std::shared_ptr ref; + gko::Array mapping; + gko::Array input; + gko::Array output; +}; + +TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); + + +TYPED_TEST(Vector, BuildsLocalEmpty) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + this->mapping = {this->ref, {1, 0, 2, 2, 0, 1, 1, 2}}; + comm_index_type num_parts = 3; + auto partition = gko::distributed::Partition< + local_index_type, 
global_index_type>::build_from_mapping(this->ref, + this->mapping, + num_parts); + + this->validate(partition.get(), {}, {{}, {}, {}}); +} + + +TYPED_TEST(Vector, BuildsLocalSmall) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + this->mapping = {this->ref, {1, 0}}; + comm_index_type num_parts = 2; + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + this->mapping, + num_parts); + + this->validate(partition.get(), + {{0, 0, 1}, {0, 1, 2}, {1, 0, 3}, {1, 1, 4}}, + {{{0, 0, 3}, {0, 1, 4}}, {{0, 0, 1}, {0, 1, 2}}}); +} + + +TYPED_TEST(Vector, BuildsLocal) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + this->mapping = {this->ref, {1, 2, 0, 0, 2, 1}}; + comm_index_type num_parts = 3; + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + this->mapping, + num_parts); + + this->validate(partition.get(), + {{0, 0, 1}, + {0, 1, 2}, + {1, 2, 3}, + {1, 3, 4}, + {2, 4, 5}, + {3, 5, 6}, + {4, 6, 7}, + {5, 7, 8}}, + {{{0, 4, 5}, {1, 5, 6}}, + {{0, 0, 1}, {0, 1, 2}, {1, 7, 8}}, + {{0, 2, 3}, {0, 3, 4}, {1, 6, 7}}}); +} + + +} // namespace From 33bd901833275e398568c835017f863eaa600ffc Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 22 Dec 2021 11:27:25 +0100 Subject: [PATCH 03/38] add tests for distributed vector --- core/test/mpi/CMakeLists.txt | 1 + core/test/mpi/distributed/CMakeLists.txt | 1 + core/test/mpi/distributed/vector.cpp | 329 +++++++++++++++++++++++ 3 files changed, 331 insertions(+) create mode 100644 core/test/mpi/distributed/CMakeLists.txt create mode 100644 core/test/mpi/distributed/vector.cpp diff --git a/core/test/mpi/CMakeLists.txt b/core/test/mpi/CMakeLists.txt index 8edc6781c4e..afbd19bacae 100644 --- a/core/test/mpi/CMakeLists.txt +++ 
b/core/test/mpi/CMakeLists.txt @@ -5,3 +5,4 @@ target_sources(gtest_mpi_main find_package(MPI REQUIRED) target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) add_subdirectory(base) +add_subdirectory(distributed) diff --git a/core/test/mpi/distributed/CMakeLists.txt b/core/test/mpi/distributed/CMakeLists.txt new file mode 100644 index 00000000000..d48945a104a --- /dev/null +++ b/core/test/mpi/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_mpi_test(vector 3) diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp new file mode 100644 index 00000000000..c5695cf3a9e --- /dev/null +++ b/core/test/mpi/distributed/vector.cpp @@ -0,0 +1,329 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +namespace { + + +template +class Vector : public ::testing::Test { +public: + using value_type = typename std::tuple_element< + 0, decltype(ValueLocalGlobalIndexType())>::type; + using local_index_type = typename std::tuple_element< + 1, decltype(ValueLocalGlobalIndexType())>::type; + using global_index_type = typename std::tuple_element< + 2, decltype(ValueLocalGlobalIndexType())>::type; + using part_type = + gko::distributed::Partition; + using md_type = gko::matrix_data; + using d_md_type = gko::device_matrix_data; + using dist_vec_type = gko::distributed::Vector; + using dense_type = gko::matrix::Dense; + using nz_type = gko::matrix_data_entry; + + Vector() + : ref(gko::ReferenceExecutor::create()), + comm(MPI_COMM_WORLD), + part(gko::share(part_type::build_from_contiguous( + this->ref, {ref, {0, 2, 4, 6}}))), + md_a{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, + md_b{{10, -11}, {8, -9}, {-6, 7}, {4, -5}, {2, -3}, {0, 1}}, + vec_a(dist_vec_type::create(ref, comm, part)), + vec_b(dist_vec_type::create(ref, comm, part)) + { + vec_a->read_distributed(md_a, part); + vec_b->read_distributed(md_b, part); + } + + std::shared_ptr ref; + gko::mpi::communicator comm; + std::shared_ptr part; + + md_type md_a; + md_type md_b; + + std::unique_ptr vec_a; + std::unique_ptr vec_b; +}; + + +TYPED_TEST_SUITE(Vector, 
gko::test::ValueLocalGlobalIndexTypes); + +TYPED_TEST(Vector, CanReadGlobalMatrixData) +{ + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto rank = this->comm.rank(); + + vec->read_distributed(this->md_a, this->part); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(2, 2)); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); +} + + +TYPED_TEST(Vector, CanReadGlobalMatrixDataSomeEmpty) +{ + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + auto part = gko::share( + part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto rank = this->comm.rank(); + + vec->read_distributed(this->md_a, part); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + if (rank == 1) { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR( + vec->get_local(), + l({{0., 1.}, {2., 3.}, {4., 5.}, {6., 7.}, {8., 9.}, {10., 11.}}), + r::value); + } else { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(0, 2)); + } +} + + +TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) +{ + using d_md_type = typename TestFixture::d_md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + using nz_type = typename TestFixture::nz_type; + d_md_type md{gko::dim<2>{6, 2}, + gko::Array{this->ref, I{{0, 0, 0}, + {0, 1, 1}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 4}, + {2, 1, 5}, + {3, 0, 6}, + {3, 1, 7}, + {4, 0, 8}, + {4, 1, 9}, + {5, 0, 10}, + {5, 1, 11}}}}; + auto part = gko::share( + 
part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(md, part); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); +} + +TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; + auto part = gko::share(part_type::build_from_mapping( + this->ref, {this->ref, {0, 1, 2, 0, 2, 0}}, 3)); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto rank = this->comm.rank(); + gko::dim<2> ref_size[3] = {{3, 2}, {1, 2}, {2, 2}}; + I> ref_data[3] = { + {{0, 1}, {6, 7}, {10, 11}}, + {{2, 3}}, + {{4, 5}, {8, 9}}, + }; + + vec->read_distributed(md, part); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), ref_size[rank]); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); +} + + +TYPED_TEST(Vector, CanReadLocalMatrixData) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md[3] = { + {gko::dim<2>{6, 2}, {{0, 0, 0}, {0, 1, 1}, {1, 0, 2}, {1, 1, 3}}}, + {gko::dim<2>{6, 2}, {{2, 0, 4}, {2, 1, 5}, {3, 0, 6}, {3, 1, 7}}}, + {gko::dim<2>{6, 2}, {{4, 0, 8}, {4, 1, 9}, {5, 0, 10}, {5, 1, 11}}}}; + auto part = gko::share( + part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); + auto vec = 
TestFixture::dist_vec_type::create(this->ref, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(md[rank], part); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); +} + + +TYPED_TEST(Vector, CanReadLocalMatrixDataSomeEmpty) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md[3] = {{gko::dim<2>{6, 2}, {}}, + {gko::dim<2>{6, 2}, + // clang-format off + {{0, 0, 0}, {0, 1, 1}, + {1, 0, 2}, {1, 1, 3}, + {2, 0, 4}, {2, 1, 5}, + {3, 0, 6}, {3, 1, 7}, + {4, 0, 8}, {4, 1, 9}, + {5, 0, 10}, {5, 1, 11}}}, + // clang-format on + {gko::dim<2>{6, 2}, {}}}; + auto part = gko::share( + part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto rank = this->comm.rank(); + + vec->read_distributed(md[rank], part); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + if (rank == 1) { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR( + vec->get_local(), + I>( + {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}), + r::value); + } else { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(0, 2)); + } +} + + +TYPED_TEST(Vector, ComputesDotProduct) +{ + using dense_type = typename TestFixture::dense_type; + using value_type = typename TestFixture::value_type; + auto res = dense_type::create(this->ref, gko::dim<2>{1, 2}); + auto ref_res = + gko::initialize(I>{{32, -54}}, this->ref); + + this->vec_a->compute_dot(this->vec_b.get(), res.get()); + + GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); +} + + 
+TYPED_TEST(Vector, ComputesConjDot) +{ + using dense_type = typename TestFixture::dense_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::global_index_type; + using real_type = typename gko::remove_complex; + using dist_vec_type = typename TestFixture::dist_vec_type; + auto md_a = gko::test::generate_random_matrix_data( + 6, 2, std::uniform_int_distribution(2, 2), + std::normal_distribution(0, 1), std::ranlux48{42}); + auto md_b = gko::test::generate_random_matrix_data( + 6, 2, std::uniform_int_distribution(2, 2), + std::normal_distribution(0, 1), std::ranlux48{42}); + auto dist_vec_a = dist_vec_type::create(this->ref, this->comm, this->part); + auto dist_vec_b = dist_vec_type::create(this->ref, this->comm, this->part); + auto dense_vec_a = dense_type::create(this->ref); + auto dense_vec_b = dense_type::create(this->ref); + dist_vec_a->read_distributed(md_a, this->part); + dist_vec_b->read_distributed(md_b, this->part); + dense_vec_a->read(md_a); + dense_vec_b->read(md_b); + auto res = dense_type::create(this->ref, gko::dim<2>{1, 2}); + auto ref_res = dense_type::create(this->ref, gko::dim<2>{1, 2}); + + dist_vec_a->compute_dot(dist_vec_b.get(), res.get()); + dense_vec_a->compute_dot(dense_vec_b.get(), ref_res.get()); + + GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); +} + + +TYPED_TEST(Vector, ComputesNorm) +{ + using dense_type = typename TestFixture::dense_type; + using value_type = typename TestFixture::value_type; + auto res = dense_type::absolute_type::create(this->ref, gko::dim<2>{1, 2}); + auto ref_res = gko::initialize( + {{static_cast>(std::sqrt(220)), + static_cast>(std::sqrt(286))}}, + this->ref); + + this->vec_a->compute_norm2(res.get()); + + GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); +} + +} // namespace From 4e6d5de82607dfa20d0b5a754a8bbf1fdfacb7a7 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 22 Dec 2021 11:29:58 +0100 Subject: [PATCH 04/38] rework read distributed reference kernels --- 
reference/distributed/vector_kernels.cpp | 67 +++++++++--------------- 1 file changed, 25 insertions(+), 42 deletions(-) diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp index 5b144c604e4..684017153ff 100644 --- a/reference/distributed/vector_kernels.cpp +++ b/reference/distributed/vector_kernels.cpp @@ -51,72 +51,55 @@ void build_local( Array>& local_data, ValueType deduction_help) { - using range_index_type = GlobalIndexType; - using part_index_type = comm_index_type; - using global_nonzero = matrix_data_entry; - using local_nonzero = matrix_data_entry; auto input_data = input.get_const_data(); auto range_bounds = partition->get_range_bounds(); auto range_parts = partition->get_part_ids(); auto range_starting_indices = partition->get_range_starting_indices(); - auto num_parts = partition->get_num_parts(); auto num_ranges = partition->get_num_ranges(); - // helpers for retrieving range info - struct range_info { - range_index_type index{}; - GlobalIndexType begin{}; - GlobalIndexType end{}; - LocalIndexType base_rank{}; - part_index_type part{}; - }; - auto find_range = [&](GlobalIndexType idx) { - auto it = std::upper_bound(range_bounds + 1, - range_bounds + num_ranges + 1, idx); - return std::distance(range_bounds + 1, it); - }; - auto update_range = [&](GlobalIndexType idx, range_info& info) { - if (idx < info.begin || idx >= info.end) { - info.index = find_range(idx); - info.begin = range_bounds[info.index]; - info.end = range_bounds[info.index + 1]; - info.base_rank = range_starting_indices[info.index]; - info.part = range_parts[info.index]; - // assert(info.index < num_ranges); + auto find_range = [&](GlobalIndexType idx, size_type hint) { + if (range_bounds[hint] <= idx && idx < range_bounds[hint + 1]) { + return hint; + } else { + auto it = std::upper_bound(range_bounds + 1, + range_bounds + num_ranges + 1, idx); + return static_cast(std::distance(range_bounds + 1, it)); } - // assert(idx >= info.begin && idx 
< info.end); }; auto map_to_local = [&](GlobalIndexType idx, - range_info info) -> LocalIndexType { - return static_cast(idx - info.begin) + info.base_rank; + size_type range_id) -> LocalIndexType { + return static_cast(idx - range_bounds[range_id]) + + range_starting_indices[range_id]; }; - range_info row_range{}; size_type count{}; + size_type range_id_hint = 0; for (size_type i = 0; i < input.get_num_elems(); ++i) { auto entry = input_data[i]; - update_range(entry.row, row_range); + auto range_id = find_range(entry.row, range_id_hint); + range_id_hint = range_id; + auto part_id = range_parts[range_id]; // skip non-local rows - if (row_range.part != local_part) { - continue; + if (part_id == local_part) { + count++; } - count++; } local_data.resize_and_reset(count); size_type idx{}; for (size_type i = 0; i < input.get_num_elems(); ++i) { auto entry = input_data[i]; - update_range(entry.row, row_range); + auto range_id = find_range(entry.row, range_id_hint); + range_id_hint = range_id; + auto part_id = range_parts[range_id]; // skip non-local rows - if (row_range.part != local_part) { - continue; + if (part_id == local_part) { + local_data.get_data()[idx] = { + // map global row idx to local row idx + map_to_local(entry.row, range_id), + static_cast(entry.column), entry.value}; + idx++; } - local_data.get_data()[idx] = {// map global row idx to local row idx - map_to_local(entry.row, row_range), - static_cast(entry.column), - entry.value}; - idx++; } } From 9786387acf1370ae46960f7d0ecb2127d6ec1c2f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jan 2022 16:01:24 +0100 Subject: [PATCH 05/38] fix formatting --- core/distributed/vector.cpp | 2 +- core/distributed/vector_kernels.hpp | 2 +- core/test/mpi/distributed/vector.cpp | 6 ++++-- cuda/distributed/vector_kernels.cu | 2 +- dpcpp/distributed/vector_kernels.dp.cpp | 2 +- hip/distributed/vector_kernels.hip.cpp | 2 +- include/ginkgo/core/distributed/base.hpp | 7 +++++++ 
include/ginkgo/core/distributed/vector.hpp | 6 ++++++ include/ginkgo/ginkgo.hpp | 4 +--- omp/distributed/vector_kernels.cpp | 2 +- reference/distributed/vector_kernels.cpp | 2 +- reference/test/distributed/vector_kernels.cpp | 6 ++---- 12 files changed, 27 insertions(+), 16 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 8bc4054d53c..0764620c6de 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/distributed/vector_kernels.hpp b/core/distributed/vector_kernels.hpp index dbdaeb9c101..a1021c7fcb3 100644 --- a/core/distributed/vector_kernels.hpp +++ b/core/distributed/vector_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index c5695cf3a9e..5206c943bc7 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,11 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include #include #include +#include "core/test/utils.hpp" + + namespace { diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu index 23288e6774a..da09b9f7ad2 100644 --- a/cuda/distributed/vector_kernels.cu +++ b/cuda/distributed/vector_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp index dfe29d50e43..6927677ad21 100644 --- a/dpcpp/distributed/vector_kernels.dp.cpp +++ b/dpcpp/distributed/vector_kernels.dp.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp index aa79659d99c..3d4e100c7c7 100644 --- a/hip/distributed/vector_kernels.hip.cpp +++ b/hip/distributed/vector_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp index c588ba7e343..d69e137bf78 100644 --- a/include/ginkgo/core/distributed/base.hpp +++ b/include/ginkgo/core/distributed/base.hpp @@ -38,6 +38,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#if GINKGO_BUILD_MPI + + namespace gko { namespace distributed { @@ -71,4 +74,8 @@ class DistributedBase { } // namespace distributed } // namespace gko + +#endif + + #endif // GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index cce2571419e..4a6af294a06 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -41,6 +41,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if GINKGO_BUILD_MPI + + namespace gko { namespace distributed { @@ -331,4 +334,7 @@ class Vector } // namespace gko +#endif + + #endif // GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 76f4122ab0f..4d009402039 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -69,11 +69,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include -#if GINKGO_BUILD_MPI #include +#include #include -#endif #include #include diff --git a/omp/distributed/vector_kernels.cpp b/omp/distributed/vector_kernels.cpp index f6a35cbcb06..1a6fd51a95d 100644 --- a/omp/distributed/vector_kernels.cpp +++ b/omp/distributed/vector_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp index 684017153ff..2605e4b98f9 100644 --- a/reference/distributed/vector_kernels.cpp +++ b/reference/distributed/vector_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp index 7b47ed38031..c8a08a97ce8 100644 --- a/reference/test/distributed/vector_kernels.cpp +++ b/reference/test/distributed/vector_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,9 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/distributed/vector_kernels.hpp" - - #include #include #include @@ -46,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/distributed/vector_kernels.hpp" #include "core/test/utils.hpp" From 71a9eaec25d88edf5c1b577b542e132fe056b1cc Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jan 2022 16:04:10 +0100 Subject: [PATCH 06/38] make name of constant accessor more reflective --- core/distributed/vector.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 0764620c6de..449bb610db2 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -83,7 +83,7 @@ void Vector::convert_to( result->set_size(this->get_size()); result->set_communicator(this->get_communicator()); result->partition_ = this->partition_; - this->get_local()->convert_to(result->get_local()); + this->get_const_local()->convert_to(result->get_local()); } @@ -104,9 +104,9 @@ Vector::compute_absolute() const auto result = absolute_type::create(exec, this->get_communicator(), this->get_partition(), this->get_size(), - 
this->get_local()->get_size()); + this->get_const_local()->get_size()); - exec->run(vector::make_outplace_absolute_dense(this->get_local(), + exec->run(vector::make_outplace_absolute_dense(this->get_const_local(), result->get_local())); return result; @@ -124,7 +124,7 @@ void Vector const typename Vector::local_mtx_type* -Vector::get_local() const +Vector::get_const_local() const { return &local_; } @@ -299,7 +299,7 @@ void Vector::add_scaled( const LinOp* alpha, const LinOp* b) { auto dense_b = as>(b); - this->get_local()->add_scaled(alpha, dense_b->get_local()); + this->get_local()->add_scaled(alpha, dense_b->get_const_local()); } @@ -319,7 +319,8 @@ void Vector::compute_dot( auto exec = this->get_executor(); auto dense_res = make_temporary_clone(exec, as>(result)); - this->get_local()->compute_dot(as(b)->get_local(), dense_res.get()); + this->get_const_local()->compute_dot(as(b)->get_const_local(), + dense_res.get()); exec->synchronize(); auto dense_res_host = make_temporary_clone(exec->get_master(), dense_res.get()); @@ -337,8 +338,8 @@ void Vector::compute_conj_dot( auto exec = this->get_executor(); auto dense_res = make_temporary_clone(exec, as>(result)); - this->get_local()->compute_conj_dot(as(b)->get_local(), - dense_res.get()); + this->get_const_local()->compute_conj_dot(as(b)->get_const_local(), + dense_res.get()); exec->synchronize(); this->get_communicator().all_reduce(dense_res->get_values(), static_cast(this->get_size()[1]), @@ -350,12 +351,12 @@ template void Vector::compute_norm2( LinOp* result) const { - using NormVector = matrix::Dense>; + using NormVector = typename local_mtx_type::absolute_type; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); auto dense_res = make_temporary_clone(exec, as(result)); - exec->run( - vector::make_compute_norm2_sqr(this->get_local(), dense_res.get())); + exec->run(vector::make_compute_norm2_sqr(this->get_const_local(), + dense_res.get())); exec->synchronize(); 
this->get_communicator().all_reduce(dense_res->get_values(), static_cast(this->get_size()[1]), From fe94d0db6545f79cc55a59f38225a24c088c1cc7 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jan 2022 16:11:34 +0100 Subject: [PATCH 07/38] update local vector typename --- core/distributed/vector.cpp | 8 ++++---- include/ginkgo/core/distributed/vector.hpp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 449bb610db2..379f3eb47fa 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -123,7 +123,7 @@ void Vector const typename Vector::local_mtx_type* + GlobalIndexType>::local_vector_type* Vector::get_const_local() const { return &local_; @@ -131,7 +131,7 @@ Vector::get_const_local() const template -typename Vector::local_mtx_type* +typename Vector::local_vector_type* Vector::get_local() { return &local_; @@ -351,7 +351,7 @@ template void Vector::compute_norm2( LinOp* result) const { - using NormVector = typename local_mtx_type::absolute_type; + using NormVector = typename local_vector_type::absolute_type; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); auto dense_res = make_temporary_clone(exec, as(result)); @@ -369,7 +369,7 @@ template void Vector::compute_norm1( LinOp* result) const { - using NormVector = typename local_mtx_type::absolute_type; + using NormVector = typename local_vector_type::absolute_type; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); auto dense_res = make_temporary_clone(exec, as(result)); diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 4a6af294a06..2107b374555 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -105,7 +105,7 @@ class Vector using real_type = absolute_type; using complex_type = 
Vector, local_index_type, global_index_type>; - using local_mtx_type = gko::matrix::Dense; + using local_vector_type = gko::matrix::Dense; /** * Reads a vector from the matrix_data structure and a global row partition. @@ -273,19 +273,19 @@ class Vector void compute_norm1(LinOp* result) const; /** - * Direct (read) access to the underlying local local_mtx_type vectors. + * Direct (read) access to the underlying local local_vector_type vectors. * - * @return a constant pointer to the underlying local_mtx_type vectors + * @return a constant pointer to the underlying local_vector_type vectors */ - const local_mtx_type* get_const_local() const; + const local_vector_type* get_const_local() const; /* - * Direct (read/write) access to the underlying local_mtx_type Dense + * Direct (read/write) access to the underlying local_vector_type Dense * vectors. * - * @return a constant pointer to the underlying local_mtx_type vectors + * @return a constant pointer to the underlying local_vector_type vectors */ - local_mtx_type* get_local(); + local_vector_type* get_local(); /** * Access to the partition that defines these global vectors. 
@@ -326,7 +326,7 @@ class Vector private: std::shared_ptr> partition_; - local_mtx_type local_; + local_vector_type local_; }; From c4615ed785d7ffd03afb1dce73856ea4a97c3b26 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 31 Jan 2022 18:02:17 +0100 Subject: [PATCH 08/38] minor refactoring --- core/distributed/vector.cpp | 142 ++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 379f3eb47fa..f4282c97360 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -67,77 +67,6 @@ void Vector::apply_impl( } -template -void Vector::fill( - const ValueType value) -{ - this->get_local()->fill(value); -} - - -template -void Vector::convert_to( - Vector, LocalIndexType, GlobalIndexType>* result) - const -{ - result->set_size(this->get_size()); - result->set_communicator(this->get_communicator()); - result->partition_ = this->partition_; - this->get_const_local()->convert_to(result->get_local()); -} - - -template -void Vector::move_to( - Vector, LocalIndexType, GlobalIndexType>* result) -{ - this->convert_to(result); -} - - -template -std::unique_ptr< - typename Vector::absolute_type> -Vector::compute_absolute() const -{ - auto exec = this->get_executor(); - - auto result = absolute_type::create(exec, this->get_communicator(), - this->get_partition(), this->get_size(), - this->get_const_local()->get_size()); - - exec->run(vector::make_outplace_absolute_dense(this->get_const_local(), - result->get_local())); - - return result; -} - - -template -void Vector::compute_absolute_inplace() -{ - this->get_local()->compute_absolute_inplace(); -} - - -template -const typename Vector::local_vector_type* -Vector::get_const_local() const -{ - return &local_; -} - - -template -typename Vector::local_vector_type* -Vector::get_local() -{ - return &local_; -} - - template Vector::Vector( std::shared_ptr exec, mpi::communicator comm, @@ -212,6 +141,77 @@ void 
Vector::read_distributed( } +template +void Vector::fill( + const ValueType value) +{ + this->get_local()->fill(value); +} + + +template +void Vector::convert_to( + Vector, LocalIndexType, GlobalIndexType>* result) + const +{ + result->set_size(this->get_size()); + result->set_communicator(this->get_communicator()); + result->partition_ = this->partition_; + this->get_const_local()->convert_to(result->get_local()); +} + + +template +void Vector::move_to( + Vector, LocalIndexType, GlobalIndexType>* result) +{ + this->convert_to(result); +} + + +template +std::unique_ptr< + typename Vector::absolute_type> +Vector::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto result = absolute_type::create(exec, this->get_communicator(), + this->get_partition(), this->get_size(), + this->get_const_local()->get_size()); + + exec->run(vector::make_outplace_absolute_dense(this->get_const_local(), + result->get_local())); + + return result; +} + + +template +void Vector::compute_absolute_inplace() +{ + this->get_local()->compute_absolute_inplace(); +} + + +template +const typename Vector::local_vector_type* +Vector::get_const_local() const +{ + return &local_; +} + + +template +typename Vector::local_vector_type* +Vector::get_local() +{ + return &local_; +} + + template std::unique_ptr< typename Vector::complex_type> From 1238d5cdef331ee3fc1f21ab255dfe3ad3046686 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 3 Feb 2022 14:55:18 +0000 Subject: [PATCH 09/38] Format files Co-authored-by: Marcel Koch --- core/test/mpi/distributed/vector.cpp | 15 +++++++++------ reference/test/distributed/vector_kernels.cpp | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index 5206c943bc7..1f3c349a7cc 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -49,12 +49,15 @@ namespace { template class Vector : public 
::testing::Test { public: - using value_type = typename std::tuple_element< - 0, decltype(ValueLocalGlobalIndexType())>::type; - using local_index_type = typename std::tuple_element< - 1, decltype(ValueLocalGlobalIndexType())>::type; - using global_index_type = typename std::tuple_element< - 2, decltype(ValueLocalGlobalIndexType())>::type; + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; using part_type = gko::distributed::Partition; using md_type = gko::matrix_data; diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp index c8a08a97ce8..be9124d45cc 100644 --- a/reference/test/distributed/vector_kernels.cpp +++ b/reference/test/distributed/vector_kernels.cpp @@ -55,12 +55,15 @@ using comm_index_type = gko::distributed::comm_index_type; template class Vector : public ::testing::Test { protected: - using value_type = typename std::tuple_element< - 0, decltype(ValueLocalGlobalIndexType())>::type; - using local_index_type = typename std::tuple_element< - 1, decltype(ValueLocalGlobalIndexType())>::type; - using global_index_type = typename std::tuple_element< - 2, decltype(ValueLocalGlobalIndexType())>::type; + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; using local_entry = gko::matrix_data_entry; using global_entry = gko::matrix_data_entry; From 07f1f679f1529348a4baf24cf46cccb5ea3e5bff Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 4 Feb 2022 11:34:14 +0100 Subject: 
[PATCH 10/38] use GPU aware MPI if specified the user has to set the cmake variable GINKGO_FORCE_GPU_AWARE_MPI to a true value, to enable this behavior. If not set, additional transfers to the host are used. Co-authored-by: Pratik Nayak --- CMakeLists.txt | 8 ++++ cmake/get_info.cmake | 2 +- core/distributed/vector.cpp | 63 ++++++++++++++++++++++++-------- include/ginkgo/config.hpp.in | 5 +++ include/ginkgo/core/base/mpi.hpp | 13 +++++++ 5 files changed, 75 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 31175e23c9e..68cda285ec9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,8 @@ option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ bac option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON) option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." ON) option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." OFF) +option(GINKGO_FORCE_GPU_AWARE_MPI "Assert that the MPI library is GPU aware. 
This forces Ginkgo to assume that GPU aware functionality is available (OFF (default) or ON), but may fail + catastrophically in case the MPI implementation is not GPU Aware, and GPU aware functionality has been forced" OFF) set(GINKGO_CIRCULAR_DEPS_FLAGS "-Wl,--no-undefined") @@ -189,8 +191,14 @@ else() message(STATUS "HWLOC is being forcibly switched off") endif() +set(GINKGO_HAVE_GPU_AWARE_MPI OFF) if(GINKGO_BUILD_MPI) find_package(MPI REQUIRED) + if(GINKGO_FORCE_GPU_AWARE_MPI) + set(GINKGO_HAVE_GPU_AWARE_MPI ON) + else() + set(GINKGO_HAVE_GPU_AWARE_MPI OFF) + endif() endif() # We keep using NVCC/HCC for consistency with previous releases even if AMD diff --git a/cmake/get_info.cmake b/cmake/get_info.cmake index 479b889aeaf..2cf8dd06c3f 100644 --- a/cmake/get_info.cmake +++ b/cmake/get_info.cmake @@ -130,7 +130,7 @@ foreach(log_type ${log_types}) "GINKGO_BUILD_OMP;GINKGO_BUILD_MPI;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP;GINKGO_BUILD_DPCPP") ginkgo_print_module_footer(${${log_type}} " Enabled features:") ginkgo_print_foreach_variable(${${log_type}} - "GINKGO_MIXED_PRECISION") + "GINKGO_MIXED_PRECISION;GINKGO_HAVE_GPU_AWARE_MPI") ginkgo_print_module_footer(${${log_type}} " Tests, benchmarks and examples:") ginkgo_print_foreach_variable(${${log_type}} "GINKGO_BUILD_TESTS;GINKGO_FAST_TESTS;GINKGO_BUILD_EXAMPLES;GINKGO_EXTLIB_EXAMPLE;GINKGO_BUILD_BENCHMARKS;GINKGO_BENCHMARK_ENABLE_TUNING") diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index f4282c97360..7639c0d53c0 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -317,17 +317,23 @@ void Vector::compute_dot( const LinOp* b, LinOp* result) const { auto exec = this->get_executor(); + const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as>(result)); this->get_const_local()->compute_dot(as(b)->get_const_local(), dense_res.get()); exec->synchronize(); - auto dense_res_host = - make_temporary_clone(exec->get_master(), 
dense_res.get()); - this->get_communicator().all_reduce(dense_res_host->get_values(), - static_cast(this->get_size()[1]), - MPI_SUM); - dense_res->copy_from(dense_res_host.get()); + auto use_host_buffer = + exec->get_master() != exec || !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + auto dense_res_host = + make_temporary_clone(exec->get_master(), dense_res.get()); + comm.all_reduce(dense_res_host->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } } @@ -336,14 +342,23 @@ void Vector::compute_conj_dot( const LinOp* b, LinOp* result) const { auto exec = this->get_executor(); + const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as>(result)); this->get_const_local()->compute_conj_dot(as(b)->get_const_local(), dense_res.get()); exec->synchronize(); - this->get_communicator().all_reduce(dense_res->get_values(), - static_cast(this->get_size()[1]), - MPI_SUM); + auto use_host_buffer = + exec->get_master() != exec || !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + auto dense_res_host = + make_temporary_clone(exec->get_master(), dense_res.get()); + comm.all_reduce(dense_res_host->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } } @@ -354,13 +369,22 @@ void Vector::compute_norm2( using NormVector = typename local_vector_type::absolute_type; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); + const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); exec->run(vector::make_compute_norm2_sqr(this->get_const_local(), dense_res.get())); exec->synchronize(); - this->get_communicator().all_reduce(dense_res->get_values(), - static_cast(this->get_size()[1]), - MPI_SUM); + auto use_host_buffer = + exec->get_master() != 
exec || !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + auto dense_res_host = + make_temporary_clone(exec->get_master(), dense_res.get()); + comm.all_reduce(dense_res_host->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } exec->run(vector::make_compute_sqrt(dense_res.get())); } @@ -372,12 +396,21 @@ void Vector::compute_norm1( using NormVector = typename local_vector_type::absolute_type; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); + const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); this->get_const_local()->compute_norm1(dense_res.get()); exec->synchronize(); - this->get_communicator().all_reduce(dense_res->get_values(), - static_cast(this->get_size()[1]), - MPI_SUM); + auto use_host_buffer = + exec->get_master() != exec || !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + auto dense_res_host = + make_temporary_clone(exec->get_master(), dense_res.get()); + comm.all_reduce(dense_res_host->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } } diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 198f465d4d0..f56605ecd05 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -87,6 +87,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #cmakedefine01 GINKGO_BUILD_MPI // clang-format on +/* Is the MPI implementation GPU aware? */ +// clang-format off +#cmakedefine01 GINKGO_HAVE_GPU_AWARE_MPI +// clang-format on + /* Is HWLOC available ? 
*/ // clang-format off diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 4d6df9544ab..10b8d826d63 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -58,6 +58,19 @@ namespace gko { namespace mpi { +/** + * Return if GPU aware functionality is available + */ +static constexpr bool is_gpu_aware() +{ +#if GINKGO_HAVE_GPU_AWARE_MPI + return true; +#else + return false; +#endif +} + + #define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \ template <> \ struct type_impl { \ From e87d9aa6b68796b8beb89f466a0059eb1b13ba87 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 8 Feb 2022 17:35:57 +0100 Subject: [PATCH 11/38] review updates - change build_local reference + omp implementation - add build_local omp kernel test - add vector constructor with explicit stride Co-authored-by: Pratik Nayak Co-authored-by: Tobias Ribizel --- common/unified/matrix/dense_kernels.cpp | 5 +- core/distributed/vector.cpp | 36 ++- core/distributed/vector_kernels.hpp | 4 +- cuda/CMakeLists.txt | 2 +- cuda/distributed/vector_kernels.cu | 3 +- dpcpp/distributed/vector_kernels.dp.cpp | 3 +- hip/distributed/vector_kernels.hip.cpp | 3 +- include/ginkgo/core/base/types.hpp | 2 +- include/ginkgo/core/distributed/base.hpp | 11 +- include/ginkgo/core/distributed/vector.hpp | 29 ++- omp/distributed/vector_kernels.cpp | 99 ++------ omp/test/CMakeLists.txt | 1 + omp/test/distributed/CMakeLists.txt | 1 + omp/test/distributed/vector_kernels.cpp | 223 ++++++++++++++++++ reference/distributed/vector_kernels.cpp | 27 +-- reference/test/distributed/vector_kernels.cpp | 76 +++--- 16 files changed, 349 insertions(+), 176 deletions(-) create mode 100644 omp/test/distributed/CMakeLists.txt create mode 100644 omp/test/distributed/vector_kernels.cpp diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.cpp index 40122219e31..ef105ba7993 100644 --- a/common/unified/matrix/dense_kernels.cpp +++ 
b/common/unified/matrix/dense_kernels.cpp @@ -388,9 +388,8 @@ void compute_norm2_sqr(std::shared_ptr exec, run_kernel_col_reduction( exec, [] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); }, - [] GKO_KERNEL(auto a, auto b) { return a + b; }, - [] GKO_KERNEL(auto a) { return a; }, remove_complex{}, - result->get_values(), x->get_size(), x); + GKO_KERNEL_REDUCE_SUM(remove_complex), result->get_values(), + x->get_size(), x); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL); diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 7639c0d53c0..3891a744ff6 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { namespace distributed { namespace vector { +namespace { GKO_REGISTER_OPERATION(compute_norm2_sqr, dense::compute_norm2_sqr); @@ -48,6 +49,7 @@ GKO_REGISTER_OPERATION(outplace_absolute_dense, dense::outplace_absolute_dense); GKO_REGISTER_OPERATION(build_local, distributed_vector::build_local); +} // namespace } // namespace vector @@ -66,6 +68,14 @@ void Vector::apply_impl( GKO_NOT_SUPPORTED(this); } +template +Vector::Vector( + std::shared_ptr exec, mpi::communicator comm, + std::shared_ptr> partition, + dim<2> global_size, dim<2> local_size) + : Vector(exec, comm, std::move(partition), global_size, local_size, + local_size[1]) +{} template Vector::Vector( @@ -76,13 +86,8 @@ Vector::Vector( Vector>{exec, global_size}, DistributedBase{comm}, - partition_{ - partition - ? std::move(partition) - : gko::share( - Partition::create(exec))}, - local_{exec, local_size, - stride != invalid_index() ? 
stride : local_size[1]} + partition_{std::move(partition)}, + local_{exec, local_size, stride} {} @@ -97,13 +102,16 @@ void read_local_impl( { auto rank = comm.rank(); - Array> local_data{exec}; - exec->run(vector::make_build_local(global_data, partition, rank, local_data, + auto num_rows = static_cast(partition->get_part_size(rank)); + if (local_mtx->get_size() != dim<2>{num_rows, num_cols}) { + auto stride = + local_mtx->get_stride() > 0 ? local_mtx->get_stride() : num_cols; + LocalMtxType::create(exec, dim<2>{num_rows, num_cols}, stride) + ->move_to(local_mtx); + } + local_mtx->fill(zero()); + exec->run(vector::make_build_local(global_data, partition, rank, local_mtx, ValueType{})); - - auto local_rows = static_cast(partition->get_part_size(rank)); - dim<2> local_size{local_rows, num_cols}; - local_mtx->read({local_size, local_data}); } @@ -112,6 +120,7 @@ void Vector::read_distributed( const matrix_data& data, std::shared_ptr> partition) { + GKO_ASSERT(partition->get_executor() == this->get_executor()); this->partition_ = std::move(partition); auto exec = this->get_executor(); @@ -130,6 +139,7 @@ void Vector::read_distributed( const device_matrix_data& data, std::shared_ptr> partition) { + GKO_ASSERT(partition->get_executor() == this->get_executor()); this->partition_ = std::move(partition); read_local_impl(this->get_executor(), this->get_communicator(), diff --git a/core/distributed/vector_kernels.hpp b/core/distributed/vector_kernels.hpp index a1021c7fcb3..078266dcfb5 100644 --- a/core/distributed/vector_kernels.hpp +++ b/core/distributed/vector_kernels.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include "core/base/kernel_declaration.hpp" @@ -55,8 +56,7 @@ namespace kernels { const Array>& input, \ const distributed::Partition* \ partition, \ - comm_index_type local_part, \ - Array>& local_data, \ + comm_index_type local_part, matrix::Dense* local_mtx, \ ValueType deduction_help) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index b8406d25875..e962d7e9e45 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -76,7 +76,7 @@ target_sources(ginkgo_cuda base/version.cpp components/prefix_sum_kernels.cu distributed/partition_kernels.cu - distributed/partition_kernels.cu + distributed/vector_kernels.cu factorization/factorization_kernels.cu factorization/ic_kernels.cu factorization/ilu_kernels.cu diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu index da09b9f7ad2..ca59db17671 100644 --- a/cuda/distributed/vector_kernels.cu +++ b/cuda/distributed/vector_kernels.cu @@ -47,8 +47,7 @@ void build_local( std::shared_ptr exec, const Array>& input, const distributed::Partition* partition, - comm_index_type local_part, - Array>& local_data, + comm_index_type local_part, matrix::Dense* local_mtx, ValueType deduction_help) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp index 6927677ad21..f332a832ab4 100644 --- a/dpcpp/distributed/vector_kernels.dp.cpp +++ b/dpcpp/distributed/vector_kernels.dp.cpp @@ -47,8 +47,7 @@ void build_local( std::shared_ptr exec, const Array>& input, const distributed::Partition* partition, - comm_index_type local_part, - Array>& local_data, + comm_index_type local_part, matrix::Dense* local_mtx, ValueType deduction_help) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp index 3d4e100c7c7..7fedfd0cce4 100644 --- 
a/hip/distributed/vector_kernels.hip.cpp +++ b/hip/distributed/vector_kernels.hip.cpp @@ -47,8 +47,7 @@ void build_local( std::shared_ptr exec, const Array>& input, const distributed::Partition* partition, - comm_index_type local_part, - Array>& local_data, + comm_index_type local_part, matrix::Dense* local_mtx, ValueType deduction_help) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 3189a98830d..0207f7a2457 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -758,7 +758,7 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, template inline constexpr GKO_ATTRIBUTES IndexType invalid_index() { - static_assert(std::is_integral::value, + static_assert(std::is_signed::value, "IndexType needs to be signed"); return static_cast(-1); } diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp index d69e137bf78..cd3d10d8bd2 100644 --- a/include/ginkgo/core/distributed/base.hpp +++ b/include/ginkgo/core/distributed/base.hpp @@ -35,12 +35,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include #if GINKGO_BUILD_MPI +#include + + namespace gko { namespace distributed { @@ -58,13 +60,10 @@ class DistributedBase { mpi::communicator get_communicator() const { return comm_; } - explicit DistributedBase( - mpi::communicator comm = mpi::communicator(MPI_COMM_NULL)) - : comm_{comm} - {} + explicit DistributedBase(mpi::communicator comm) : comm_{std::move(comm)} {} protected: - void set_communicator(mpi::communicator comm) { comm_ = comm; } + void set_communicator(mpi::communicator comm) { comm_ = std::move(comm); } private: mpi::communicator comm_; diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 2107b374555..7f01d3bca2a 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -35,15 +35,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include + + +#if GINKGO_BUILD_MPI + + #include #include #include #include -#if GINKGO_BUILD_MPI - - namespace gko { namespace distributed { @@ -307,16 +309,29 @@ class Vector * @param partition Partition of global rows * @param global_size Global size of the vector * @param local_size Processor-local size of the vector - * @param stride Stride of the local vector. If not specified, it defaults - * to local_size[1] + * @param stride Stride of the local vector. 
+ */ + Vector(std::shared_ptr exec, mpi::communicator comm, + std::shared_ptr> + partition, + dim<2> global_size, dim<2> local_size, size_type stride); + + /** + * Creates an empty distributed vector with a specified size + * @param exec Executor associated with vector + * @param comm Communicator associated with vector, the default is + * MPI_COMM_WORLD + * @param partition Partition of global rows + * @param global_size Global size of the vector + * @param local_size Processor-local size of the vector, uses local_size[1] + * as the stride */ explicit Vector( std::shared_ptr exec, mpi::communicator comm = mpi::communicator(MPI_COMM_WORLD), std::shared_ptr> partition = nullptr, - dim<2> global_size = {}, dim<2> local_size = {}, - size_type stride = invalid_index()); + dim<2> global_size = {}, dim<2> local_size = {}); void apply_impl(const LinOp*, LinOp*) const override; diff --git a/omp/distributed/vector_kernels.cpp b/omp/distributed/vector_kernels.cpp index 1a6fd51a95d..fbb1c1d351f 100644 --- a/omp/distributed/vector_kernels.cpp +++ b/omp/distributed/vector_kernels.cpp @@ -33,10 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/distributed/vector_kernels.hpp" -#include - - -#include "core/base/allocator.hpp" #include "core/components/prefix_sum_kernels.hpp" @@ -51,91 +47,42 @@ void build_local( std::shared_ptr exec, const Array>& input, const distributed::Partition* partition, - comm_index_type local_part, - Array>& local_data, + comm_index_type local_part, matrix::Dense* local_mtx, ValueType deduction_help) { - using range_index_type = GlobalIndexType; - using part_index_type = comm_index_type; auto input_data = input.get_const_data(); auto range_bounds = partition->get_range_bounds(); auto range_parts = partition->get_part_ids(); auto range_starting_indices = partition->get_range_starting_indices(); auto num_ranges = partition->get_num_ranges(); - // helpers for retrieving range info - struct range_info { - range_index_type index{}; - GlobalIndexType begin{}; - GlobalIndexType end{}; - LocalIndexType base_rank{}; - part_index_type part{}; - }; - auto find_range = [&](GlobalIndexType idx) { - auto it = std::upper_bound(range_bounds + 1, - range_bounds + num_ranges + 1, idx); - return std::distance(range_bounds + 1, it); - }; - auto update_range = [&](GlobalIndexType idx, range_info& info) { - if (idx < info.begin || idx >= info.end) { - info.index = find_range(idx); - info.begin = range_bounds[info.index]; - info.end = range_bounds[info.index + 1]; - info.base_rank = range_starting_indices[info.index]; - info.part = range_parts[info.index]; - // assert(info.index < num_ranges); + auto find_range = [&](GlobalIndexType idx, size_type hint) { + if (range_bounds[hint] <= idx && idx < range_bounds[hint + 1]) { + return hint; + } else { + auto it = std::upper_bound(range_bounds + 1, + range_bounds + num_ranges + 1, idx); + return static_cast(std::distance(range_bounds + 1, it)); } - // assert(idx >= info.begin && idx < info.end); }; auto map_to_local = [&](GlobalIndexType idx, - range_info info) -> LocalIndexType { - return static_cast(idx - info.begin) + info.base_rank; + 
size_type range_id) -> LocalIndexType { + return static_cast(idx - range_bounds[range_id]) + + range_starting_indices[range_id]; }; - range_info row_range{}; - auto num_threads = omp_get_max_threads(); - vector partial_counts(num_threads + 1, exec); - auto work_per_thread = - static_cast(ceildiv(input.get_num_elems(), num_threads)); -#pragma omp parallel - { - auto thread_id = omp_get_thread_num(); - auto begin = static_cast(work_per_thread * thread_id); - auto end = std::min(begin + work_per_thread, input.get_num_elems()); - size_type count{}; - for (auto i = begin; i < end; i++) { - auto entry = input_data[i]; - update_range(entry.row, row_range); - // skip non-local rows - if (row_range.part != local_part) { - continue; - } - count++; - } - partial_counts[thread_id] = count; - } - - components::prefix_sum(exec, partial_counts.data(), num_threads + 1); - - local_data.resize_and_reset(partial_counts.back()); -#pragma omp parallel - { - auto thread_id = omp_get_thread_num(); - auto begin = static_cast(work_per_thread * thread_id); - auto end = std::min(begin + work_per_thread, input.get_num_elems()); - auto idx = partial_counts[thread_id]; - for (auto i = begin; i < end; i++) { - auto entry = input_data[i]; - update_range(entry.row, row_range); - // skip non-local rows - if (row_range.part != local_part) { - continue; - } - local_data.get_data()[idx] = { - // map global row idx to local row idx - map_to_local(entry.row, row_range), - static_cast(entry.column), entry.value}; - idx++; + size_type range_id_hint = 0; +#pragma omp parallel for firstprivate(range_id_hint) + for (size_type i = 0; i < input.get_num_elems(); ++i) { + auto entry = input_data[i]; + auto range_id = find_range(entry.row, range_id_hint); + range_id_hint = range_id; + auto part_id = range_parts[range_id]; + // skip non-local rows + if (part_id == local_part) { + local_mtx->at(map_to_local(entry.row, range_id), + static_cast(entry.column)) = + entry.value; } } } diff --git 
a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index cf7723a11f1..2ddf2808922 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -2,6 +2,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) +add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(matrix) add_subdirectory(preconditioner) diff --git a/omp/test/distributed/CMakeLists.txt b/omp/test/distributed/CMakeLists.txt new file mode 100644 index 00000000000..61e5d60cb39 --- /dev/null +++ b/omp/test/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(vector_kernels) diff --git a/omp/test/distributed/vector_kernels.cpp b/omp/test/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..caec47ef4c4 --- /dev/null +++ b/omp/test/distributed/vector_kernels.cpp @@ -0,0 +1,223 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include + + +#include +#include + + +#include +#include + + +#include "core/distributed/vector_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Vector : public ::testing::Test { +protected: + using value_type = typename std::tuple_element< + 0, decltype(ValueLocalGlobalIndexType())>::type; + using local_index_type = typename std::tuple_element< + 1, decltype(ValueLocalGlobalIndexType())>::type; + using global_index_type = typename std::tuple_element< + 2, decltype(ValueLocalGlobalIndexType())>::type; + using global_entry = gko::matrix_data_entry; + using mtx = gko::matrix::Dense; + + Vector() + : ref(gko::ReferenceExecutor::create()), + exec(gko::OmpExecutor::create()), + engine(42) + {} + + void validate( + const gko::size_type num_cols, + const gko::distributed::Partition* + partition, + const gko::distributed::Partition* + d_partition, + gko::Array input) + { + gko::Array d_input{exec, input}; + for (comm_index_type part = 0; part < partition->get_num_parts(); + ++part) { + auto num_rows = + static_cast(partition->get_part_size(part)); + auto output = mtx::create(ref, gko::dim<2>{num_rows, num_cols}); + output->fill(gko::zero()); + auto d_output = gko::clone(exec, output); + + gko::kernels::reference::distributed_vector::build_local( + ref, input, partition, part, 
output.get(), value_type{}); + gko::kernels::omp::distributed_vector::build_local( + exec, d_input, d_partition, part, d_output.get(), value_type{}); + + GKO_ASSERT_MTX_NEAR(output, d_output, 0); + } + } + + std::shared_ptr ref; + std::shared_ptr exec; + std::default_random_engine engine; +}; +template + +gko::Array> +generate_random_matrix_data_array(gko::size_type num_rows, + gko::size_type num_cols, + NonzeroDistribution&& nonzero_dist, + ValueDistribution&& value_dist, + Engine&& engine, + std::shared_ptr exec) +{ + auto md = gko::test::generate_random_matrix_data( + num_rows, num_cols, std::forward(nonzero_dist), + std::forward(value_dist), + std::forward(engine)); + md.ensure_row_major_order(); + return gko::Array>( + exec, md.nonzeros.begin(), md.nonzeros.end()); +} + + +TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); + + +TYPED_TEST(Vector, BuildsLocalEmptyIsEquivalentToRef) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + using global_entry = typename TestFixture::global_entry; + gko::distributed::comm_index_type num_parts = 10; + auto mapping = + gko::test::generate_random_array( + 100, + std::uniform_int_distribution( + 0, num_parts - 1), + this->engine, this->ref); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto d_partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); + + this->validate(0, partition.get(), d_partition.get(), + gko::Array{this->ref}); +} + + +TYPED_TEST(Vector, BuildsLocalSmallIsEquivalentToRef) +{ + using value_type = typename TestFixture::value_type; + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::distributed::comm_index_type num_parts = 3; + 
gko::size_type num_rows = 10; + gko::size_type num_cols = 2; + auto mapping = + gko::test::generate_random_array( + num_rows, + std::uniform_int_distribution( + 0, num_parts - 1), + this->engine, this->ref); + auto input = + generate_random_matrix_data_array( + num_rows, num_cols, + std::uniform_int_distribution(0, + static_cast(num_cols - 1)), + std::uniform_real_distribution>(0, + 1), + this->engine, this->ref); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto d_partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); + + this->validate(num_cols, partition.get(), d_partition.get(), input); +} + + +TYPED_TEST(Vector, BuildsLocalIsEquivalentToRef) +{ + using value_type = typename TestFixture::value_type; + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::distributed::comm_index_type num_parts = 13; + gko::size_type num_rows = 40; + gko::size_type num_cols = 67; + auto mapping = + gko::test::generate_random_array( + num_rows, + std::uniform_int_distribution( + 0, num_parts - 1), + this->engine, this->ref); + auto input = + generate_random_matrix_data_array( + num_rows, num_cols, + std::uniform_int_distribution(0, + static_cast(num_cols - 1)), + std::uniform_real_distribution>(0, + 1), + this->engine, this->ref); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto d_partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); + + this->validate(num_cols, partition.get(), d_partition.get(), input); +} + + +} // namespace diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp index 
2605e4b98f9..c453ca5716f 100644 --- a/reference/distributed/vector_kernels.cpp +++ b/reference/distributed/vector_kernels.cpp @@ -33,9 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/distributed/vector_kernels.hpp" -#include "core/base/allocator.hpp" #include "core/components/prefix_sum_kernels.hpp" + namespace gko { namespace kernels { namespace reference { @@ -47,8 +47,7 @@ void build_local( std::shared_ptr exec, const Array>& input, const distributed::Partition* partition, - comm_index_type local_part, - Array>& local_data, + comm_index_type local_part, matrix::Dense* local_mtx, ValueType deduction_help) { auto input_data = input.get_const_data(); @@ -72,7 +71,6 @@ void build_local( range_starting_indices[range_id]; }; - size_type count{}; size_type range_id_hint = 0; for (size_type i = 0; i < input.get_num_elems(); ++i) { auto entry = input_data[i]; @@ -81,24 +79,9 @@ void build_local( auto part_id = range_parts[range_id]; // skip non-local rows if (part_id == local_part) { - count++; - } - } - - local_data.resize_and_reset(count); - size_type idx{}; - for (size_type i = 0; i < input.get_num_elems(); ++i) { - auto entry = input_data[i]; - auto range_id = find_range(entry.row, range_id_hint); - range_id_hint = range_id; - auto part_id = range_parts[range_id]; - // skip non-local rows - if (part_id == local_part) { - local_data.get_data()[idx] = { - // map global row idx to local row idx - map_to_local(entry.row, range_id), - static_cast(entry.column), entry.value}; - idx++; + local_mtx->at(map_to_local(entry.row, range_id), + static_cast(entry.column)) = + entry.value; } } } diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp index be9124d45cc..10993fdb29c 100644 --- a/reference/test/distributed/vector_kernels.cpp +++ b/reference/test/distributed/vector_kernels.cpp @@ -55,51 +55,48 @@ using comm_index_type = gko::distributed::comm_index_type; template class Vector 
: public ::testing::Test { protected: - using value_type = - typename std::tuple_element<0, decltype( - ValueLocalGlobalIndexType())>::type; - using local_index_type = - typename std::tuple_element<1, decltype( - ValueLocalGlobalIndexType())>::type; - using global_index_type = - typename std::tuple_element<2, decltype( - ValueLocalGlobalIndexType())>::type; - using local_entry = gko::matrix_data_entry; + using value_type = typename std::tuple_element< + 0, decltype(ValueLocalGlobalIndexType())>::type; + using local_index_type = typename std::tuple_element< + 1, decltype(ValueLocalGlobalIndexType())>::type; + using global_index_type = typename std::tuple_element< + 2, decltype(ValueLocalGlobalIndexType())>::type; using global_entry = gko::matrix_data_entry; - - Vector() - : ref(gko::ReferenceExecutor::create()), - mapping{ref}, - input{ref}, - output{ref} - {} - - void validate(const gko::distributed::Partition< - local_index_type, global_index_type>* partition, - std::initializer_list input_entries, - std::initializer_list> - output_entries) + using mtx = gko::matrix::Dense; + + Vector() : ref(gko::ReferenceExecutor::create()), mapping{ref} {} + + void validate( + const gko::size_type num_cols, + const gko::distributed::Partition* + partition, + std::initializer_list input_entries, + std::initializer_list< + std::initializer_list>> + output_entries) { - std::vector> ref_outputs; - - input = gko::Array{ref, input_entries}; + std::vector>> + ref_outputs; + auto input = gko::Array{ref, input_entries}; for (auto entry : output_entries) { - ref_outputs.push_back(gko::Array{ref, entry}); + ref_outputs.emplace_back(entry); } - for (comm_index_type part = 0; part < partition->get_num_parts(); ++part) { + auto num_rows = + static_cast(partition->get_part_size(part)); + auto output = mtx::create(ref, gko::dim<2>{num_rows, num_cols}); + output->fill(gko::zero()); + gko::kernels::reference::distributed_vector::build_local( - ref, input, partition, part, output, value_type{}); 
+ ref, input, partition, part, output.get(), value_type{}); - GKO_ASSERT_ARRAY_EQ(output, ref_outputs[part]); + GKO_ASSERT_MTX_NEAR(output, ref_outputs[part], 0); } } std::shared_ptr ref; gko::Array mapping; - gko::Array input; - gko::Array output; }; TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); @@ -116,7 +113,8 @@ TYPED_TEST(Vector, BuildsLocalEmpty) this->mapping, num_parts); - this->validate(partition.get(), {}, {{}, {}, {}}); + this->validate(0, partition.get(), {}, + {{{}, {}}, {{}, {}, {}}, {{}, {}, {}}}); } @@ -131,9 +129,9 @@ TYPED_TEST(Vector, BuildsLocalSmall) this->mapping, num_parts); - this->validate(partition.get(), + this->validate(2, partition.get(), {{0, 0, 1}, {0, 1, 2}, {1, 0, 3}, {1, 1, 4}}, - {{{0, 0, 3}, {0, 1, 4}}, {{0, 0, 1}, {0, 1, 2}}}); + {{{3, 4}}, {{1, 2}}}); } @@ -148,7 +146,7 @@ TYPED_TEST(Vector, BuildsLocal) this->mapping, num_parts); - this->validate(partition.get(), + this->validate(8, partition.get(), {{0, 0, 1}, {0, 1, 2}, {1, 2, 3}, @@ -157,9 +155,9 @@ TYPED_TEST(Vector, BuildsLocal) {3, 5, 6}, {4, 6, 7}, {5, 7, 8}}, - {{{0, 4, 5}, {1, 5, 6}}, - {{0, 0, 1}, {0, 1, 2}, {1, 7, 8}}, - {{0, 2, 3}, {0, 3, 4}, {1, 6, 7}}}); + {{{0, 0, 0, 0, 5, 0, 0, 0}, {0, 0, 0, 0, 0, 6, 0, 0}}, + {{1, 2, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 8}}, + {{0, 0, 3, 4, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 7, 0}}}); } From 278daf2aa76e2ba0a5e4fa975bc1096d2f2e3386 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 10 Feb 2022 10:50:18 +0100 Subject: [PATCH 12/38] adds DenseCache to reuse a dense vector without repeated allocations Co-authored-by: Tobias Ribizel --- core/CMakeLists.txt | 1 + core/base/cache.cpp | 68 ++++++ core/distributed/vector.cpp | 28 +-- core/test/base/CMakeLists.txt | 1 + core/test/base/cache.cpp | 227 +++++++++++++++++++++ include/ginkgo/core/base/cache.hpp | 116 +++++++++++ include/ginkgo/core/distributed/vector.hpp | 4 + include/ginkgo/ginkgo.hpp | 1 + 8 files changed, 434 insertions(+), 12 deletions(-) create 
mode 100644 core/base/cache.cpp create mode 100644 core/test/base/cache.cpp create mode 100644 include/ginkgo/core/base/cache.hpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index d2e27b340df..b7760ca47fe 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -4,6 +4,7 @@ add_library(ginkgo "") target_sources(ginkgo PRIVATE base/array.cpp + base/cache.cpp base/combination.cpp base/composition.cpp base/device_matrix_data.cpp diff --git a/core/base/cache.cpp b/core/base/cache.cpp new file mode 100644 index 00000000000..5a01317da99 --- /dev/null +++ b/core/base/cache.cpp @@ -0,0 +1,68 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include + + +namespace gko { +namespace detail { + + +template +void DenseCache::init(std::shared_ptr exec, + dim<2> size) +{ + if (!vec || vec->get_size() != size || vec->get_executor() != exec) { + vec = matrix::Dense::create(exec, size); + } +} + + +template +void DenseCache::init_from( + const matrix::Dense* template_vec) +{ + if (!vec || vec->get_size() != template_vec->get_size() || + vec->get_executor() != template_vec->get_executor()) { + vec = matrix::Dense::create_with_config_of(template_vec); + } + // Only the configuration of template_vec is reused; its values are not copied. +} + + +#define GKO_DECLARE_DENSE_CACHE(_type) class DenseCache<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CACHE); + + +} // namespace detail +} // namespace gko diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 3891a744ff6..3c5eba9a75e 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -336,10 +336,11 @@ void Vector::compute_dot( auto use_host_buffer = exec->get_master() != exec || !gko::mpi::is_gpu_aware(); if (use_host_buffer) { - auto dense_res_host = - make_temporary_clone(exec->get_master(), dense_res.get()); - comm.all_reduce(dense_res_host->get_values(), + host_reduction_buffer_.init(exec->get_master(), dense_res->get_size()); + host_reduction_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_reduction_buffer_->get_values(), 
static_cast(this->get_size()[1]), MPI_SUM); + dense_res->copy_from(host_reduction_buffer_.get()); } else { comm.all_reduce(dense_res->get_values(), static_cast(this->get_size()[1]), MPI_SUM); @@ -361,10 +362,11 @@ void Vector::compute_conj_dot( auto use_host_buffer = exec->get_master() != exec || !gko::mpi::is_gpu_aware(); if (use_host_buffer) { - auto dense_res_host = - make_temporary_clone(exec->get_master(), dense_res.get()); - comm.all_reduce(dense_res_host->get_values(), + host_reduction_buffer_.init(exec->get_master(), dense_res->get_size()); + host_reduction_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_reduction_buffer_->get_values(), static_cast(this->get_size()[1]), MPI_SUM); + dense_res->copy_from(host_reduction_buffer_.get()); } else { comm.all_reduce(dense_res->get_values(), static_cast(this->get_size()[1]), MPI_SUM); @@ -387,10 +389,11 @@ void Vector::compute_norm2( auto use_host_buffer = exec->get_master() != exec || !gko::mpi::is_gpu_aware(); if (use_host_buffer) { - auto dense_res_host = - make_temporary_clone(exec->get_master(), dense_res.get()); - comm.all_reduce(dense_res_host->get_values(), + host_norm_buffer_.init(exec->get_master(), dense_res->get_size()); + host_norm_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_norm_buffer_->get_values(), static_cast(this->get_size()[1]), MPI_SUM); + dense_res->copy_from(host_norm_buffer_.get()); } else { comm.all_reduce(dense_res->get_values(), static_cast(this->get_size()[1]), MPI_SUM); @@ -413,10 +416,11 @@ void Vector::compute_norm1( auto use_host_buffer = exec->get_master() != exec || !gko::mpi::is_gpu_aware(); if (use_host_buffer) { - auto dense_res_host = - make_temporary_clone(exec->get_master(), dense_res.get()); - comm.all_reduce(dense_res_host->get_values(), + host_norm_buffer_.init(exec->get_master(), dense_res->get_size()); + host_norm_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_norm_buffer_->get_values(), static_cast(this->get_size()[1]), MPI_SUM); + 
dense_res->copy_from(host_norm_buffer_.get()); } else { comm.all_reduce(dense_res->get_values(), static_cast(this->get_size()[1]), MPI_SUM); diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index 6f8c7291165..56c983f1481 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -1,6 +1,7 @@ ginkgo_create_test(abstract_factory) ginkgo_create_test(allocator) ginkgo_create_test(array) +ginkgo_create_test(cache) ginkgo_create_test(combination) ginkgo_create_test(composition) ginkgo_create_test(dim) diff --git a/core/test/base/cache.cpp b/core/test/base/cache.cpp new file mode 100644 index 00000000000..6c099addf7d --- /dev/null +++ b/core/test/base/cache.cpp @@ -0,0 +1,227 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class DenseCache : public ::testing::Test { +protected: + using value_type = ValueType; + + DenseCache() {} + + void SetUp() { ref = gko::ReferenceExecutor::create(); } + + void TearDown() {} + + void gen_cache(gko::dim<2> size) { cache.init(ref, size); } + + std::shared_ptr ref; + gko::detail::DenseCache cache; +}; + + +TYPED_TEST_SUITE(DenseCache, gko::test::ValueTypes, TypenameNameGenerator); + + +TYPED_TEST(DenseCache, CanDefaultConstruct) +{ + using value_type = typename TestFixture::value_type; + gko::detail::DenseCache cache; + + ASSERT_EQ(cache.get(), nullptr); +} + + +TYPED_TEST(DenseCache, CanInitWithSize) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + + this->cache.init(this->ref, size); + + ASSERT_NE(this->cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), size); + ASSERT_EQ(this->cache->get_executor(), this->ref); +} + + +TYPED_TEST(DenseCache, SecondInitWithSameSizeIsNoOp) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + this->cache.init(this->ref, size); + auto first_ptr = this->cache.get(); + + this->cache.init(this->ref, size); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_EQ(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, 
SecondInitWithDifferentSizeInitializes) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + gko::dim<2> second_size{7, 4}; + this->cache.init(this->ref, size); + auto first_ptr = this->cache.get(); + + this->cache.init(this->ref, second_size); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_NE(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, CanInitFromDense) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{5, 2}; + auto dense = gko::matrix::Dense::create(this->ref, size); + + this->cache.init_from(dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), size); + ASSERT_EQ(this->cache->get_executor(), dense->get_executor()); +} + + +TYPED_TEST(DenseCache, SecondInitFromSameDenseIsNoOp) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + auto dense = gko::matrix::Dense::create(this->ref, size); + this->cache.init_from(dense.get()); + auto first_ptr = this->cache.get(); + + this->cache.init_from(dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_EQ(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, SecondInitFromDifferentDenseWithSameSizeIsNoOp) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + auto first_dense = gko::matrix::Dense::create(this->ref, size); + auto second_dense = gko::matrix::Dense::create(this->ref, size); + this->cache.init_from(first_dense.get()); + auto first_ptr = this->cache.get(); + + this->cache.init_from(second_dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_EQ(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, SecondInitFromDifferentDenseWithDifferentSizeInitializes) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + gko::dim<2> second_size{7, 4}; + auto first_dense = gko::matrix::Dense::create(this->ref, size); + auto second_dense = + 
gko::matrix::Dense::create(this->ref, second_size); + this->cache.init_from(first_dense.get()); + auto first_ptr = this->cache.get(); + + this->cache.init_from(second_dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_NE(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, VectorIsNotCopied) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache(this->cache); + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +TYPED_TEST(DenseCache, VectorIsNotMoved) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache(std::move(this->cache)); + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +TYPED_TEST(DenseCache, VectorIsNotCopyAssigned) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache; + + cache = this->cache; + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +TYPED_TEST(DenseCache, VectorIsNotMoveAssigned) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache; + + cache = std::move(this->cache); + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +} // namespace diff --git a/include/ginkgo/core/base/cache.hpp b/include/ginkgo/core/base/cache.hpp new file mode 100644 index 00000000000..cfac3f83d0f --- /dev/null +++ b/include/ginkgo/core/base/cache.hpp @@ -0,0 +1,116 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#ifndef GKO_PUBLIC_CORE_BASE_CACHE_HPP_ +#define GKO_PUBLIC_CORE_BASE_CACHE_HPP_ + + +#include + + +#include + + +namespace gko { +namespace matrix { + + +template +class Dense; + + +} + + +namespace detail { + + +/** + * Manages a Dense vector that is buffered and reused internally to avoid + * repeated allocations. Copying an instance will only yield an empty object + * since copying the cached vector would not make sense. + * + * @internal The struct is present to wrap cache-like buffer storage that will + * not be copied when the outer object gets copied. 
+ */ +template +struct DenseCache { + DenseCache() = default; + ~DenseCache() = default; + DenseCache(const DenseCache&) {} + DenseCache(DenseCache&&) {} + DenseCache& operator=(const DenseCache&) { return *this; } + DenseCache& operator=(DenseCache&&) { return *this; } + std::unique_ptr> vec{}; + + + /** + * Initializes the buffered vector with the same configuration as the + * template vector, if + * - the current vector is null, + * - the sizes of the buffered and template vector differ, + * - the executor of the buffered and template vector differ. + * + * @note This does not copy any data from the template vector. + * + * @param template_vec Defines the configuration (executor, size, stride) + * of the buffered vector. + */ + void init_from(const matrix::Dense* template_vec); + + /** + * Initializes the buffered vector, if + * - the current vector is null, + * - the sizes differ, + * - the executor differs. + * + * @param exec Executor of the buffered vector. + * @param size Size of the buffered vector. + */ + void init(std::shared_ptr exec, dim<2> size); + + matrix::Dense& operator*() { return *vec; } + + matrix::Dense* operator->() { return vec.get(); } + + matrix::Dense* get() { return vec.get(); } + + const matrix::Dense* get() const { return vec.get(); } +}; + + +} // namespace detail +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_CACHE_HPP_ diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 7f01d3bca2a..4c9f1f5987a 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if GINKGO_BUILD_MPI +#include #include #include #include @@ -342,6 +343,9 @@ class Vector std::shared_ptr> partition_; local_vector_type local_; + mutable ::gko::detail::DenseCache host_reduction_buffer_; + mutable ::gko::detail::DenseCache> + host_norm_buffer_; }; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 4d009402039..671cd033ece 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include From c6a92ae6cfa371b8fa5ad42ba8c1d43096fb15ca Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 10 Feb 2022 15:15:03 +0100 Subject: [PATCH 13/38] fixes switch to soa of device matrix data from rebase --- core/distributed/vector.cpp | 64 +++++++------------ core/distributed/vector_kernels.hpp | 5 +- core/test/mpi/distributed/vector.cpp | 41 +++++------- cuda/distributed/vector_kernels.cu | 6 +- dpcpp/distributed/vector_kernels.dp.cpp | 6 +- hip/distributed/vector_kernels.hip.cpp | 6 +- include/ginkgo/core/distributed/vector.hpp | 3 + omp/distributed/vector_kernels.cpp | 17 +++-- omp/test/distributed/vector_kernels.cpp | 39 ++++++----- reference/distributed/vector_kernels.cpp | 17 +++-- reference/test/distributed/vector_kernels.cpp | 31 ++++----- 11 files changed, 100 insertions(+), 135 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 3c5eba9a75e..a43c5b75ea6 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -91,46 +91,15 @@ Vector::Vector( {} -template -void read_local_impl( - std::shared_ptr exec, mpi::communicator comm, - const Partition* partition, - const size_type num_cols, - const Array>& global_data, - LocalMtxType* local_mtx) -{ - auto rank = comm.rank(); - - auto num_rows = static_cast(partition->get_part_size(rank)); - if (local_mtx->get_size() != dim<2>{num_rows, num_cols}) { - auto stride = - local_mtx->get_stride() 
> 0 ? local_mtx->get_stride() : num_cols; - LocalMtxType::create(exec, dim<2>{num_rows, num_cols}, stride) - ->move_to(local_mtx); - } - local_mtx->fill(zero()); - exec->run(vector::make_build_local(global_data, partition, rank, local_mtx, - ValueType{})); -} - - template void Vector::read_distributed( const matrix_data& data, std::shared_ptr> partition) { - GKO_ASSERT(partition->get_executor() == this->get_executor()); - this->partition_ = std::move(partition); - - auto exec = this->get_executor(); - Array> global_data{ - exec, data.nonzeros.begin(), data.nonzeros.end()}; - read_local_impl(exec, this->get_communicator(), this->get_partition().get(), - data.size[1], global_data, this->get_local()); - - auto global_rows = static_cast(this->partition_->get_size()); - this->set_size({global_rows, data.size[1]}); + this->read_distributed( + device_matrix_data::create_from_host( + this->get_executor(), data), + std::move(partition)); } @@ -139,15 +108,28 @@ void Vector::read_distributed( const device_matrix_data& data, std::shared_ptr> partition) { - GKO_ASSERT(partition->get_executor() == this->get_executor()); - this->partition_ = std::move(partition); + auto exec = this->get_executor(); - read_local_impl(this->get_executor(), this->get_communicator(), - this->get_partition().get(), data.size[1], data.nonzeros, - this->get_local()); + GKO_ASSERT(partition->get_executor() == exec); + this->partition_ = std::move(partition); auto global_rows = static_cast(this->partition_->get_size()); - this->set_size({global_rows, data.size[1]}); + auto global_cols = data.get_size()[1]; + this->set_size({global_rows, global_cols}); + + auto rank = this->get_communicator().rank(); + auto local_rows = + static_cast(this->get_partition()->get_part_size(rank)); + if (this->get_local()->get_size() != dim<2>{local_rows, global_cols}) { + auto stride = this->get_local()->get_stride() > 0 + ? 
this->get_local()->get_stride() + : global_cols; + local_vector_type::create(exec, dim<2>{local_rows, global_cols}, stride) + ->move_to(this->get_local()); + } + this->get_local()->fill(zero()); + exec->run(vector::make_build_local(data, this->get_partition().get(), rank, + this->get_local())); } diff --git a/core/distributed/vector_kernels.hpp b/core/distributed/vector_kernels.hpp index 078266dcfb5..8965265259d 100644 --- a/core/distributed/vector_kernels.hpp +++ b/core/distributed/vector_kernels.hpp @@ -53,11 +53,10 @@ namespace kernels { GlobalIndexType) \ void build_local( \ std::shared_ptr exec, \ - const Array>& input, \ + const device_matrix_data& input, \ const distributed::Partition* \ partition, \ - comm_index_type local_part, matrix::Dense* local_mtx, \ - ValueType deduction_help) + comm_index_type local_part, matrix::Dense* local_mtx) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index 1f3c349a7cc..ae4c3ed7249 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -49,15 +49,12 @@ namespace { template class Vector : public ::testing::Test { public: - using value_type = - typename std::tuple_element<0, decltype( - ValueLocalGlobalIndexType())>::type; - using local_index_type = - typename std::tuple_element<1, decltype( - ValueLocalGlobalIndexType())>::type; - using global_index_type = - typename std::tuple_element<2, decltype( - ValueLocalGlobalIndexType())>::type; + using value_type = typename std::tuple_element< + 0, decltype(ValueLocalGlobalIndexType())>::type; + using local_index_type = typename std::tuple_element< + 1, decltype(ValueLocalGlobalIndexType())>::type; + using global_index_type = typename std::tuple_element< + 2, decltype(ValueLocalGlobalIndexType())>::type; using part_type = gko::distributed::Partition; using md_type = gko::matrix_data; @@ -144,28 +141,20 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataSomeEmpty) 
TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) { + using it = typename TestFixture::global_index_type; using d_md_type = typename TestFixture::d_md_type; using part_type = typename TestFixture::part_type; - using value_type = typename TestFixture::value_type; - using nz_type = typename TestFixture::nz_type; - d_md_type md{gko::dim<2>{6, 2}, - gko::Array{this->ref, I{{0, 0, 0}, - {0, 1, 1}, - {1, 0, 2}, - {1, 1, 3}, - {2, 0, 4}, - {2, 1, 5}, - {3, 0, 6}, - {3, 1, 7}, - {4, 0, 8}, - {4, 1, 9}, - {5, 0, 10}, - {5, 1, 11}}}}; + using vt = typename TestFixture::value_type; + d_md_type md{ + this->ref, gko::dim<2>{6, 2}, + gko::Array{this->ref, I{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}}, + gko::Array{this->ref, I{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}, + gko::Array{this->ref, I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; auto part = gko::share( part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); - I> ref_data[3] = { + I> ref_data[3] = { {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}, @@ -176,7 +165,7 @@ TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); } TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu index ca59db17671..46d834ee0ca 100644 --- a/cuda/distributed/vector_kernels.cu +++ b/cuda/distributed/vector_kernels.cu @@ -45,10 +45,10 @@ namespace distributed_vector { template void build_local( std::shared_ptr exec, - const Array>& input, + const device_matrix_data& input, const distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx, - ValueType 
deduction_help) GKO_NOT_IMPLEMENTED; + comm_index_type local_part, + matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp index f332a832ab4..a51c9e22669 100644 --- a/dpcpp/distributed/vector_kernels.dp.cpp +++ b/dpcpp/distributed/vector_kernels.dp.cpp @@ -45,10 +45,10 @@ namespace distributed_vector { template void build_local( std::shared_ptr exec, - const Array>& input, + const device_matrix_data& input, const distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx, - ValueType deduction_help) GKO_NOT_IMPLEMENTED; + comm_index_type local_part, + matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp index 7fedfd0cce4..1133317e4e4 100644 --- a/hip/distributed/vector_kernels.hip.cpp +++ b/hip/distributed/vector_kernels.hip.cpp @@ -45,10 +45,10 @@ namespace distributed_vector { template void build_local( std::shared_ptr exec, - const Array>& input, + const device_matrix_data& input, const distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx, - ValueType deduction_help) GKO_NOT_IMPLEMENTED; + comm_index_type local_part, + matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 4c9f1f5987a..c7858b4c94b 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -68,6 +68,9 @@ namespace distributed { * vector with specified global and local sizes and fill the local vectors 
using * the accessor get_local. * + * @note Operations between two vectors (axpy, dot product, etc.) are only valid + * if both vectors use the same partition. + * * @tparam ValueType The precision of vector elements. * @tparam LocalIndexType The index type for local indices used by the * partition. diff --git a/omp/distributed/vector_kernels.cpp b/omp/distributed/vector_kernels.cpp index fbb1c1d351f..aa716e4f778 100644 --- a/omp/distributed/vector_kernels.cpp +++ b/omp/distributed/vector_kernels.cpp @@ -45,12 +45,13 @@ namespace distributed_vector { template void build_local( std::shared_ptr exec, - const Array>& input, + const device_matrix_data& input, const distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx, - ValueType deduction_help) + comm_index_type local_part, matrix::Dense* local_mtx) { - auto input_data = input.get_const_data(); + auto row_idxs = input.get_const_row_idxs(); + auto col_idxs = input.get_const_col_idxs(); + auto values = input.get_const_values(); auto range_bounds = partition->get_range_bounds(); auto range_parts = partition->get_part_ids(); auto range_starting_indices = partition->get_range_starting_indices(); @@ -74,15 +75,13 @@ void build_local( size_type range_id_hint = 0; #pragma omp parallel for firstprivate(range_id_hint) for (size_type i = 0; i < input.get_num_elems(); ++i) { - auto entry = input_data[i]; - auto range_id = find_range(entry.row, range_id_hint); + auto range_id = find_range(row_idxs[i], range_id_hint); range_id_hint = range_id; auto part_id = range_parts[range_id]; // skip non-local rows if (part_id == local_part) { - local_mtx->at(map_to_local(entry.row, range_id), - static_cast(entry.column)) = - entry.value; + local_mtx->at(map_to_local(row_idxs[i], range_id), + static_cast(col_idxs[i])) = values[i]; } } } diff --git a/omp/test/distributed/vector_kernels.cpp b/omp/test/distributed/vector_kernels.cpp index caec47ef4c4..45f6ba47c29 100644 --- 
a/omp/test/distributed/vector_kernels.cpp +++ b/omp/test/distributed/vector_kernels.cpp @@ -71,26 +71,27 @@ class Vector : public ::testing::Test { {} void validate( - const gko::size_type num_cols, const gko::distributed::Partition* partition, const gko::distributed::Partition* d_partition, - gko::Array input) + gko::device_matrix_data input) { - gko::Array d_input{exec, input}; + gko::device_matrix_data d_input{exec, + input}; for (comm_index_type part = 0; part < partition->get_num_parts(); ++part) { auto num_rows = static_cast(partition->get_part_size(part)); - auto output = mtx::create(ref, gko::dim<2>{num_rows, num_cols}); + auto output = + mtx::create(ref, gko::dim<2>{num_rows, input.get_size()[1]}); output->fill(gko::zero()); auto d_output = gko::clone(exec, output); gko::kernels::reference::distributed_vector::build_local( - ref, input, partition, part, output.get(), value_type{}); + ref, input, partition, part, output.get()); gko::kernels::omp::distributed_vector::build_local( - exec, d_input, d_partition, part, d_output.get(), value_type{}); + exec, d_input, d_partition, part, d_output.get()); GKO_ASSERT_MTX_NEAR(output, d_output, 0); } @@ -103,21 +104,18 @@ class Vector : public ::testing::Test { template -gko::Array> -generate_random_matrix_data_array(gko::size_type num_rows, - gko::size_type num_cols, - NonzeroDistribution&& nonzero_dist, - ValueDistribution&& value_dist, - Engine&& engine, - std::shared_ptr exec) +gko::device_matrix_data generate_random_matrix_data_array( + gko::size_type num_rows, gko::size_type num_cols, + NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec) { auto md = gko::test::generate_random_matrix_data( num_rows, num_cols, std::forward(nonzero_dist), std::forward(value_dist), std::forward(engine)); md.ensure_row_major_order(); - return gko::Array>( - exec, md.nonzeros.begin(), md.nonzeros.end()); + return gko::device_matrix_data::create_from_host(exec, + md); } @@ -126,9 
+124,9 @@ TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); TYPED_TEST(Vector, BuildsLocalEmptyIsEquivalentToRef) { + using value_type = typename TestFixture::value_type; using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; - using global_entry = typename TestFixture::global_entry; gko::distributed::comm_index_type num_parts = 10; auto mapping = gko::test::generate_random_array( @@ -145,8 +143,9 @@ TYPED_TEST(Vector, BuildsLocalEmptyIsEquivalentToRef) mapping, num_parts); - this->validate(0, partition.get(), d_partition.get(), - gko::Array{this->ref}); + this->validate( + partition.get(), d_partition.get(), + gko::device_matrix_data{this->ref}); } @@ -181,7 +180,7 @@ TYPED_TEST(Vector, BuildsLocalSmallIsEquivalentToRef) mapping, num_parts); - this->validate(num_cols, partition.get(), d_partition.get(), input); + this->validate(partition.get(), d_partition.get(), input); } @@ -216,7 +215,7 @@ TYPED_TEST(Vector, BuildsLocalIsEquivalentToRef) mapping, num_parts); - this->validate(num_cols, partition.get(), d_partition.get(), input); + this->validate(partition.get(), d_partition.get(), input); } diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp index c453ca5716f..d9eb90f80ff 100644 --- a/reference/distributed/vector_kernels.cpp +++ b/reference/distributed/vector_kernels.cpp @@ -45,12 +45,13 @@ namespace distributed_vector { template void build_local( std::shared_ptr exec, - const Array>& input, + const device_matrix_data& input, const distributed::Partition* partition, - comm_index_type local_part, matrix::Dense* local_mtx, - ValueType deduction_help) + comm_index_type local_part, matrix::Dense* local_mtx) { - auto input_data = input.get_const_data(); + auto row_idxs = input.get_const_row_idxs(); + auto col_idxs = input.get_const_col_idxs(); + auto values = input.get_const_values(); auto range_bounds = 
partition->get_range_bounds(); auto range_parts = partition->get_part_ids(); auto range_starting_indices = partition->get_range_starting_indices(); @@ -73,15 +74,13 @@ void build_local( size_type range_id_hint = 0; for (size_type i = 0; i < input.get_num_elems(); ++i) { - auto entry = input_data[i]; - auto range_id = find_range(entry.row, range_id_hint); + auto range_id = find_range(row_idxs[i], range_id_hint); range_id_hint = range_id; auto part_id = range_parts[range_id]; // skip non-local rows if (part_id == local_part) { - local_mtx->at(map_to_local(entry.row, range_id), - static_cast(entry.column)) = - entry.value; + local_mtx->at(map_to_local(row_idxs[i], range_id), + static_cast(col_idxs[i])) = values[i]; } } } diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp index 10993fdb29c..3b68dbc50a3 100644 --- a/reference/test/distributed/vector_kernels.cpp +++ b/reference/test/distributed/vector_kernels.cpp @@ -67,17 +67,20 @@ class Vector : public ::testing::Test { Vector() : ref(gko::ReferenceExecutor::create()), mapping{ref} {} void validate( - const gko::size_type num_cols, + const gko::dim<2> size, const gko::distributed::Partition* partition, - std::initializer_list input_entries, + std::initializer_list input_rows, + std::initializer_list input_cols, + std::initializer_list input_vals, std::initializer_list< std::initializer_list>> output_entries) { std::vector>> ref_outputs; - auto input = gko::Array{ref, input_entries}; + auto input = gko::device_matrix_data{ + ref, size, input_rows, input_cols, input_vals}; for (auto entry : output_entries) { ref_outputs.emplace_back(entry); } @@ -85,11 +88,11 @@ class Vector : public ::testing::Test { ++part) { auto num_rows = static_cast(partition->get_part_size(part)); - auto output = mtx::create(ref, gko::dim<2>{num_rows, num_cols}); + auto output = mtx::create(ref, gko::dim<2>{num_rows, size[1]}); output->fill(gko::zero()); 
gko::kernels::reference::distributed_vector::build_local( - ref, input, partition, part, output.get(), value_type{}); + ref, input, partition, part, output.get()); GKO_ASSERT_MTX_NEAR(output, ref_outputs[part], 0); } @@ -113,7 +116,7 @@ TYPED_TEST(Vector, BuildsLocalEmpty) this->mapping, num_parts); - this->validate(0, partition.get(), {}, + this->validate(gko::dim<2>{0, 0}, partition.get(), {}, {}, {}, {{{}, {}}, {{}, {}, {}}, {{}, {}, {}}}); } @@ -129,9 +132,8 @@ TYPED_TEST(Vector, BuildsLocalSmall) this->mapping, num_parts); - this->validate(2, partition.get(), - {{0, 0, 1}, {0, 1, 2}, {1, 0, 3}, {1, 1, 4}}, - {{{3, 4}}, {{1, 2}}}); + this->validate(gko::dim<2>{2, 2}, partition.get(), {0, 0, 1, 1}, + {0, 1, 0, 1}, {1, 2, 3, 4}, {{{3, 4}}, {{1, 2}}}); } @@ -146,15 +148,8 @@ TYPED_TEST(Vector, BuildsLocal) this->mapping, num_parts); - this->validate(8, partition.get(), - {{0, 0, 1}, - {0, 1, 2}, - {1, 2, 3}, - {1, 3, 4}, - {2, 4, 5}, - {3, 5, 6}, - {4, 6, 7}, - {5, 7, 8}}, + this->validate(gko::dim<2>{6, 8}, partition.get(), {0, 0, 1, 1, 2, 3, 4, 5}, + {0, 1, 2, 3, 4, 5, 6, 7}, {1, 2, 3, 4, 5, 6, 7, 8}, {{{0, 0, 0, 0, 5, 0, 0, 0}, {0, 0, 0, 0, 0, 6, 0, 0}}, {{1, 2, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 8}}, {{0, 0, 3, 4, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 7, 0}}}); From 6a38b0821f5ac792f41eeb940f85c4f0894a890f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 10 Feb 2022 15:53:18 +0100 Subject: [PATCH 14/38] remove partition from read* and make partition mandatory in constructor --- core/distributed/vector.cpp | 25 +++++++----- core/test/mpi/distributed/vector.cpp | 34 +++++++++-------- include/ginkgo/core/distributed/vector.hpp | 44 +++++++++++----------- 3 files changed, 54 insertions(+), 49 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index a43c5b75ea6..b37b173b620 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -77,6 +77,14 @@ Vector::Vector( local_size[1]) {} + +template +Vector::Vector( 
+ std::shared_ptr exec) + : Vector(exec, mpi::communicator(MPI_COMM_NULL), {}, {}, 0) +{} + + template Vector::Vector( std::shared_ptr exec, mpi::communicator comm, @@ -93,26 +101,20 @@ Vector::Vector( template void Vector::read_distributed( - const matrix_data& data, - std::shared_ptr> partition) + const matrix_data& data) { this->read_distributed( device_matrix_data::create_from_host( - this->get_executor(), data), - std::move(partition)); + this->get_executor(), data)); } template void Vector::read_distributed( - const device_matrix_data& data, - std::shared_ptr> partition) + const device_matrix_data& data) { auto exec = this->get_executor(); - GKO_ASSERT(partition->get_executor() == exec); - this->partition_ = std::move(partition); - auto global_rows = static_cast(this->partition_->get_size()); auto global_cols = data.get_size()[1]; this->set_size({global_rows, global_cols}); @@ -157,7 +159,10 @@ template void Vector::move_to( Vector, LocalIndexType, GlobalIndexType>* result) { - this->convert_to(result); + result->set_size(this->get_size()); + result->set_communicator(this->get_communicator()); + result->partition_ = this->partition_; + this->get_local()->move_to(result->get_local()); } diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index ae4c3ed7249..5c1cb3cc2ca 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -74,8 +74,8 @@ class Vector : public ::testing::Test { vec_a(dist_vec_type::create(ref, comm, part)), vec_b(dist_vec_type::create(ref, comm, part)) { - vec_a->read_distributed(md_a, part); - vec_b->read_distributed(md_b, part); + vec_a->read_distributed(md_a); + vec_b->read_distributed(md_b); } std::shared_ptr ref; @@ -96,10 +96,11 @@ TYPED_TEST(Vector, CanReadGlobalMatrixData) { using part_type = typename TestFixture::part_type; using value_type = typename TestFixture::value_type; - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto vec = 
+ TestFixture::dist_vec_type::create(this->ref, this->comm, this->part); auto rank = this->comm.rank(); - vec->read_distributed(this->md_a, this->part); + vec->read_distributed(this->md_a); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), @@ -119,10 +120,10 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataSomeEmpty) using value_type = typename TestFixture::value_type; auto part = gko::share( part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); auto rank = this->comm.rank(); - vec->read_distributed(this->md_a, part); + vec->read_distributed(this->md_a); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); if (rank == 1) { @@ -152,7 +153,7 @@ TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) gko::Array{this->ref, I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; auto part = gko::share( part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); auto rank = this->comm.rank(); I> ref_data[3] = { {{0, 1}, {2, 3}}, @@ -160,7 +161,7 @@ TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) {{8, 9}, {10, 11}}, }; - vec->read_distributed(md, part); + vec->read_distributed(md); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), @@ -176,7 +177,7 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) md_type md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; auto part = gko::share(part_type::build_from_mapping( this->ref, {this->ref, {0, 1, 2, 0, 2, 0}}, 3)); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); 
auto rank = this->comm.rank(); gko::dim<2> ref_size[3] = {{3, 2}, {1, 2}, {2, 2}}; I> ref_data[3] = { @@ -185,7 +186,7 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) {{4, 5}, {8, 9}}, }; - vec->read_distributed(md, part); + vec->read_distributed(md); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), ref_size[rank]); @@ -204,7 +205,7 @@ TYPED_TEST(Vector, CanReadLocalMatrixData) {gko::dim<2>{6, 2}, {{4, 0, 8}, {4, 1, 9}, {5, 0, 10}, {5, 1, 11}}}}; auto part = gko::share( part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); auto rank = this->comm.rank(); I> ref_data[3] = { {{0, 1}, {2, 3}}, @@ -212,7 +213,7 @@ TYPED_TEST(Vector, CanReadLocalMatrixData) {{8, 9}, {10, 11}}, }; - vec->read_distributed(md[rank], part); + vec->read_distributed(md[rank]); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), @@ -239,10 +240,10 @@ TYPED_TEST(Vector, CanReadLocalMatrixDataSomeEmpty) {gko::dim<2>{6, 2}, {}}}; auto part = gko::share( part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); auto rank = this->comm.rank(); - vec->read_distributed(md[rank], part); + vec->read_distributed(md[rank]); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); if (rank == 1) { @@ -291,8 +292,8 @@ TYPED_TEST(Vector, ComputesConjDot) auto dist_vec_b = dist_vec_type::create(this->ref, this->comm, this->part); auto dense_vec_a = dense_type::create(this->ref); auto dense_vec_b = dense_type::create(this->ref); - dist_vec_a->read_distributed(md_a, this->part); - dist_vec_b->read_distributed(md_b, 
this->part); + dist_vec_a->read_distributed(md_a); + dist_vec_b->read_distributed(md_b); dense_vec_a->read(md_a); dense_vec_b->read(md_b); auto res = dense_type::create(this->ref, gko::dim<2>{1, 2}); @@ -320,4 +321,5 @@ TYPED_TEST(Vector, ComputesNorm) GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); } + } // namespace diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index c7858b4c94b..6c7087060c6 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -114,32 +114,26 @@ class Vector using local_vector_type = gko::matrix::Dense; /** - * Reads a vector from the matrix_data structure and a global row partition. + * Reads a vector from the matrix_data structure. * * The number of rows of the matrix data is ignored, only its number of - * columns is relevant. The number of rows is inferred from the partition. + * columns is relevant. The number of rows is inferred from the vector's + * partition. * * @note The matrix data can contain entries for rows other than those owned * by the process. Entries for those rows are discarded. * * @param data The matrix_data structure - * @param partition The global row partition */ - void read_distributed( - const matrix_data& data, - std::shared_ptr> - partition); + void read_distributed(const matrix_data& data); /** - * Reads a vector from the device_matrix_data structure and a global row - * partition. + * Reads a vector from the device_matrix_data structure. * * See @read_distributed */ void read_distributed( - const device_matrix_data& data, - std::shared_ptr> - partition); + const device_matrix_data& data); void convert_to(Vector, LocalIndexType, GlobalIndexType>* result) const override; @@ -305,6 +299,12 @@ class Vector } protected: + /** + * Creates an empty distributed vector. 
+ * @param exec Executor associated with vector + */ + explicit Vector(std::shared_ptr exec); + /** * Creates an empty distributed vector with a specified size * @param exec Executor associated with vector @@ -312,13 +312,13 @@ class Vector * MPI_COMM_WORLD * @param partition Partition of global rows * @param global_size Global size of the vector - * @param local_size Processor-local size of the vector - * @param stride Stride of the local vector. + * @param local_size Processor-local size of the vector, uses local_size[1] + * as the stride */ Vector(std::shared_ptr exec, mpi::communicator comm, std::shared_ptr> partition, - dim<2> global_size, dim<2> local_size, size_type stride); + dim<2> global_size = {}, dim<2> local_size = {}); /** * Creates an empty distributed vector with a specified size @@ -327,15 +327,13 @@ class Vector * MPI_COMM_WORLD * @param partition Partition of global rows * @param global_size Global size of the vector - * @param local_size Processor-local size of the vector, uses local_size[1] - * as the stride + * @param local_size Processor-local size of the vector + * @param stride Stride of the local vector. */ - explicit Vector( - std::shared_ptr exec, - mpi::communicator comm = mpi::communicator(MPI_COMM_WORLD), - std::shared_ptr> - partition = nullptr, - dim<2> global_size = {}, dim<2> local_size = {}); + Vector(std::shared_ptr exec, mpi::communicator comm, + std::shared_ptr> + partition, + dim<2> global_size, dim<2> local_size, size_type stride); void apply_impl(const LinOp*, LinOp*) const override; From 6503e407efc4ffffce5b3a50135ac0e3747e0ecb Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 11 Feb 2022 15:38:42 +0100 Subject: [PATCH 15/38] remove partition member from vector for now, no functionality (except reading/consistency checking) depends on the partition, so there is no point in storing it. 
Regarding the consistency check: we can ignore that for the moment, since we have to use a pointer comparison to check for equality, which is not sufficient for the general use case. --- core/distributed/vector.cpp | 282 +++++++++------------ core/test/mpi/distributed/vector.cpp | 116 +++++---- include/ginkgo/core/distributed/vector.hpp | 120 ++++----- include/ginkgo/core/matrix/dense.hpp | 6 +- 4 files changed, 252 insertions(+), 272 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index b37b173b620..1f165da1120 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -53,129 +53,117 @@ GKO_REGISTER_OPERATION(build_local, distributed_vector::build_local); } // namespace vector -template -void Vector::apply_impl( - const LinOp* b, LinOp* x) const -{ - GKO_NOT_SUPPORTED(this); -} +namespace detail { template -void Vector::apply_impl( - const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const +void read_distributed_impl( + const device_matrix_data& data, + const Partition* partition, + Vector* result) { - GKO_NOT_SUPPORTED(this); -} + auto exec = result->get_executor(); -template -Vector::Vector( - std::shared_ptr exec, mpi::communicator comm, - std::shared_ptr> partition, - dim<2> global_size, dim<2> local_size) - : Vector(exec, comm, std::move(partition), global_size, local_size, - local_size[1]) -{} + GKO_ASSERT(partition->get_executor() == exec); + auto global_rows = static_cast(partition->get_size()); + auto global_cols = data.get_size()[1]; + auto tmp = Vector::create(exec, result->get_communicator(), + gko::dim<2>{global_rows, global_cols}); + + auto rank = tmp->get_communicator().rank(); + auto local_rows = static_cast(partition->get_part_size(rank)); + if (tmp->get_local()->get_size() != dim<2>{local_rows, global_cols}) { + auto stride = tmp->get_local()->get_stride() > 0 + ? 
tmp->get_local()->get_stride() + : global_cols; + Vector::local_vector_type::create( + exec, dim<2>{local_rows, global_cols}, stride) + ->move_to(tmp->get_local()); + } + tmp->get_local()->fill(zero()); + exec->run( + vector::make_build_local(data, partition, rank, tmp->get_local())); + tmp->move_to(result); +} -template -Vector::Vector( - std::shared_ptr exec) - : Vector(exec, mpi::communicator(MPI_COMM_NULL), {}, {}, 0) -{} +#define GKO_DECLARE_DISTRIBUTED_READ_DISTRIBUTED_IMPL( \ + ValueType, LocalIndexType, GlobalIndexType) \ + void read_distributed_impl( \ + const device_matrix_data& data, \ + const Partition* partition, \ + Vector* storage) +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_READ_DISTRIBUTED_IMPL); -template -Vector::Vector( - std::shared_ptr exec, mpi::communicator comm, - std::shared_ptr> partition, - dim<2> global_size, dim<2> local_size, size_type stride) - : EnableLinOp< - Vector>{exec, - global_size}, - DistributedBase{comm}, - partition_{std::move(partition)}, - local_{exec, local_size, stride} -{} +} // namespace detail -template -void Vector::read_distributed( - const matrix_data& data) +template +void Vector::apply_impl(const LinOp* b, LinOp* x) const { - this->read_distributed( - device_matrix_data::create_from_host( - this->get_executor(), data)); + GKO_NOT_SUPPORTED(this); } -template -void Vector::read_distributed( - const device_matrix_data& data) +template +void Vector::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { - auto exec = this->get_executor(); - - auto global_rows = static_cast(this->partition_->get_size()); - auto global_cols = data.get_size()[1]; - this->set_size({global_rows, global_cols}); - - auto rank = this->get_communicator().rank(); - auto local_rows = - static_cast(this->get_partition()->get_part_size(rank)); - if (this->get_local()->get_size() != dim<2>{local_rows, global_cols}) { - auto stride = this->get_local()->get_stride() > 0 - ? 
this->get_local()->get_stride() - : global_cols; - local_vector_type::create(exec, dim<2>{local_rows, global_cols}, stride) - ->move_to(this->get_local()); - } - this->get_local()->fill(zero()); - exec->run(vector::make_build_local(data, this->get_partition().get(), rank, - this->get_local())); + GKO_NOT_SUPPORTED(this); } +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, dim<2> global_size, + dim<2> local_size) + : Vector(exec, comm, global_size, local_size, local_size[1]) +{} -template -void Vector::fill( - const ValueType value) +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, dim<2> global_size, + dim<2> local_size, size_type stride) + : EnableLinOp>{exec, global_size}, + DistributedBase{comm}, + local_{exec, local_size, stride} +{} + + +template +void Vector::fill(const ValueType value) { this->get_local()->fill(value); } -template -void Vector::convert_to( - Vector, LocalIndexType, GlobalIndexType>* result) - const +template +void Vector::convert_to( + Vector>* result) const { result->set_size(this->get_size()); result->set_communicator(this->get_communicator()); - result->partition_ = this->partition_; this->get_const_local()->convert_to(result->get_local()); } -template -void Vector::move_to( - Vector, LocalIndexType, GlobalIndexType>* result) +template +void Vector::move_to(Vector>* result) { - result->set_size(this->get_size()); - result->set_communicator(this->get_communicator()); - result->partition_ = this->partition_; - this->get_local()->move_to(result->get_local()); + this->convert_to(result); } -template -std::unique_ptr< - typename Vector::absolute_type> -Vector::compute_absolute() const +template +std::unique_ptr::absolute_type> +Vector::compute_absolute() const { auto exec = this->get_executor(); - auto result = absolute_type::create(exec, this->get_communicator(), - this->get_partition(), this->get_size(), - this->get_const_local()->get_size()); + auto result = + absolute_type::create(exec, 
this->get_communicator(), this->get_size(), + this->get_const_local()->get_size()); exec->run(vector::make_outplace_absolute_dense(this->get_const_local(), result->get_local())); @@ -184,134 +172,120 @@ Vector::compute_absolute() const } -template -void Vector::compute_absolute_inplace() +template +void Vector::compute_absolute_inplace() { this->get_local()->compute_absolute_inplace(); } -template -const typename Vector::local_vector_type* -Vector::get_const_local() const +template +const typename Vector::local_vector_type* +Vector::get_const_local() const { return &local_; } -template -typename Vector::local_vector_type* -Vector::get_local() +template +typename Vector::local_vector_type* Vector::get_local() { return &local_; } -template -std::unique_ptr< - typename Vector::complex_type> -Vector::make_complex() const +template +std::unique_ptr::complex_type> +Vector::make_complex() const { auto result = complex_type::create( - this->get_executor(), this->get_communicator(), this->get_partition(), - this->get_size(), this->get_const_local()->get_size(), + this->get_executor(), this->get_communicator(), this->get_size(), + this->get_const_local()->get_size(), this->get_const_local()->get_stride()); this->make_complex(result.get()); return result; } -template -void Vector::make_complex( - Vector::complex_type* result) const +template +void Vector::make_complex(Vector::complex_type* result) const { this->get_const_local()->make_complex(result->get_local()); } -template -std::unique_ptr< - typename Vector::real_type> -Vector::get_real() const +template +std::unique_ptr::real_type> +Vector::get_real() const { - auto result = real_type::create( - this->get_executor(), this->get_communicator(), this->get_partition(), - this->get_size(), this->get_const_local()->get_size(), - this->get_const_local()->get_stride()); + auto result = + real_type::create(this->get_executor(), this->get_communicator(), + this->get_size(), this->get_const_local()->get_size(), + 
this->get_const_local()->get_stride()); this->get_real(result.get()); return result; } -template -void Vector::get_real( - Vector::real_type* result) const +template +void Vector::get_real(Vector::real_type* result) const { this->get_const_local()->get_real(result->get_local()); } -template -std::unique_ptr< - typename Vector::real_type> -Vector::get_imag() const +template +std::unique_ptr::real_type> +Vector::get_imag() const { - auto result = real_type::create( - this->get_executor(), this->get_communicator(), this->get_partition(), - this->get_size(), this->get_const_local()->get_size(), - this->get_const_local()->get_stride()); + auto result = + real_type::create(this->get_executor(), this->get_communicator(), + this->get_size(), this->get_const_local()->get_size(), + this->get_const_local()->get_stride()); this->get_imag(result.get()); return result; } -template -void Vector::get_imag( - Vector::real_type* result) const +template +void Vector::get_imag(Vector::real_type* result) const { this->get_const_local()->get_imag(result->get_local()); } -template -void Vector::scale( - const LinOp* alpha) +template +void Vector::scale(const LinOp* alpha) { this->get_local()->scale(alpha); } -template -void Vector::inv_scale( - const LinOp* alpha) +template +void Vector::inv_scale(const LinOp* alpha) { this->get_local()->inv_scale(alpha); } -template -void Vector::add_scaled( - const LinOp* alpha, const LinOp* b) +template +void Vector::add_scaled(const LinOp* alpha, const LinOp* b) { - auto dense_b = as>(b); + auto dense_b = as>(b); this->get_local()->add_scaled(alpha, dense_b->get_const_local()); } -template -void Vector::sub_scaled( - const LinOp* alpha, const LinOp* b) +template +void Vector::sub_scaled(const LinOp* alpha, const LinOp* b) { - auto dense_b = as>(b); + auto dense_b = as>(b); this->get_local()->sub_scaled(alpha, dense_b->get_const_local()); } -template -void Vector::compute_dot( - const LinOp* b, LinOp* result) const +template +void 
Vector::compute_dot(const LinOp* b, LinOp* result) const { auto exec = this->get_executor(); const auto comm = this->get_communicator(); @@ -335,9 +309,8 @@ void Vector::compute_dot( } -template -void Vector::compute_conj_dot( - const LinOp* b, LinOp* result) const +template +void Vector::compute_conj_dot(const LinOp* b, LinOp* result) const { auto exec = this->get_executor(); const auto comm = this->get_communicator(); @@ -361,9 +334,8 @@ void Vector::compute_conj_dot( } -template -void Vector::compute_norm2( - LinOp* result) const +template +void Vector::compute_norm2(LinOp* result) const { using NormVector = typename local_vector_type::absolute_type; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); @@ -389,9 +361,8 @@ void Vector::compute_norm2( } -template -void Vector::compute_norm1( - LinOp* result) const +template +void Vector::compute_norm1(LinOp* result) const { using NormVector = typename local_vector_type::absolute_type; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); @@ -415,11 +386,8 @@ void Vector::compute_norm1( } -#define GKO_DECLARE_DISTRIBUTED_VECTOR(ValueType, LocalIndexType, \ - GlobalIndexType) \ - class Vector -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_DISTRIBUTED_VECTOR); +#define GKO_DECLARE_DISTRIBUTED_VECTOR(ValueType) class Vector +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DISTRIBUTED_VECTOR); } // namespace distributed diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index 5c1cb3cc2ca..c69080299c7 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -47,7 +47,7 @@ namespace { template -class Vector : public ::testing::Test { +class VectorRead : public ::testing::Test { public: using value_type = typename std::tuple_element< 0, decltype(ValueLocalGlobalIndexType())>::type; @@ -59,48 +59,37 @@ class Vector : public ::testing::Test { gko::distributed::Partition; using md_type = 
gko::matrix_data; using d_md_type = gko::device_matrix_data; - using dist_vec_type = gko::distributed::Vector; + using dist_vec_type = gko::distributed::Vector; using dense_type = gko::matrix::Dense; using nz_type = gko::matrix_data_entry; - Vector() + VectorRead() : ref(gko::ReferenceExecutor::create()), comm(MPI_COMM_WORLD), part(gko::share(part_type::build_from_contiguous( this->ref, {ref, {0, 2, 4, 6}}))), - md_a{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, - md_b{{10, -11}, {8, -9}, {-6, 7}, {4, -5}, {2, -3}, {0, 1}}, - vec_a(dist_vec_type::create(ref, comm, part)), - vec_b(dist_vec_type::create(ref, comm, part)) - { - vec_a->read_distributed(md_a); - vec_b->read_distributed(md_b); - } + md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}} + {} std::shared_ptr ref; gko::mpi::communicator comm; std::shared_ptr part; - md_type md_a; - md_type md_b; - - std::unique_ptr vec_a; - std::unique_ptr vec_b; + md_type md; }; -TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); +TYPED_TEST_SUITE(VectorRead, gko::test::ValueLocalGlobalIndexTypes); + -TYPED_TEST(Vector, CanReadGlobalMatrixData) +TYPED_TEST(VectorRead, CanReadGlobalMatrixData) { using part_type = typename TestFixture::part_type; using value_type = typename TestFixture::value_type; - auto vec = - TestFixture::dist_vec_type::create(this->ref, this->comm, this->part); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); - vec->read_distributed(this->md_a); + vec->read_distributed(this->md, this->part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), @@ -114,16 +103,16 @@ TYPED_TEST(Vector, CanReadGlobalMatrixData) } -TYPED_TEST(Vector, CanReadGlobalMatrixDataSomeEmpty) +TYPED_TEST(VectorRead, CanReadGlobalMatrixDataSomeEmpty) { using part_type = typename TestFixture::part_type; using value_type = typename TestFixture::value_type; auto part = gko::share( 
part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); - vec->read_distributed(this->md_a); + vec->read_distributed(this->md, part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); if (rank == 1) { @@ -140,7 +129,7 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataSomeEmpty) } -TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) +TYPED_TEST(VectorRead, CanReadGlobalDeviceMatrixData) { using it = typename TestFixture::global_index_type; using d_md_type = typename TestFixture::d_md_type; @@ -153,7 +142,7 @@ TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) gko::Array{this->ref, I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; auto part = gko::share( part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); I> ref_data[3] = { {{0, 1}, {2, 3}}, @@ -161,7 +150,7 @@ TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) {{8, 9}, {10, 11}}, }; - vec->read_distributed(md); + vec->read_distributed(md, part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), @@ -169,7 +158,7 @@ TYPED_TEST(Vector, CanReadGlobalDeviceMatrixData) GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); } -TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) +TYPED_TEST(VectorRead, CanReadGlobalMatrixDataScattered) { using md_type = typename TestFixture::md_type; using part_type = typename TestFixture::part_type; @@ -177,7 +166,7 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) md_type md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; auto part = gko::share(part_type::build_from_mapping( this->ref, 
{this->ref, {0, 1, 2, 0, 2, 0}}, 3)); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); gko::dim<2> ref_size[3] = {{3, 2}, {1, 2}, {2, 2}}; I> ref_data[3] = { @@ -186,7 +175,7 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) {{4, 5}, {8, 9}}, }; - vec->read_distributed(md); + vec->read_distributed(md, part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), ref_size[rank]); @@ -194,7 +183,7 @@ TYPED_TEST(Vector, CanReadGlobalMatrixDataScattered) } -TYPED_TEST(Vector, CanReadLocalMatrixData) +TYPED_TEST(VectorRead, CanReadLocalMatrixData) { using md_type = typename TestFixture::md_type; using part_type = typename TestFixture::part_type; @@ -205,7 +194,7 @@ TYPED_TEST(Vector, CanReadLocalMatrixData) {gko::dim<2>{6, 2}, {{4, 0, 8}, {4, 1, 9}, {5, 0, 10}, {5, 1, 11}}}}; auto part = gko::share( part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); I> ref_data[3] = { {{0, 1}, {2, 3}}, @@ -213,7 +202,7 @@ TYPED_TEST(Vector, CanReadLocalMatrixData) {{8, 9}, {10, 11}}, }; - vec->read_distributed(md[rank]); + vec->read_distributed(md[rank], part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), @@ -222,7 +211,7 @@ TYPED_TEST(Vector, CanReadLocalMatrixData) } -TYPED_TEST(Vector, CanReadLocalMatrixDataSomeEmpty) +TYPED_TEST(VectorRead, CanReadLocalMatrixDataSomeEmpty) { using md_type = typename TestFixture::md_type; using part_type = typename TestFixture::part_type; @@ -240,10 +229,10 @@ TYPED_TEST(Vector, CanReadLocalMatrixDataSomeEmpty) {gko::dim<2>{6, 2}, {}}}; auto part = gko::share( 
part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm, part); + auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); - vec->read_distributed(md[rank]); + vec->read_distributed(md[rank], part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); if (rank == 1) { @@ -261,7 +250,45 @@ TYPED_TEST(Vector, CanReadLocalMatrixDataSomeEmpty) } -TYPED_TEST(Vector, ComputesDotProduct) +template +class VectorOperation : public ::testing::Test { +public: + using value_type = ValueType; + using local_index_type = gko::int32; + using global_index_type = gko::int64; + using part_type = + gko::distributed::Partition; + using md_type = gko::matrix_data; + using dist_vec_type = gko::distributed::Vector; + using dense_type = gko::matrix::Dense; + + VectorOperation() + : ref(gko::ReferenceExecutor::create()), + comm(MPI_COMM_WORLD), + part(gko::share(part_type::build_from_contiguous( + this->ref, {ref, {0, 2, 4, 6}}))), + vec_a(dist_vec_type::create(ref, comm)), + vec_b(dist_vec_type::create(ref, comm)) + { + md_type md_a{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; + md_type md_b{{10, -11}, {8, -9}, {-6, 7}, {4, -5}, {2, -3}, {0, 1}}; + + vec_a->read_distributed(md_a, part.get()); + vec_b->read_distributed(md_b, part.get()); + } + + std::shared_ptr ref; + gko::mpi::communicator comm; + std::shared_ptr part; + + std::unique_ptr vec_a; + std::unique_ptr vec_b; +}; + +TYPED_TEST_SUITE(VectorOperation, gko::test::ValueTypes); + + +TYPED_TEST(VectorOperation, ComputesDotProduct) { using dense_type = typename TestFixture::dense_type; using value_type = typename TestFixture::value_type; @@ -275,7 +302,7 @@ TYPED_TEST(Vector, ComputesDotProduct) } -TYPED_TEST(Vector, ComputesConjDot) +TYPED_TEST(VectorOperation, ComputesConjDot) { using dense_type = typename TestFixture::dense_type; using value_type = typename 
TestFixture::value_type; @@ -288,12 +315,12 @@ TYPED_TEST(Vector, ComputesConjDot) auto md_b = gko::test::generate_random_matrix_data( 6, 2, std::uniform_int_distribution(2, 2), std::normal_distribution(0, 1), std::ranlux48{42}); - auto dist_vec_a = dist_vec_type::create(this->ref, this->comm, this->part); - auto dist_vec_b = dist_vec_type::create(this->ref, this->comm, this->part); + auto dist_vec_a = dist_vec_type::create(this->ref, this->comm); + auto dist_vec_b = dist_vec_type::create(this->ref, this->comm); auto dense_vec_a = dense_type::create(this->ref); auto dense_vec_b = dense_type::create(this->ref); - dist_vec_a->read_distributed(md_a); - dist_vec_b->read_distributed(md_b); + dist_vec_a->read_distributed(md_a, this->part.get()); + dist_vec_b->read_distributed(md_b, this->part.get()); dense_vec_a->read(md_a); dense_vec_b->read(md_b); auto res = dense_type::create(this->ref, gko::dim<2>{1, 2}); @@ -306,7 +333,7 @@ TYPED_TEST(Vector, ComputesConjDot) } -TYPED_TEST(Vector, ComputesNorm) +TYPED_TEST(VectorOperation, ComputesNorm) { using dense_type = typename TestFixture::dense_type; using value_type = typename TestFixture::value_type; @@ -321,5 +348,4 @@ TYPED_TEST(Vector, ComputesNorm) GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); } - } // namespace diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 6c7087060c6..40b726f7d17 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -50,6 +50,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { namespace distributed { +namespace detail { + + +template +void read_distributed_impl( + const device_matrix_data& data, + const Partition* partition, + Vector* result); + + +} + /** * Vector is a format which explicitly stores (multiple) distributed column @@ -69,77 +81,75 @@ namespace distributed { * the accessor get_local. 
* * @note Operations between two vectors (axpy, dot product, etc.) are only valid - * if both vectors use the same partition. + * if both vectors were created using the same partition. * * @tparam ValueType The precision of vector elements. - * @tparam LocalIndexType The index type for local indices used by the - * partition. - * @tparam GlobalIndexType The index type for the global indices used by the - * partition. * * @ingroup dist_vector * @ingroup distributed */ -template +template class Vector - : public EnableLinOp>, - public EnableCreateMethod< - Vector>, - public ConvertibleTo< - Vector, LocalIndexType, GlobalIndexType>>, - public EnableAbsoluteComputation< - remove_complex>>, + : public EnableLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>>, + public EnableAbsoluteComputation>>, public DistributedBase { - friend class EnableCreateMethod< - Vector>; - friend class EnablePolymorphicObject< - Vector, LinOp>; - friend class Vector, LocalIndexType, GlobalIndexType>; - friend class Vector, LocalIndexType, - GlobalIndexType>; + friend class EnableCreateMethod>; + friend class EnablePolymorphicObject, LinOp>; + friend class Vector>; + friend class Vector>; public: using EnableLinOp::convert_to; using EnableLinOp::move_to; using value_type = ValueType; - using index_type = GlobalIndexType; - using local_index_type = LocalIndexType; - using global_index_type = GlobalIndexType; using absolute_type = remove_complex; using real_type = absolute_type; - using complex_type = - Vector, local_index_type, global_index_type>; + using complex_type = Vector>; using local_vector_type = gko::matrix::Dense; /** - * Reads a vector from the matrix_data structure. + * Reads a vector from the matrix_data structure and a global row partition. * * The number of rows of the matrix data is ignored, only its number of - * columns is relevant. - * columns is relevant. The number of rows is inferred from the vector's - * partition. + * columns is relevant.
The number of rows is inferred from the partition. * * @note The matrix data can contain entries for rows other than those owned * by the process. Entries for those rows are discarded. * * @param data The matrix_data structure + * @param partition The global row partition */ - void read_distributed(const matrix_data& data); + template + void read_distributed( + const matrix_data& data, + const Partition* partition) + { + this->read_distributed( + device_matrix_data::create_from_host( + this->get_executor(), data), + std::move(partition)); + } /** - * Reads a vector from the device_matrix_data structure. + * Reads a vector from the device_matrix_data structure and a global row + * partition. * * See @read_distributed */ + template void read_distributed( - const device_matrix_data& data); + const device_matrix_data& data, + const Partition* partition) + { + detail::read_distributed_impl(data, partition, this); + } - void convert_to(Vector, LocalIndexType, - GlobalIndexType>* result) const override; + void convert_to(Vector>* result) const override; - void move_to(Vector, LocalIndexType, - GlobalIndexType>* result) override; + void move_to(Vector>* result) override; std::unique_ptr compute_absolute() const override; @@ -287,24 +297,7 @@ class Vector */ local_vector_type* get_local(); - /** - * Access to the partition that defines these global vectors. - * - * @return a shared_ptr to the global row partition - */ - std::shared_ptr> - get_partition() const - { - return partition_; - } - protected: - /** - * Creates an empty distributed vector. 
- * @param exec Executor associated with vector - */ - explicit Vector(std::shared_ptr exec); - /** * Creates an empty distributed vector with a specified size * @param exec Executor associated with vector @@ -312,13 +305,11 @@ class Vector * MPI_COMM_WORLD * @param partition Partition of global rows * @param global_size Global size of the vector - * @param local_size Processor-local size of the vector, uses local_size[1] - * as the stride + * @param local_size Processor-local size of the vector + * @param stride Stride of the local vector. */ Vector(std::shared_ptr exec, mpi::communicator comm, - std::shared_ptr> - partition, - dim<2> global_size = {}, dim<2> local_size = {}); + dim<2> global_size, dim<2> local_size, size_type stride); /** * Creates an empty distributed vector with a specified size @@ -327,13 +318,12 @@ class Vector * MPI_COMM_WORLD * @param partition Partition of global rows * @param global_size Global size of the vector - * @param local_size Processor-local size of the vector - * @param stride Stride of the local vector. 
+ * @param local_size Processor-local size of the vector, uses local_size[1] + * as the stride */ - Vector(std::shared_ptr exec, mpi::communicator comm, - std::shared_ptr> - partition, - dim<2> global_size, dim<2> local_size, size_type stride); + explicit Vector(std::shared_ptr exec, + mpi::communicator comm = mpi::communicator(MPI_COMM_WORLD), + dim<2> global_size = {}, dim<2> local_size = {}); void apply_impl(const LinOp*, LinOp*) const override; @@ -341,8 +331,6 @@ class Vector LinOp*) const override; private: - std::shared_ptr> - partition_; local_vector_type local_; mutable ::gko::detail::DenseCache host_reduction_buffer_; mutable ::gko::detail::DenseCache> diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 63afd572083..46bef18b440 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -50,7 +50,7 @@ namespace gko { namespace distributed { -template +template class Vector; @@ -147,9 +147,7 @@ class Dense friend class SparsityCsr; friend class SparsityCsr; friend class Dense>; - friend class distributed::Vector; - friend class distributed::Vector; - friend class distributed::Vector; + friend class distributed::Vector; public: using ReadableFromMatrixData::read; From f54f69bec0ca4314477c2a5f69f13bd45ae86a8b Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 14 Feb 2022 09:13:43 +0000 Subject: [PATCH 16/38] Format files Co-authored-by: Marcel Koch --- core/base/cache.cpp | 2 ++ core/test/base/cache.cpp | 4 ++-- core/test/mpi/distributed/vector.cpp | 15 +++++++++------ include/ginkgo/core/base/cache.hpp | 1 - omp/test/distributed/vector_kernels.cpp | 15 +++++++++------ reference/test/distributed/vector_kernels.cpp | 15 +++++++++------ 6 files changed, 31 insertions(+), 21 deletions(-) diff --git a/core/base/cache.cpp b/core/base/cache.cpp index 5a01317da99..088f959750e 100644 --- a/core/base/cache.cpp +++ b/core/base/cache.cpp @@ -31,6 +31,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED 
OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include + + #include diff --git a/core/test/base/cache.cpp b/core/test/base/cache.cpp index 6c099addf7d..1f0a8c38a90 100644 --- a/core/test/base/cache.cpp +++ b/core/test/base/cache.cpp @@ -33,10 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include +#include -#include +#include #include "core/test/utils.hpp" diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index c69080299c7..956327dc4c9 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -49,12 +49,15 @@ namespace { template class VectorRead : public ::testing::Test { public: - using value_type = typename std::tuple_element< - 0, decltype(ValueLocalGlobalIndexType())>::type; - using local_index_type = typename std::tuple_element< - 1, decltype(ValueLocalGlobalIndexType())>::type; - using global_index_type = typename std::tuple_element< - 2, decltype(ValueLocalGlobalIndexType())>::type; + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; using part_type = gko::distributed::Partition; using md_type = gko::matrix_data; diff --git a/include/ginkgo/core/base/cache.hpp b/include/ginkgo/core/base/cache.hpp index cfac3f83d0f..8e4fba6a36c 100644 --- a/include/ginkgo/core/base/cache.hpp +++ b/include/ginkgo/core/base/cache.hpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ - #ifndef GKO_PUBLIC_CORE_BASE_CACHE_HPP_ #define GKO_PUBLIC_CORE_BASE_CACHE_HPP_ diff --git a/omp/test/distributed/vector_kernels.cpp b/omp/test/distributed/vector_kernels.cpp index 45f6ba47c29..ecd06a7e551 100644 --- a/omp/test/distributed/vector_kernels.cpp +++ b/omp/test/distributed/vector_kernels.cpp @@ -55,12 +55,15 @@ using comm_index_type = gko::distributed::comm_index_type; template class Vector : public ::testing::Test { protected: - using value_type = typename std::tuple_element< - 0, decltype(ValueLocalGlobalIndexType())>::type; - using local_index_type = typename std::tuple_element< - 1, decltype(ValueLocalGlobalIndexType())>::type; - using global_index_type = typename std::tuple_element< - 2, decltype(ValueLocalGlobalIndexType())>::type; + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; using global_entry = gko::matrix_data_entry; using mtx = gko::matrix::Dense; diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp index 3b68dbc50a3..ad0bf5710a0 100644 --- a/reference/test/distributed/vector_kernels.cpp +++ b/reference/test/distributed/vector_kernels.cpp @@ -55,12 +55,15 @@ using comm_index_type = gko::distributed::comm_index_type; template class Vector : public ::testing::Test { protected: - using value_type = typename std::tuple_element< - 0, decltype(ValueLocalGlobalIndexType())>::type; - using local_index_type = typename std::tuple_element< - 1, decltype(ValueLocalGlobalIndexType())>::type; - using global_index_type = typename std::tuple_element< - 2, decltype(ValueLocalGlobalIndexType())>::type; + using value_type = + typename std::tuple_element<0, decltype( + 
ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; using global_entry = gko::matrix_data_entry; using mtx = gko::matrix::Dense; From 0e260a7cbc8334dd6b633dc15617b245703ba3d4 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 15 Feb 2022 15:03:32 +0100 Subject: [PATCH 17/38] keep communicator on assignment --- core/distributed/vector.cpp | 1 - include/ginkgo/core/distributed/base.hpp | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 1f165da1120..7a42e7819f0 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -143,7 +143,6 @@ void Vector::convert_to( Vector>* result) const { result->set_size(this->get_size()); - result->set_communicator(this->get_communicator()); this->get_const_local()->convert_to(result->get_local()); } diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp index cd3d10d8bd2..4fc0c816944 100644 --- a/include/ginkgo/core/distributed/base.hpp +++ b/include/ginkgo/core/distributed/base.hpp @@ -58,12 +58,12 @@ class DistributedBase { public: virtual ~DistributedBase() = default; - mpi::communicator get_communicator() const { return comm_; } + DistributedBase& operator=(const DistributedBase&) { return *this; } - explicit DistributedBase(mpi::communicator comm) : comm_{std::move(comm)} {} + mpi::communicator get_communicator() const { return comm_; } protected: - void set_communicator(mpi::communicator comm) { comm_ = std::move(comm); } + explicit DistributedBase(mpi::communicator comm) : comm_{std::move(comm)} {} private: mpi::communicator comm_; From 6ff2672d420483e2d8447ee00dbf1ba14fbecfd7 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 16 Feb 2022 11:35:24 +0100 Subject: [PATCH 18/38] 
review updates - adds tests - updates some documentation - moves mutability into DenseCache - reuses already allocated memory for read if available Co-authored-by: Tobias Ribizel Co-authored-by: Pratik Nayak --- core/base/cache.cpp | 4 +- core/distributed/vector.cpp | 38 +- core/test/mpi/distributed/vector.cpp | 410 ++++++++++++++++++++- include/ginkgo/core/base/cache.hpp | 33 +- include/ginkgo/core/distributed/vector.hpp | 37 +- reference/test/matrix/dense_kernels.cpp | 53 +++ test/matrix/dense_kernels.cpp | 30 +- 7 files changed, 551 insertions(+), 54 deletions(-) diff --git a/core/base/cache.cpp b/core/base/cache.cpp index 088f959750e..222b94ac9f8 100644 --- a/core/base/cache.cpp +++ b/core/base/cache.cpp @@ -42,7 +42,7 @@ namespace detail { template void DenseCache::init(std::shared_ptr exec, - dim<2> size) + dim<2> size) const { if (!vec || vec->get_size() != size || vec->get_executor() != exec) { vec = matrix::Dense::create(exec, size); @@ -52,7 +52,7 @@ void DenseCache::init(std::shared_ptr exec, template void DenseCache::init_from( - const matrix::Dense* template_vec) + const matrix::Dense* template_vec) const { if (!vec || vec->get_size() != template_vec->get_size() || vec->get_executor() != template_vec->get_executor()) { diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 7a42e7819f0..07b5991b4f3 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -64,27 +64,11 @@ void read_distributed_impl( { auto exec = result->get_executor(); - GKO_ASSERT(partition->get_executor() == exec); - - auto global_rows = static_cast(partition->get_size()); - auto global_cols = data.get_size()[1]; - auto tmp = Vector::create(exec, result->get_communicator(), - gko::dim<2>{global_rows, global_cols}); - - auto rank = tmp->get_communicator().rank(); - auto local_rows = static_cast(partition->get_part_size(rank)); - if (tmp->get_local()->get_size() != dim<2>{local_rows, global_cols}) { - auto stride = 
tmp->get_local()->get_stride() > 0 - ? tmp->get_local()->get_stride() - : global_cols; - Vector::local_vector_type::create( - exec, dim<2>{local_rows, global_cols}, stride) - ->move_to(tmp->get_local()); - } - tmp->get_local()->fill(zero()); - exec->run( - vector::make_build_local(data, partition, rank, tmp->get_local())); - tmp->move_to(result); + auto rank = result->get_communicator().rank(); + result->get_local()->fill(zero()); + exec->run(vector::make_build_local( + data, make_temporary_clone(exec, partition).get(), rank, + result->get_local())); } #define GKO_DECLARE_DISTRIBUTED_READ_DISTRIBUTED_IMPL( \ @@ -142,6 +126,8 @@ template void Vector::convert_to( Vector>* result) const { + GKO_ASSERT(this->get_communicator().size() == + result->get_communicator().size()); result->set_size(this->get_size()); this->get_const_local()->convert_to(result->get_local()); } @@ -385,6 +371,16 @@ void Vector::compute_norm1(LinOp* result) const } +template +void Vector::resize(dim<2> global_size, dim<2> local_size) +{ + if (this->get_size() != global_size) { + this->set_size(global_size); + } + this->get_local()->resize(local_size); +} + + #define GKO_DECLARE_DISTRIBUTED_VECTOR(ValueType) class Vector GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DISTRIBUTED_VECTOR); diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index 956327dc4c9..e545e2d7025 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -74,6 +74,8 @@ class VectorRead : public ::testing::Test { md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}} {} + void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } + std::shared_ptr ref; gko::mpi::communicator comm; std::shared_ptr part; @@ -161,6 +163,7 @@ TYPED_TEST(VectorRead, CanReadGlobalDeviceMatrixData) GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); } + TYPED_TEST(VectorRead, CanReadGlobalMatrixDataScattered) { using md_type = typename TestFixture::md_type; @@ 
-254,7 +257,7 @@ TYPED_TEST(VectorRead, CanReadLocalMatrixDataSomeEmpty) template -class VectorOperation : public ::testing::Test { +class VectorReductions : public ::testing::Test { public: using value_type = ValueType; using local_index_type = gko::int32; @@ -265,7 +268,7 @@ class VectorOperation : public ::testing::Test { using dist_vec_type = gko::distributed::Vector; using dense_type = gko::matrix::Dense; - VectorOperation() + VectorReductions() : ref(gko::ReferenceExecutor::create()), comm(MPI_COMM_WORLD), part(gko::share(part_type::build_from_contiguous( @@ -280,6 +283,8 @@ class VectorOperation : public ::testing::Test { vec_b->read_distributed(md_b, part.get()); } + void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } + std::shared_ptr ref; gko::mpi::communicator comm; std::shared_ptr part; @@ -288,10 +293,11 @@ class VectorOperation : public ::testing::Test { std::unique_ptr vec_b; }; -TYPED_TEST_SUITE(VectorOperation, gko::test::ValueTypes); +TYPED_TEST_SUITE(VectorReductions, gko::test::ValueTypes); -TYPED_TEST(VectorOperation, ComputesDotProduct) + +TYPED_TEST(VectorReductions, ComputesDotProduct) { using dense_type = typename TestFixture::dense_type; using value_type = typename TestFixture::value_type; @@ -305,7 +311,7 @@ TYPED_TEST(VectorOperation, ComputesDotProduct) } -TYPED_TEST(VectorOperation, ComputesConjDot) +TYPED_TEST(VectorReductions, ComputesConjDot) { using dense_type = typename TestFixture::dense_type; using value_type = typename TestFixture::value_type; @@ -336,7 +342,7 @@ TYPED_TEST(VectorOperation, ComputesConjDot) } -TYPED_TEST(VectorOperation, ComputesNorm) +TYPED_TEST(VectorReductions, ComputesNorm) { using dense_type = typename TestFixture::dense_type; using value_type = typename TestFixture::value_type; @@ -351,4 +357,396 @@ TYPED_TEST(VectorOperation, ComputesNorm) GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); } + +template +class VectorLocalOp : public ::testing::Test { +public: + using value_type = ValueType; + using 
local_index_type = gko::int32; + using global_index_type = gko::int64; + using part_type = + gko::distributed::Partition; + using md_type = gko::matrix_data; + using dist_vec_type = gko::distributed::Vector; + using dense_type = gko::matrix::Dense; + + VectorLocalOp() + : ref(gko::ReferenceExecutor::create()), + comm(MPI_COMM_WORLD), + part(gko::share(part_type::build_from_contiguous( + this->ref, {ref, {0, 2, 4, 6}}))), + vec_a(dist_vec_type::create(ref, comm)), + vec_b(dist_vec_type::create(ref, comm)), + engine(42) + { + md_type md_a{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; + md_type md_b{{10, -11}, {8, -9}, {-6, 7}, {4, -5}, {2, -3}, {0, 1}}; + + vec_a->read_distributed(md_a, part.get()); + vec_b->read_distributed(md_b, part.get()); + } + + void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } + + auto generate_local_and_global_pair(gko::dim<2> local_size) + { + auto local_vec = gko::test::generate_random_matrix( + local_size[0], local_size[1], + std::uniform_int_distribution(0, local_size[1] - 1), + std::normal_distribution>(), + this->engine, this->ref); + auto dist_vec = dist_vec_type::create( + this->ref, this->comm, + gko::dim<2>{local_size[0] * this->comm.size(), local_size[1]}, + local_size); + dist_vec->get_local()->copy_from(local_vec.get()); + + return std::make_pair(std::move(dist_vec), std::move(local_vec)); + } + + + std::shared_ptr ref; + gko::mpi::communicator comm; + std::shared_ptr part; + + std::unique_ptr vec_a; + std::unique_ptr vec_b; + + std::default_random_engine engine; +}; + + +TYPED_TEST_SUITE(VectorLocalOp, gko::test::ValueTypes); + + +TYPED_TEST(VectorLocalOp, ApplyNotSupported) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + auto a = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{}); + auto b = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{}); + auto c = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{}); + + 
ASSERT_THROW(a->apply(b.get(), c.get()), gko::NotSupported); +} + + +TYPED_TEST(VectorLocalOp, AdvancedApplyNotSupported) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + auto a = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{}); + auto b = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{1, 1}, + gko::dim<2>{}); + auto c = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{}); + auto d = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{1, 1}, + gko::dim<2>{}); + auto e = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{}); + ASSERT_THROW(a->apply(b.get(), c.get(), d.get(), e.get()), + gko::NotSupported); +} + + +TYPED_TEST(VectorLocalOp, ConvertsToPrecision) +{ + using Vector = typename TestFixture::dist_vec_type; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherVector = typename gko::distributed::Vector; + auto tmp = OtherVector::create(this->ref, this->comm); + auto res = Vector::create(this->ref, this->comm); + // If OtherT is more precise: 0, otherwise r + auto residual = r::value < r::value + ? gko::remove_complex{0} + : gko::remove_complex{r::value}; + + this->vec_a->convert_to(tmp.get()); + tmp->convert_to(res.get()); + + GKO_ASSERT_MTX_NEAR(this->vec_a->get_local(), res->get_local(), residual); +} + + +TYPED_TEST(VectorLocalOp, MovesToPrecision) +{ + using Vector = typename TestFixture::dist_vec_type; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherVector = typename gko::distributed::Vector; + auto tmp = OtherVector::create(this->ref, this->comm); + auto res = Vector::create(this->ref, this->comm); + auto clone_vec_a = gko::clone(this->vec_a); + // If OtherT is more precise: 0, otherwise r + auto residual = r::value < r::value + ? 
gko::remove_complex{0} + : gko::remove_complex{r::value}; + + clone_vec_a->move_to(tmp.get()); + tmp->move_to(res.get()); + + GKO_ASSERT_MTX_NEAR(this->vec_a->get_local(), res->get_local(), residual); +} + + +TYPED_TEST(VectorLocalOp, ComputeAbsoluteSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + + auto dist_absolute = dist->compute_absolute(); + auto local_absolute = local->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(dist_absolute->get_const_local(), local_absolute, 0); +} + + +TYPED_TEST(VectorLocalOp, ComputeAbsoluteInplaceSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + + dist->compute_absolute_inplace(); + local->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); +} + + +TYPED_TEST(VectorLocalOp, MakeComplexSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + + auto dist_complex = dist->make_complex(); + auto local_complex = local->make_complex(); + + GKO_ASSERT_MTX_NEAR(dist_complex->get_const_local(), local_complex, 0); +} + + +TYPED_TEST(VectorLocalOp, MakeComplexInplaceSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto dist_complex = dist->make_complex(); + dist_complex->fill(0); + auto local_complex = local->make_complex(); + local_complex->fill(0); + + dist->make_complex(dist_complex.get()); + 
local->make_complex(local_complex.get()); + + GKO_ASSERT_MTX_NEAR(dist_complex->get_const_local(), local_complex, 0); +} + + +TYPED_TEST(VectorLocalOp, GetRealSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + + auto dist_real = dist->get_real(); + auto local_real = local->get_real(); + + GKO_ASSERT_MTX_NEAR(dist_real->get_const_local(), local_real, 0); +} + + +TYPED_TEST(VectorLocalOp, GetRealInplaceSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto dist_real = dist->get_real(); + dist_real->fill(0); + auto local_real = local->get_real(); + local_real->fill(0); + + dist->get_real(dist_real.get()); + local->get_real(local_real.get()); + + GKO_ASSERT_MTX_NEAR(dist_real->get_const_local(), local_real, 0); +} + + +TYPED_TEST(VectorLocalOp, GetImagSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + + auto dist_imag = dist->get_imag(); + auto local_imag = local->get_imag(); + + GKO_ASSERT_MTX_NEAR(dist_imag->get_const_local(), local_imag, 0); +} + + +TYPED_TEST(VectorLocalOp, GetImagInplaceSameAsLocal) +{ + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto dist_imag = dist->get_imag(); + dist_imag->fill(0); + auto local_imag = local->get_imag(); + local_imag->fill(0); + + dist->get_imag(dist_imag.get()); + local->get_imag(local_imag.get()); + + 
GKO_ASSERT_MTX_NEAR(dist_imag->get_const_local(), local_imag, 0); +} + + +TYPED_TEST(VectorLocalOp, FillSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto value = gko::test::detail::get_rand_value( + std::normal_distribution>(), + this->engine); + + dist->fill(value); + local->fill(value); + + GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); +} + + +TYPED_TEST(VectorLocalOp, ScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto value = gko::test::generate_random_matrix( + 1, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution>(), + this->engine, this->ref); + + dist->scale(value.get()); + local->scale(value.get()); + + GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); +} + + +TYPED_TEST(VectorLocalOp, InvScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto value = gko::test::generate_random_matrix( + 1, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::uniform_real_distribution>(1.0, + 2.0), + this->engine, this->ref); + + dist->inv_scale(value.get()); + local->inv_scale(value.get()); + + GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); +} + + +TYPED_TEST(VectorLocalOp, AddScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + gko::size_type local_size = 20; + 
gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto pair_b = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto& dist_b = pair_b.first; + auto& local_b = pair_b.second; + auto value = gko::test::generate_random_matrix( + 1, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution>(), + this->engine, this->ref); + + dist->add_scaled(value.get(), dist_b.get()); + local->add_scaled(value.get(), local_b.get()); + + GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); +} + + +TYPED_TEST(VectorLocalOp, SubScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + gko::size_type local_size = 20; + gko::size_type num_cols = 7; + auto pair = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto pair_b = + this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); + auto& dist = pair.first; + auto& local = pair.second; + auto& dist_b = pair_b.first; + auto& local_b = pair_b.second; + auto value = gko::test::generate_random_matrix( + 1, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution>(), + this->engine, this->ref); + + dist->sub_scaled(value.get(), dist_b.get()); + local->sub_scaled(value.get(), local_b.get()); + + GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); +} + + } // namespace diff --git a/include/ginkgo/core/base/cache.hpp b/include/ginkgo/core/base/cache.hpp index 8e4fba6a36c..19c56700fbe 100644 --- a/include/ginkgo/core/base/cache.hpp +++ b/include/ginkgo/core/base/cache.hpp @@ -57,7 +57,8 @@ namespace detail { /** * Manages a Dense vector that is buffered and reused internally to avoid * repeated allocations. Copying an instance will only yield an empty object - * since copying the cached vector would not make sense. 
+ * since copying the cached vector would not make sense. The stored object is + * always mutable, so the cache can be used in a const-context. * * @internal The struct is present to wrap cache-like buffer storage that will * not be copied when the outer object gets copied. @@ -67,10 +68,10 @@ struct DenseCache { DenseCache() = default; ~DenseCache() = default; DenseCache(const DenseCache&) {} - DenseCache(DenseCache&&) {} + DenseCache(DenseCache&&) noexcept {} DenseCache& operator=(const DenseCache&) { return *this; } - DenseCache& operator=(DenseCache&&) { return *this; } - std::unique_ptr> vec{}; + DenseCache& operator=(DenseCache&&) noexcept { return *this; } + mutable std::unique_ptr> vec{}; /** @@ -85,7 +86,7 @@ struct DenseCache { * @param template_vec Defines the configuration (executor, size, stride) * of the buffered vector. */ - void init_from(const matrix::Dense* template_vec); + void init_from(const matrix::Dense* template_vec) const; /** * Initializes the buffered vector, if @@ -96,15 +97,25 @@ struct DenseCache { * @param exec Executor of the buffered vector. * @param size Size of the buffered vector. */ - void init(std::shared_ptr exec, dim<2> size); + void init(std::shared_ptr exec, dim<2> size) const; - matrix::Dense& operator*() { return *vec; } - - matrix::Dense* operator->() { return vec.get(); } + /** + * Reference access to the underlying vector. + * @return Reference to the stored vector. + */ + matrix::Dense& operator*() const { return *vec; } - matrix::Dense* get() { return vec.get(); } + /** + * Pointer access to the underlying vector. + * @return Pointer to the stored vector. + */ + matrix::Dense* operator->() const { return vec.get(); } - const matrix::Dense* get() const { return vec.get(); } + /** + * Pointer access to the underlying vector. + * @return Pointer to the stored vector. 
+ */ + matrix::Dense* get() const { return vec.get(); } }; diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 40b726f7d17..48a48642042 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -111,7 +111,8 @@ class Vector using local_vector_type = gko::matrix::Dense; /** - * Reads a vector from the matrix_data structure and a global row partition. + * Reads a vector from the device_matrix_data structure and a global row + * partition. * * The number of rows of the matrix data is ignored, only its number of * columns is relevant. The number of rows is inferred from the partition. @@ -119,34 +120,43 @@ class Vector * @note The matrix data can contain entries for rows other than those owned * by the process. Entries for those rows are discarded. * - * @param data The matrix_data structure + * @param data The device_matrix_data structure * @param partition The global row partition */ template void read_distributed( - const matrix_data& data, + const device_matrix_data& data, const Partition* partition) { - this->read_distributed( - device_matrix_data::create_from_host( - this->get_executor(), data), - std::move(partition)); + auto global_cols = data.get_size()[1]; + this->resize( + dim<2>(partition->get_size(), global_cols), + dim<2>(partition->get_part_size(this->get_communicator().rank()), + global_cols)); + detail::read_distributed_impl(data, partition, this); } /** - * Reads a vector from the device_matrix_data structure and a global row + * Reads a vector from the matrix_data structure and a global row * partition. * * See @read_distributed + * + * @note For efficiency it is advised to use the device_matrix_data + * overload. 
*/ template void read_distributed( - const device_matrix_data& data, + const matrix_data& data, const Partition* partition) { - detail::read_distributed_impl(data, partition, this); + this->read_distributed( + device_matrix_data::create_from_host( + this->get_executor(), data), + std::move(partition)); } + void convert_to(Vector>* result) const override; void move_to(Vector>* result) override; @@ -325,6 +335,8 @@ class Vector mpi::communicator comm = mpi::communicator(MPI_COMM_WORLD), dim<2> global_size = {}, dim<2> local_size = {}); + void resize(dim<2> global_size, dim<2> local_size); + void apply_impl(const LinOp*, LinOp*) const override; void apply_impl(const LinOp*, const LinOp*, const LinOp*, @@ -332,9 +344,8 @@ class Vector private: local_vector_type local_; - mutable ::gko::detail::DenseCache host_reduction_buffer_; - mutable ::gko::detail::DenseCache> - host_norm_buffer_; + ::gko::detail::DenseCache host_reduction_buffer_; + ::gko::detail::DenseCache> host_norm_buffer_; }; diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 477193f8372..48f9f69cfe6 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -630,6 +630,59 @@ TYPED_TEST(Dense, ComputesNorm2Mixed) } +TYPED_TEST(Dense, ComputesNorm2Squared) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using T_nc = gko::remove_complex; + using NormVector = gko::matrix::Dense; + auto mtx(gko::initialize( + {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, this->exec)); + auto result = NormVector::create(this->exec, gko::dim<2>{1, 2}); + + gko::kernels::reference::dense::compute_norm2_sqr( + gko::as(this->exec), mtx.get(), result.get()); + + EXPECT_EQ(result->at(0, 0), T_nc{9.0}); + EXPECT_EQ(result->at(0, 1), T_nc{25.0}); +} + + +TYPED_TEST(Dense, ComputesNorm2SquaredMixed) +{ + using MixedMtx = typename TestFixture::MixedMtx; + using MixedT = typename MixedMtx::value_type; + using 
MixedT_nc = gko::remove_complex; + using MixedNormVector = gko::matrix::Dense; + auto mtx(gko::initialize( + {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, + this->exec)); + auto result = MixedNormVector::create(this->exec, gko::dim<2>{1, 2}); + + gko::kernels::reference::dense::compute_norm2_sqr( + gko::as(this->exec), mtx.get(), result.get()); + + EXPECT_EQ(result->at(0, 0), MixedT_nc{9.0}); + EXPECT_EQ(result->at(0, 1), MixedT_nc{25.0}); +} + + +TYPED_TEST(Dense, ComputesSqrt) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using T_nc = gko::remove_complex; + using NormVector = gko::matrix::Dense; + auto mtx(gko::initialize(I>{{9.0, 25.0}}, this->exec)); + + gko::kernels::reference::dense::compute_sqrt( + gko::as(this->exec), mtx.get()); + + EXPECT_EQ(mtx->at(0, 0), T_nc{3.0}); + EXPECT_EQ(mtx->at(0, 1), T_nc{5.0}); +} + + TYPED_TEST(Dense, ComputesNorm1) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index eba5be5fd9d..43178920308 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1104,7 +1104,7 @@ TEST_F(Dense, ComputeNorm1IsEquivalentToRef) auto norm_size = gko::dim<2>{1, x->get_size()[1]}; auto norm_expected = NormVector::create(ref, norm_size); - auto dnorm = NormVector::create(ref, norm_size); + auto dnorm = NormVector::create(exec, norm_size); // all parameters are on ref to check cross-executor calls x->compute_norm1(norm_expected.get()); @@ -1332,4 +1332,32 @@ TEST_F(Dense, AddScaledIdentityToNonSquareOnDifferentExecutor) } +TEST_F(Dense, ComputeNorm2SquaredIsEquivalentToRef) +{ + set_up_apply_data(); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(ref, norm_size); + auto dnorm = NormVector::create(exec, norm_size); + + gko::kernels::reference::dense::compute_norm2_sqr(ref, x.get(), + norm_expected.get()); + gko::kernels::EXEC_NAMESPACE::dense::compute_norm2_sqr(exec, 
dx.get(), + dnorm.get()); + + GKO_ASSERT_MTX_NEAR(dnorm, norm_expected, r::value); +} + + +TEST_F(Dense, ComputesSqrt) +{ + auto mtx(gko::initialize(I>>{{9.0, 25.0}}, ref)); + auto dmtx(gko::initialize(I>>{{9.0, 25.0}}, exec)); + + gko::kernels::reference::dense::compute_sqrt(ref, mtx.get()); + gko::kernels::EXEC_NAMESPACE::dense::compute_sqrt(exec, dmtx.get()); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); +} + + } // namespace From 2291e5d19d6dda120ae4c0786aab4931d3756966 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 16 Feb 2022 18:11:11 +0100 Subject: [PATCH 19/38] refactor cmake mpi test handling --- cmake/create_test.cmake | 83 +++++++++++++++++++++--------------- core/test/mpi/CMakeLists.txt | 6 --- 2 files changed, 49 insertions(+), 40 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 28685fdf232..2cb1e06abf8 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -3,11 +3,18 @@ function(ginkgo_build_test_name test_name target_name) ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) -endfunction() +endfunction(ginkgo_build_test_name) -function(ginkgo_set_test_target_properties test_name test_target_name) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +function(ginkgo_create_gtest_mpi_main) + add_library(gtest_mpi_main "") + target_sources(gtest_mpi_main + PRIVATE + ${PROJECT_SOURCE_DIR}/core/test/mpi/gtest/mpi_listener.cpp) + find_package(MPI REQUIRED) + target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) +endfunction(ginkgo_create_gtest_mpi_main) + +function(ginkgo_set_test_target_default_properties test_name test_target_name) set_target_properties(${test_target_name} PROPERTIES OUTPUT_NAME ${test_name}) if (GINKGO_FAST_TESTS) @@ -19,12 +26,33 @@ function(ginkgo_set_test_target_properties test_name test_target_name) if 
(GINKGO_CHECK_CIRCULAR_DEPS) target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") endif() + target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR}) + target_link_libraries(${test_target_name} PRIVATE ginkgo) +endfunction(ginkgo_set_test_target_default_properties) + +function(ginkgo_internal_add_test test_name test_target_name) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + ginkgo_set_test_target_default_properties(${test_name} ${test_target_name}) add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${test_target_name} WORKING_DIRECTORY "$") - target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR}) - target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::Main GTest::GTest) -endfunction() + target_link_libraries(${test_target_name} PRIVATE GTest::Main GTest::GTest) +endfunction(ginkgo_internal_add_test) + +function(ginkgo_internal_add_mpi_test test_name test_target_name num_mpi_procs) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + ginkgo_set_test_target_default_properties(${test_name} ${test_target_name}) + if(NOT TARGET gtest_mpi_main) + ginkgo_create_gtest_mpi_main() + endif() + target_link_libraries(${test_target_name} PRIVATE gtest_mpi_main GTest::GTest MPI::MPI_CXX) + set(test_param ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_procs} ${OPENMPI_RUN_AS_ROOT_FLAG} + ${CMAKE_BINARY_DIR}/${REL_BINARY_DIR}/${test_name}) + add_test(NAME ${REL_BINARY_DIR}/${test_name} + COMMAND ${MPIEXEC_EXECUTABLE} ${test_param}) +endfunction(ginkgo_internal_add_mpi_test) function(ginkgo_create_test test_name) ginkgo_build_test_name(${test_name} test_target_name) @@ -32,7 +60,7 @@ function(ginkgo_create_test test_name) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - 
ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_test) function(ginkgo_create_dpcpp_test test_name) @@ -42,7 +70,7 @@ function(ginkgo_create_dpcpp_test test_name) target_compile_options(${test_target_name} PRIVATE "${GINKGO_DPCPP_FLAGS}") target_compile_options(${test_target_name} PRIVATE "${GINKGO_COMPILER_FLAGS}") target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. if (MKL_ENV) set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}") @@ -57,29 +85,16 @@ function(ginkgo_create_thread_test test_name) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_link_libraries(${test_target_name} PRIVATE Threads::Threads ${ARGN}) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_thread_test) function(ginkgo_create_mpi_test test_name num_mpi_procs) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES - OUTPUT_NAME ${test_name}) - if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif() - if("${GINKGO_MPI_EXEC_SUFFIX}" MATCHES ".openmpi" AND MPI_RUN_AS_ROOT) - set(OPENMPI_RUN_AS_ROOT_FLAG "--allow-run-as-root") - else() - set(OPENMPI_RUN_AS_ROOT_FLAG "") - endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo 
gtest_mpi_main GTest::GTest ${ARGN}) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE MPI::MPI_CXX) - set(test_param ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_procs} ${OPENMPI_RUN_AS_ROOT_FLAG} ${CMAKE_BINARY_DIR}/${REL_BINARY_DIR}/${test_name}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} - COMMAND ${MPIEXEC_EXECUTABLE} ${test_param}) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + ginkgo_internal_add_mpi_test(${test_name} ${test_target_name} ${num_mpi_procs}) endfunction(ginkgo_create_mpi_test) function(ginkgo_create_test_cpp_cuda_header test_name) @@ -89,7 +104,7 @@ function(ginkgo_create_test_cpp_cuda_header test_name) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_include_directories(${test_target_name} PRIVATE "${CUDA_INCLUDE_DIRS}") target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_test_cpp_cuda_header) function(ginkgo_create_cuda_test test_name) @@ -112,7 +127,7 @@ function(ginkgo_create_cuda_test test_name) if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF) endif() - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_cuda_test) function(ginkgo_create_hip_test test_name) @@ -159,7 +174,7 @@ ginkgo_build_test_name(${test_name} test_target_name) ${HIPSPARSE_INCLUDE_DIRS} ) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + 
ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_hip_test) function(ginkgo_create_common_test test_name) @@ -200,7 +215,7 @@ function(ginkgo_create_common_test test_name) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) endif() - ginkgo_set_test_target_properties(${test_name}_${exec} ${test_target_name}) + ginkgo_internal_add_test(${test_name}_${exec} ${test_target_name}) endforeach() endfunction(ginkgo_create_common_test) @@ -213,5 +228,5 @@ function(ginkgo_create_common_and_reference_test test_name) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - ginkgo_set_test_target_properties(${test_name}_reference ${test_target_name}) + ginkgo_internal_add_test(${test_name}_reference ${test_target_name}) endfunction() diff --git a/core/test/mpi/CMakeLists.txt b/core/test/mpi/CMakeLists.txt index afbd19bacae..eb2c9192ebc 100644 --- a/core/test/mpi/CMakeLists.txt +++ b/core/test/mpi/CMakeLists.txt @@ -1,8 +1,2 @@ -add_library(gtest_mpi_main "") -target_sources(gtest_mpi_main - PRIVATE - gtest/mpi_listener.cpp) -find_package(MPI REQUIRED) -target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) add_subdirectory(base) add_subdirectory(distributed) From b925a03ee10070cfe608943b8d600a84eb917faf Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 17 Feb 2022 14:45:21 +0100 Subject: [PATCH 20/38] add cmake function for common mpi test --- cmake/create_test.cmake | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 2cb1e06abf8..b318243da5a 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -177,8 +177,9 @@ 
ginkgo_build_test_name(${test_name} test_target_name) ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_hip_test) -function(ginkgo_create_common_test test_name) - cmake_parse_arguments(PARSE_ARGV 1 common_test "" "" "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES") +function(ginkgo_internal_create_common_test_template test_name) + cmake_parse_arguments(PARSE_ARGV 1 common_test "" "TEST_TYPE" "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES;ADDITIONAL_TEST_PARAMETERS") + string(TOLOWER ${common_test_TEST_TYPE} test_type) set(executors) if(GINKGO_BUILD_OMP) list(APPEND executors omp) @@ -215,10 +216,24 @@ function(ginkgo_create_common_test test_name) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) endif() - ginkgo_internal_add_test(${test_name}_${exec} ${test_target_name}) + if(${test_type} STREQUAL default) + ginkgo_internal_add_test(${test_name}_${exec} ${test_target_name}) + elseif(${test_type} STREQUAL mpi) + ginkgo_internal_add_mpi_test(${test_name}_${exec} ${test_target_name} ${common_test_ADDITIONAL_TEST_PARAMETERS}) + else() + message(WARNING "Encountered unrecognized test type ${test_type} during common test creation.") + endif() endforeach() +endfunction(ginkgo_internal_create_common_test_template) + +function(ginkgo_create_common_test test_name) + ginkgo_internal_create_common_test_template(${test_name} TEST_TYPE default ${ARGN}) endfunction(ginkgo_create_common_test) +function(ginkgo_create_common_mpi_test test_name num_mpi_procs) + ginkgo_internal_create_common_test_template(${test_name} TEST_TYPE mpi ADDITIONAL_TEST_PARAMETERS ${num_mpi_procs} ${ARGN}) +endfunction(ginkgo_create_common_mpi_test) + function(ginkgo_create_common_and_reference_test test_name) ginkgo_create_common_test(${test_name}) ginkgo_build_test_name(${test_name} test_target_name) From 3d7fa0fcba5c11f443d3a51cc052c740187bea6b Mon Sep 17 00:00:00 2001 
From: Marcel Koch Date: Thu, 17 Feb 2022 17:15:58 +0100 Subject: [PATCH 21/38] adds missing typedef documentation --- include/ginkgo/core/base/types.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 0207f7a2457..be421de3cf6 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -152,7 +152,7 @@ using uint64 = std::uint64_t; /** - * + * Unsigned integer type capable of holding a pointer to void */ using uintptr = std::uintptr_t; From 7f38ddfc3ba01c2d4ceda578233b6e32fbceb8b6 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 17 Feb 2022 17:20:39 +0100 Subject: [PATCH 22/38] adds exec initializer with MPI for common test --- test/utils/executor.hpp | 64 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index a21ce962f4d..41b41460b40 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -33,9 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef GKO_TEST_UTILS_EXECUTOR_HPP_ #define GKO_TEST_UTILS_EXECUTOR_HPP_ + #include +#if GINKGO_BUILD_MPI +#include +#endif + + #include @@ -85,4 +91,62 @@ void init_executor(std::shared_ptr ref, } +#if GINKGO_BUILD_MPI + + +void init_executor(std::shared_ptr ref, + std::shared_ptr& exec, + gko::mpi::communicator comm) +{ + exec = gko::ReferenceExecutor::create(); +} + + +void init_executor(std::shared_ptr ref, + std::shared_ptr& exec, + gko::mpi::communicator comm) +{ + exec = gko::OmpExecutor::create(); +} + + +void init_executor(std::shared_ptr ref, + std::shared_ptr& exec, + gko::mpi::communicator comm) +{ + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + auto device_id = comm.node_local_rank() % gko::CudaExecutor::get_num_devices(); + exec = gko::CudaExecutor::create(device_id, ref); +} + + +void init_executor(std::shared_ptr ref, + std::shared_ptr& exec, + gko::mpi::communicator comm) +{ + ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); + auto device_id = comm.node_local_rank() % gko::HipExecutor::get_num_devices(); + exec = gko::HipExecutor::create(device_id, ref); +} + + +void init_executor(std::shared_ptr ref, + std::shared_ptr& exec, + gko::mpi::communicator comm) +{ + if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { + auto device_id = comm.node_local_rank() % gko::DpcppExecutor::get_num_devices("gpu"); + exec = gko::DpcppExecutor::create(device_id, ref); + } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { + auto device_id = comm.node_local_rank() % gko::DpcppExecutor::get_num_devices("cpu"); + exec = gko::DpcppExecutor::create(device_id, ref); + } else { + FAIL() << "No suitable DPC++ devices"; + } +} + + +#endif + + #endif // GKO_TEST_UTILS_EXECUTOR_HPP_ From d82958498fd3031546984fa92975873b5ab1b455 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 17 Feb 2022 17:27:44 +0100 Subject: [PATCH 23/38] fix need for host buffer check --- core/distributed/vector.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff 
--git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 07b5991b4f3..04a11d3d758 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -280,7 +280,7 @@ void Vector::compute_dot(const LinOp* b, LinOp* result) const dense_res.get()); exec->synchronize(); auto use_host_buffer = - exec->get_master() != exec || !gko::mpi::is_gpu_aware(); + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); if (use_host_buffer) { host_reduction_buffer_.init(exec->get_master(), dense_res->get_size()); host_reduction_buffer_->copy_from(dense_res.get()); @@ -305,7 +305,7 @@ void Vector::compute_conj_dot(const LinOp* b, LinOp* result) const dense_res.get()); exec->synchronize(); auto use_host_buffer = - exec->get_master() != exec || !gko::mpi::is_gpu_aware(); + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); if (use_host_buffer) { host_reduction_buffer_.init(exec->get_master(), dense_res->get_size()); host_reduction_buffer_->copy_from(dense_res.get()); @@ -331,7 +331,7 @@ void Vector::compute_norm2(LinOp* result) const dense_res.get())); exec->synchronize(); auto use_host_buffer = - exec->get_master() != exec || !gko::mpi::is_gpu_aware(); + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); if (use_host_buffer) { host_norm_buffer_.init(exec->get_master(), dense_res->get_size()); host_norm_buffer_->copy_from(dense_res.get()); @@ -357,7 +357,7 @@ void Vector::compute_norm1(LinOp* result) const this->get_const_local()->compute_norm1(dense_res.get()); exec->synchronize(); auto use_host_buffer = - exec->get_master() != exec || !gko::mpi::is_gpu_aware(); + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); if (use_host_buffer) { host_norm_buffer_.init(exec->get_master(), dense_res->get_size()); host_norm_buffer_->copy_from(dense_res.get()); From a01f74eadb5a2ef466970f9007b82a38019591b7 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 17 Feb 2022 17:28:30 +0100 Subject: [PATCH 24/38] add common mpi test for vector --- 
test/CMakeLists.txt | 3 + test/mpi/CMakeLists.txt | 1 + test/mpi/distributed/CMakeLists.txt | 1 + test/mpi/distributed/vector.cpp | 545 ++++++++++++++++++++++++++++ 4 files changed, 550 insertions(+) create mode 100644 test/mpi/CMakeLists.txt create mode 100644 test/mpi/distributed/CMakeLists.txt create mode 100644 test/mpi/distributed/vector.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1c8be7007f3..b9679fce155 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -4,5 +4,8 @@ add_subdirectory(base) add_subdirectory(components) add_subdirectory(distributed) add_subdirectory(matrix) +if(GINKGO_BUILD_MPI) + add_subdirectory(mpi) +endif() add_subdirectory(multigrid) add_subdirectory(solver) diff --git a/test/mpi/CMakeLists.txt b/test/mpi/CMakeLists.txt new file mode 100644 index 00000000000..9066de66970 --- /dev/null +++ b/test/mpi/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(distributed) diff --git a/test/mpi/distributed/CMakeLists.txt b/test/mpi/distributed/CMakeLists.txt new file mode 100644 index 00000000000..f6e8195ebc6 --- /dev/null +++ b/test/mpi/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_common_mpi_test(vector 3) diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp new file mode 100644 index 00000000000..04ed646ac29 --- /dev/null +++ b/test/mpi/distributed/vector.cpp @@ -0,0 +1,545 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + +namespace { + + +bool needs_transfers(std::shared_ptr exec) +{ + return exec->get_master() != exec && !gko::mpi::is_gpu_aware(); +} + + +class HostToDeviceLogger : public gko::log::Logger { +public: + void on_copy_started(const gko::Executor* exec_from, + const gko::Executor* exec_to, + const gko::uintptr& loc_from, + const gko::uintptr& loc_to, + const gko::size_type& num_bytes) const override + { + if (exec_from != exec_to) { + transfer_count_++; + } + } + + int get_transfer_count() const { return transfer_count_; } + + static std::unique_ptr create( + std::shared_ptr exec) + { + return std::unique_ptr( + new HostToDeviceLogger(std::move(exec))); + } + +protected: + explicit HostToDeviceLogger(std::shared_ptr exec) + : gko::log::Logger(exec, gko::log::Logger::copy_started_mask) + {} + +private: + mutable int transfer_count_ = 0; +}; + + +class VectorReductions : public ::testing::Test { +public: + using value_type = float; + using local_index_type = gko::int32; + using global_index_type = gko::int64; + using part_type = + gko::distributed::Partition; + using md_type = gko::matrix_data; + using dist_vec_type = gko::distributed::Vector; + using dense_type = gko::matrix::Dense; + + VectorReductions() + : ref(gko::ReferenceExecutor::create()), + exec(), + comm(MPI_COMM_WORLD), + size{53, 11}, + x(dist_vec_type::create(ref, comm)), + dx(dist_vec_type::create(exec, comm)), + y(dist_vec_type::create(ref, comm)), + dy(dist_vec_type::create(exec, comm)), + logger(gko::share(HostToDeviceLogger::create(exec))), + engine(42) + { + init_executor(ref, exec, comm); + exec->add_logger(logger); + + auto num_parts = + static_cast(comm.size()); + auto mapping = + gko::test::generate_random_array( + size[0], + std::uniform_int_distribution< + gko::distributed::comm_index_type>(0, 
num_parts - 1), + engine, ref); + auto part = part_type::build_from_mapping(ref, mapping, num_parts); + + auto md_x = gko::test::generate_random_matrix_data( + size[0], size[1], + std::uniform_int_distribution(size[1], size[1]), + std::normal_distribution>(), + engine); + x->read_distributed(md_x, part.get()); + dx = gko::clone(exec, x); + + auto md_y = gko::test::generate_random_matrix_data( + size[0], size[1], + std::uniform_int_distribution(size[1], size[1]), + std::normal_distribution>(), + engine); + y->read_distributed(md_y, part.get()); + dy = gko::clone(exec, y); + } + + void SetUp() override { ASSERT_EQ(comm.size(), 3); } + + void init_result() + { + res = dense_type::create(ref, gko::dim<2>{1, size[1]}); + dres = dense_type::create(exec, gko::dim<2>{1, size[1]}); + res->fill(0.); + dres->fill(0.); + } + + std::shared_ptr ref; + std::shared_ptr exec; + + gko::mpi::communicator comm; + + gko::dim<2> size; + + std::unique_ptr x; + std::unique_ptr dx; + std::unique_ptr y; + std::unique_ptr dy; + std::unique_ptr res; + std::unique_ptr dres; + + std::shared_ptr logger; + + std::default_random_engine engine; +}; + + +TEST_F(VectorReductions, ComputesDotProductIsSameAsRef) +{ + init_result(); + + x->compute_dot(y.get(), res.get()); + dx->compute_dot(dy.get(), dres.get()); + + GKO_ASSERT_MTX_NEAR(res, dres, r::value); +} + + +TEST_F(VectorReductions, ComputesConjDotProductIsSameAsRef) +{ + init_result(); + + x->compute_conj_dot(y.get(), res.get()); + dx->compute_conj_dot(dy.get(), dres.get()); + + GKO_ASSERT_MTX_NEAR(res, dres, r::value); +} + + +TEST_F(VectorReductions, ComputesNorm2IsSameAsRef) +{ + init_result(); + + x->compute_norm2(res.get()); + dx->compute_norm2(dres.get()); + + GKO_ASSERT_MTX_NEAR(res, dres, r::value); +} + + +TEST_F(VectorReductions, ComputesNorm1IsSameAsRef) +{ + init_result(); + + x->compute_norm1(res.get()); + dx->compute_norm1(dres.get()); + + GKO_ASSERT_MTX_NEAR(res, dres, r::value); +} + + +TEST_F(VectorReductions, 
ComputeDotCopiesToHostOnlyIfNecessary) +{ + init_result(); + auto transfer_count_before = logger->get_transfer_count(); + + dx->compute_dot(dy.get(), dres.get()); + + ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, + needs_transfers(exec)); +} + + +TEST_F(VectorReductions, ComputeConjDotCopiesToHostOnlyIfNecessary) +{ + init_result(); + auto transfer_count_before = logger->get_transfer_count(); + + dx->compute_conj_dot(dy.get(), dres.get()); + + ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, + needs_transfers(exec)); +} + + +TEST_F(VectorReductions, ComputeNorm2CopiesToHostOnlyIfNecessary) +{ + init_result(); + auto transfer_count_before = logger->get_transfer_count(); + + dx->compute_norm2(dres.get()); + + ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, + needs_transfers(exec)); +} + + +TEST_F(VectorReductions, ComputeNorm1CopiesToHostOnlyIfNecessary) +{ + init_result(); + auto transfer_count_before = logger->get_transfer_count(); + + dx->compute_norm1(dres.get()); + + ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, + needs_transfers(exec)); +} + + +class VectorLocalOps : public ::testing::Test { +public: + using value_type = float; + using mixed_type = double; + using local_index_type = gko::int32; + using global_index_type = gko::int64; + using part_type = + gko::distributed::Partition; + using md_type = gko::matrix_data; + using dist_vec_type = gko::distributed::Vector; + using complex_dist_vec_type = + gko::distributed::Vector::complex_type; + using dense_type = gko::matrix::Dense; + + VectorLocalOps() + : ref(gko::ReferenceExecutor::create()), + exec(), + comm(MPI_COMM_WORLD), + size{53, 11}, + engine(42) + { + init_executor(ref, exec, comm); + + x = dist_vec_type::create(ref, comm); + dx = dist_vec_type::create(exec, comm); + y = dist_vec_type::create(ref, comm); + dy = dist_vec_type::create(exec, comm); + alpha = dense_type ::create(ref); + dalpha = dense_type ::create(exec); + complex = 
complex_dist_vec_type::create(ref, comm); + dcomplex = complex_dist_vec_type::create(exec, comm); + + auto num_parts = + static_cast(comm.size()); + auto mapping = + gko::test::generate_random_array( + size[0], + std::uniform_int_distribution< + gko::distributed::comm_index_type>(0, num_parts - 1), + engine, ref); + part = part_type::build_from_mapping(ref, mapping, num_parts); + } + + void SetUp() override { ASSERT_EQ(comm.size(), 3); } + + template + void generate_vector_pair(std::unique_ptr& host, + std::unique_ptr& device) + { + using vtype = typename VectorType::value_type; + auto md = + gko::test::generate_random_matrix_data( + size[0], size[1], + std::uniform_int_distribution(size[1], size[1]), + std::normal_distribution>(), engine); + host->read_distributed(md, part.get()); + device = gko::clone(exec, host); + } + + void init_vectors() + { + generate_vector_pair(x, dx); + generate_vector_pair(y, dy); + + alpha = gko::test::generate_random_matrix( + 1, size[1], + std::uniform_int_distribution(size[1], size[1]), + std::normal_distribution(), engine, ref); + dalpha = gko::clone(exec, alpha); + } + + void init_complex_vectors() { generate_vector_pair(complex, dcomplex); } + + std::shared_ptr ref; + std::shared_ptr exec; + + gko::mpi::communicator comm; + + gko::dim<2> size; + + std::unique_ptr part; + + std::unique_ptr x; + std::unique_ptr dx; + std::unique_ptr y; + std::unique_ptr dy; + std::unique_ptr alpha; + std::unique_ptr dalpha; + std::unique_ptr complex; + std::unique_ptr dcomplex; + + std::default_random_engine engine; +}; + + +TEST_F(VectorLocalOps, ConvertsToPrecision) +{ + using OtherVector = typename gko::distributed::Vector; + auto tmp = OtherVector::create(ref, comm); + auto dtmp = OtherVector::create(exec, comm); + init_vectors(); + + x->convert_to(tmp.get()); + dx->convert_to(dtmp.get()); + + GKO_ASSERT_MTX_NEAR(tmp->get_local(), dtmp->get_local(), + r::value); +} + + +TEST_F(VectorLocalOps, MovesToPrecision) +{ + using OtherVector = typename 
gko::distributed::Vector; + auto tmp = OtherVector::create(ref, comm); + auto dtmp = OtherVector::create(exec, comm); + init_vectors(); + + x->move_to(tmp.get()); + dx->move_to(dtmp.get()); + + GKO_ASSERT_MTX_NEAR(tmp->get_local(), dtmp->get_local(), + r::value); +} + + +TEST_F(VectorLocalOps, ComputeAbsoluteSameAsLocal) +{ + init_vectors(); + + auto abs = x->compute_absolute(); + auto dabs = dx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs->get_local(), dabs->get_local(), + r::value); +} + + +TEST_F(VectorLocalOps, ComputeAbsoluteInplaceSameAsLocal) +{ + init_vectors(); + + x->compute_absolute_inplace(); + dx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, MakeComplexSameAsLocal) +{ + init_vectors(); + init_complex_vectors(); + + complex = x->make_complex(); + dcomplex = dx->make_complex(); + + GKO_ASSERT_MTX_NEAR(complex->get_local(), dcomplex->get_local(), + r::value); +} + + +TEST_F(VectorLocalOps, MakeComplexInplaceSameAsLocal) +{ + init_vectors(); + init_complex_vectors(); + + x->make_complex(complex.get()); + dx->make_complex(dcomplex.get()); + + GKO_ASSERT_MTX_NEAR(complex->get_local(), dcomplex->get_local(), + r::value); +} + + +TEST_F(VectorLocalOps, GetRealSameAsLocal) +{ + init_vectors(); + init_complex_vectors(); + + x = complex->get_real(); + dx = dcomplex->get_real(); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, GetRealInplaceSameAsLocal) +{ + init_vectors(); + init_complex_vectors(); + + complex->get_real(x.get()); + dcomplex->get_real(dx.get()); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, GetImagSameAsLocal) +{ + init_vectors(); + init_complex_vectors(); + + x = complex->get_imag(); + dx = dcomplex->get_imag(); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, GetImagInplaceSameAsLocal) +{ + init_vectors(); + 
init_complex_vectors(); + + complex->get_imag(x.get()); + dcomplex->get_imag(dx.get()); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, FillSameAsLocal) +{ + init_vectors(); + auto value = gko::test::detail::get_rand_value( + std::normal_distribution>(), engine); + + x->fill(value); + dx->fill(value); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, ScaleSameAsLocal) +{ + init_vectors(); + + x->scale(alpha.get()); + dx->scale(dalpha.get()); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, InvScaleSameAsLocal) +{ + init_vectors(); + + x->inv_scale(alpha.get()); + dx->inv_scale(dalpha.get()); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, AddScaleSameAsLocal) +{ + init_vectors(); + + x->add_scaled(alpha.get(), y.get()); + dx->add_scaled(dalpha.get(), dy.get()); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +TEST_F(VectorLocalOps, SubScaleSameAsLocal) +{ + init_vectors(); + + x->sub_scaled(alpha.get(), y.get()); + dx->sub_scaled(dalpha.get(), dy.get()); + + GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); +} + + +} // namespace From 9df6a962566dfcada64b30a56fc8f5f0df6b7365 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 25 Feb 2022 17:13:18 +0100 Subject: [PATCH 25/38] adds constructor from local vector --- core/distributed/vector.cpp | 31 ++++++++++++++++++++++ include/ginkgo/core/distributed/vector.hpp | 27 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 04a11d3d758..8f7b9ec4f3f 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -84,6 +84,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( } // namespace detail +dim<2> compute_global_size(mpi::communicator comm, dim<2> local_size) +{ + size_type 
num_global_rows = local_size[0]; + comm.all_reduce(&num_global_rows, 1, MPI_SUM); + return {num_global_rows, local_size[1]}; +} + + template void Vector::apply_impl(const LinOp* b, LinOp* x) const { @@ -105,6 +113,7 @@ Vector::Vector(std::shared_ptr exec, : Vector(exec, comm, global_size, local_size, local_size[1]) {} + template Vector::Vector(std::shared_ptr exec, mpi::communicator comm, dim<2> global_size, @@ -112,6 +121,28 @@ Vector::Vector(std::shared_ptr exec, : EnableLinOp>{exec, global_size}, DistributedBase{comm}, local_{exec, local_size, stride} +{ + GKO_ASSERT_EQUAL_COLS(global_size, local_size); +} + +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, dim<2> global_size, + local_vector_type* local_vector) + : EnableLinOp>{exec, global_size}, + DistributedBase{comm}, + local_{exec} +{ + local_vector->move_to(&local_); +} + + +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, + local_vector_type* local_vector) + : Vector(std::move(exec), comm, + compute_global_size(comm, local_vector->get_size()), local_vector) {} diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 48a48642042..bbdcf557e40 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -310,6 +310,7 @@ class Vector protected: /** * Creates an empty distributed vector with a specified size + * * @param exec Executor associated with vector * @param comm Communicator associated with vector, the default is * MPI_COMM_WORLD @@ -323,6 +324,7 @@ class Vector /** * Creates an empty distributed vector with a specified size + * * @param exec Executor associated with vector * @param comm Communicator associated with vector, the default is * MPI_COMM_WORLD @@ -335,6 +337,31 @@ class Vector mpi::communicator comm = mpi::communicator(MPI_COMM_WORLD), dim<2> global_size = {}, dim<2> local_size = {}); + /** + * Creates a distributed vector from local vectors with 
a specified size. + * + * @param exec Executor associated with this vector + * @param comm Communicator associated with this vector + * @param global_size The global size of the vector + * @param local_vector The underlying local vector, the date will be moved + * into this + */ + Vector(std::shared_ptr exec, mpi::communicator comm, + dim<2> global_size, local_vector_type* local_vector); + + /** + * Creates a distributed vector from local vectors. The global size will + * be deduced from the local sizes, which will incur a collective + * communication. + * + * @param exec Executor associated with this vector + * @param comm Communicator associated with this vector + * @param local_vector The underlying local vector, the date will be moved + * into this + */ + Vector(std::shared_ptr exec, mpi::communicator comm, + local_vector_type* local_vector); + void resize(dim<2> global_size, dim<2> local_size); void apply_impl(const LinOp*, LinOp*) const override; From d0da2ace8d062347fc1a2db34d2f57206cd59b5e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 25 Feb 2022 13:31:54 +0100 Subject: [PATCH 26/38] adds create_real_view to distributed vector --- core/distributed/vector.cpp | 30 ++++++++++++++++++++++ core/test/mpi/distributed/vector.cpp | 22 ++++++++++++++++ include/ginkgo/core/distributed/vector.hpp | 14 ++++++++++ test/mpi/distributed/vector.cpp | 16 ++++++++++++ 4 files changed, 82 insertions(+) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 8f7b9ec4f3f..8fe812b7f13 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -412,6 +412,36 @@ void Vector::resize(dim<2> global_size, dim<2> local_size) } +template +std::unique_ptr::real_type> +Vector::create_real_view() +{ + const auto num_global_rows = this->get_size()[0]; + const auto num_cols = + is_complex() ? 
2 * this->get_size()[1] : this->get_size()[1]; + + return Vector>::create( + this->get_executor(), this->get_communicator(), + dim<2>{num_global_rows, num_cols}, local_.create_real_view().get()); +} + + +template +std::unique_ptr::real_type> +Vector::create_real_view() const +{ + const auto num_global_rows = this->get_size()[0]; + const auto num_cols = + is_complex() ? 2 * this->get_size()[1] : this->get_size()[1]; + + return Vector>::create( + this->get_executor(), this->get_communicator(), + dim<2>{num_global_rows, num_cols}, + const_cast( + local_.create_real_view().get())); +} + + #define GKO_DECLARE_DISTRIBUTED_VECTOR(ValueType) class Vector GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DISTRIBUTED_VECTOR); diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index e545e2d7025..7243e0ad10d 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -749,4 +749,26 @@ TYPED_TEST(VectorLocalOp, SubScaleSameAsLocal) } +TYPED_TEST(VectorLocalOp, CreateRealViewSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + using real_type = gko::remove_complex; + + auto real_view = this->vec_a->create_real_view(); + auto local_real_view = this->vec_a->get_const_local()->create_real_view(); + + if (gko::is_complex()) { + EXPECT_EQ(real_view->get_size()[0], this->vec_a->get_size()[0]); + EXPECT_EQ(real_view->get_size()[1], 2 * this->vec_a->get_size()[1]); + EXPECT_EQ(real_view->get_const_local()->get_stride(), + 2 * this->vec_a->get_const_local()->get_stride()); + GKO_ASSERT_MTX_NEAR(real_view->get_const_local(), local_real_view, 0.); + } else { + EXPECT_EQ(real_view->get_size()[0], this->vec_a->get_size()[0]); + EXPECT_EQ(real_view->get_size()[1], this->vec_a->get_size()[1]); + GKO_ASSERT_MTX_NEAR(real_view->get_const_local(), local_real_view, 0.); + } +} + + } // namespace diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 
bbdcf557e40..fa2f2e42ac6 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -307,6 +307,20 @@ class Vector */ local_vector_type* get_local(); + /** + * Create a real view of the (potentially) complex original multi-vector. + * If the original vector is real, nothing changes. If the original vector + * is complex, the result is created by viewing the complex vector with as + * real with a reinterpret_cast with twice the number of columns and + * double the stride. + */ + std::unique_ptr create_real_view(); + + /** + * @copydoc create_real_view() + */ + std::unique_ptr create_real_view() const; + protected: /** * Creates an empty distributed vector with a specified size diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index 04ed646ac29..ded97c9d6a0 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -542,4 +542,20 @@ TEST_F(VectorLocalOps, SubScaleSameAsLocal) } +TEST_F(VectorLocalOps, CreateRealViewSameAsLocal) +{ + using real_type = gko::remove_complex; + init_vectors(); + + auto rv = x->create_real_view(); + auto drv = dx->create_real_view(); + + EXPECT_EQ(rv->get_size()[0], drv->get_size()[0]); + EXPECT_EQ(rv->get_size()[1], drv->get_size()[1]); + EXPECT_EQ(rv->get_const_local()->get_stride(), + drv->get_const_local()->get_stride()); + GKO_ASSERT_MTX_NEAR(rv->get_const_local(), drv->get_const_local(), 0.); +} + + } // namespace From d57202b8ae33e26245ccff238005fd547f06a2f9 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 25 Feb 2022 14:54:10 +0100 Subject: [PATCH 27/38] move read_distributed impl to .cpp --- core/distributed/vector.cpp | 49 ++++++++++++++++++++++ include/ginkgo/core/distributed/vector.hpp | 33 ++------------- 2 files changed, 53 insertions(+), 29 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 8fe812b7f13..9ff360a67b1 100644 --- a/core/distributed/vector.cpp +++ 
b/core/distributed/vector.cpp @@ -146,6 +146,41 @@ Vector::Vector(std::shared_ptr exec, {} +template +template +void Vector::read_distributed( + const device_matrix_data& data, + const Partition* partition) +{ + auto exec = this->get_executor(); + auto global_cols = data.get_size()[1]; + this->resize( + dim<2>(partition->get_size(), global_cols), + dim<2>(partition->get_part_size(this->get_communicator().rank()), + global_cols)); + + auto rank = this->get_communicator().rank(); + this->get_local()->fill(zero()); + exec->run(vector::make_build_local( + data, make_temporary_clone(exec, partition).get(), rank, + this->get_local())); +} + + +template +template +void Vector::read_distributed( + const matrix_data& data, + const Partition* partition) + +{ + this->read_distributed( + device_matrix_data::create_from_host( + this->get_executor(), data), + std::move(partition)); +} + + template void Vector::fill(const ValueType value) { @@ -446,5 +481,19 @@ Vector::create_real_view() const GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DISTRIBUTED_VECTOR); +#define GKO_DECLARE_DISTRIBUTED_VECTOR_READ_DISTRIBUTED( \ + ValueType, LocalIndexType, GlobalIndexType) \ + void Vector::read_distributed( \ + const device_matrix_data& data, \ + const Partition* partition); \ + template void \ + Vector::read_distributed( \ + const matrix_data& data, \ + const Partition* partition) + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_READ_DISTRIBUTED); + + } // namespace distributed } // namespace gko diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index fa2f2e42ac6..16c17383671 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -50,18 +50,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace distributed { -namespace detail { - - -template -void read_distributed_impl( - const device_matrix_data& data, - const Partition* partition, - Vector* result); - - -} - /** * Vector is a format which explicitly stores (multiple) distributed column @@ -115,7 +103,8 @@ class Vector * partition. * * The number of rows of the matrix data is ignored, only its number of - * columns is relevant. The number of rows is inferred from the partition. + * columns is relevant. Both the number of local and global rows are + * inferred from the row partition. * * @note The matrix data can contain entries for rows other than those owned * by the process. Entries for those rows are discarded. @@ -126,15 +115,7 @@ class Vector template void read_distributed( const device_matrix_data& data, - const Partition* partition) - { - auto global_cols = data.get_size()[1]; - this->resize( - dim<2>(partition->get_size(), global_cols), - dim<2>(partition->get_part_size(this->get_communicator().rank()), - global_cols)); - detail::read_distributed_impl(data, partition, this); - } + const Partition* partition); /** * Reads a vector from the matrix_data structure and a global row @@ -148,13 +129,7 @@ class Vector template void read_distributed( const matrix_data& data, - const Partition* partition) - { - this->read_distributed( - device_matrix_data::create_from_host( - this->get_executor(), data), - std::move(partition)); - } + const Partition* partition); void convert_to(Vector>* result) const override; From 7a882972424f2c8b046279a485e72e3d10ef73aa Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 25 Feb 2022 17:44:41 +0100 Subject: [PATCH 28/38] use unique ptr for local vector parameter --- core/distributed/vector.cpp | 18 +++++++++++------- include/ginkgo/core/distributed/vector.hpp | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 9ff360a67b1..b8855eac835 100644 --- 
a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -141,9 +141,13 @@ template Vector::Vector(std::shared_ptr exec, mpi::communicator comm, local_vector_type* local_vector) - : Vector(std::move(exec), comm, - compute_global_size(comm, local_vector->get_size()), local_vector) -{} + : EnableLinOp>{exec, {}}, + DistributedBase{comm}, + local_{exec} +{ + this->set_size(compute_global_size(comm, local_vector->get_size())); + local_vector->move_to(&local_); +} template @@ -455,9 +459,9 @@ Vector::create_real_view() const auto num_cols = is_complex() ? 2 * this->get_size()[1] : this->get_size()[1]; - return Vector>::create( - this->get_executor(), this->get_communicator(), - dim<2>{num_global_rows, num_cols}, local_.create_real_view().get()); + return real_type ::create(this->get_executor(), this->get_communicator(), + dim<2>{num_global_rows, num_cols}, + local_.create_real_view().get()); } @@ -469,7 +473,7 @@ Vector::create_real_view() const const auto num_cols = is_complex() ? 
2 * this->get_size()[1] : this->get_size()[1]; - return Vector>::create( + return real_type ::create( this->get_executor(), this->get_communicator(), dim<2>{num_global_rows, num_cols}, const_cast( diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 16c17383671..ff047d64b15 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -332,7 +332,7 @@ class Vector * @param exec Executor associated with this vector * @param comm Communicator associated with this vector * @param global_size The global size of the vector - * @param local_vector The underlying local vector, the date will be moved + * @param local_vector The underlying local vector, the data will be moved * into this */ Vector(std::shared_ptr exec, mpi::communicator comm, @@ -345,7 +345,7 @@ class Vector * * @param exec Executor associated with this vector * @param comm Communicator associated with this vector - * @param local_vector The underlying local vector, the date will be moved + * @param local_vector The underlying local vector, the data will be moved * into this */ Vector(std::shared_ptr exec, mpi::communicator comm, From 364da2274215fa3e11cdce248e13497375daeec5 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 25 Feb 2022 17:45:54 +0100 Subject: [PATCH 29/38] add constructor tests --- core/test/mpi/distributed/vector.cpp | 54 +++++++++++++++++++----- test/mpi/distributed/vector.cpp | 63 ++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 11 deletions(-) diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index 7243e0ad10d..addf60c92b3 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -47,7 +47,7 @@ namespace { template -class VectorRead : public ::testing::Test { +class VectorCreation : public ::testing::Test { public: using value_type = typename std::tuple_element<0, decltype( @@ -64,14 +64,14 @@ 
class VectorRead : public ::testing::Test { using d_md_type = gko::device_matrix_data; using dist_vec_type = gko::distributed::Vector; using dense_type = gko::matrix::Dense; - using nz_type = gko::matrix_data_entry; - VectorRead() + VectorCreation() : ref(gko::ReferenceExecutor::create()), comm(MPI_COMM_WORLD), part(gko::share(part_type::build_from_contiguous( this->ref, {ref, {0, 2, 4, 6}}))), - md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}} + md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, + md_localized{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}} {} void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } @@ -81,13 +81,14 @@ class VectorRead : public ::testing::Test { std::shared_ptr part; md_type md; + md_type md_localized[3]; }; -TYPED_TEST_SUITE(VectorRead, gko::test::ValueLocalGlobalIndexTypes); +TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); -TYPED_TEST(VectorRead, CanReadGlobalMatrixData) +TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) { using part_type = typename TestFixture::part_type; using value_type = typename TestFixture::value_type; @@ -108,7 +109,7 @@ TYPED_TEST(VectorRead, CanReadGlobalMatrixData) } -TYPED_TEST(VectorRead, CanReadGlobalMatrixDataSomeEmpty) +TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) { using part_type = typename TestFixture::part_type; using value_type = typename TestFixture::value_type; @@ -134,7 +135,7 @@ TYPED_TEST(VectorRead, CanReadGlobalMatrixDataSomeEmpty) } -TYPED_TEST(VectorRead, CanReadGlobalDeviceMatrixData) +TYPED_TEST(VectorCreation, CanReadGlobalDeviceMatrixData) { using it = typename TestFixture::global_index_type; using d_md_type = typename TestFixture::d_md_type; @@ -164,7 +165,7 @@ TYPED_TEST(VectorRead, CanReadGlobalDeviceMatrixData) } -TYPED_TEST(VectorRead, CanReadGlobalMatrixDataScattered) +TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataScattered) { using md_type = typename TestFixture::md_type; using part_type = typename 
TestFixture::part_type; @@ -189,7 +190,7 @@ TYPED_TEST(VectorRead, CanReadGlobalMatrixDataScattered) } -TYPED_TEST(VectorRead, CanReadLocalMatrixData) +TYPED_TEST(VectorCreation, CanReadLocalMatrixData) { using md_type = typename TestFixture::md_type; using part_type = typename TestFixture::part_type; @@ -217,7 +218,7 @@ TYPED_TEST(VectorRead, CanReadLocalMatrixData) } -TYPED_TEST(VectorRead, CanReadLocalMatrixDataSomeEmpty) +TYPED_TEST(VectorCreation, CanReadLocalMatrixDataSomeEmpty) { using md_type = typename TestFixture::md_type; using part_type = typename TestFixture::part_type; @@ -256,6 +257,37 @@ TYPED_TEST(VectorRead, CanReadLocalMatrixDataSomeEmpty) } +TYPED_TEST(VectorCreation, CanCreateFromLocalVectorAndSize) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + using dense_type = typename TestFixture::dense_type; + auto local_vec = dense_type::create(this->ref); + local_vec->read(this->md_localized[this->comm.rank()]); + auto clone_local_vec = gko::clone(local_vec); + + auto vec = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{6, 2}, + local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); +} + + +TYPED_TEST(VectorCreation, CanCreateFromLocalVectorWithoutSize) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + using dense_type = typename TestFixture::dense_type; + auto local_vec = dense_type::create(this->ref); + local_vec->read(this->md_localized[this->comm.rank()]); + auto clone_local_vec = gko::clone(local_vec); + + auto vec = dist_vec_type::create(this->ref, this->comm, local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); +} + + template class VectorReductions : public ::testing::Test { public: diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index ded97c9d6a0..6511ac1e8ba 100644 --- a/test/mpi/distributed/vector.cpp +++ 
b/test/mpi/distributed/vector.cpp @@ -86,6 +86,69 @@ class HostToDeviceLogger : public gko::log::Logger { }; +class VectorCreation : public ::testing::Test { +public: + using value_type = float; + using dist_vec_type = gko::distributed::Vector; + using dense_type = dist_vec_type::local_vector_type; + + VectorCreation() + : ref(gko::ReferenceExecutor::create()), + exec(), + comm(MPI_COMM_WORLD), + local_size{4, 11}, + size{local_size[1] * comm.size(), 11}, + engine(42) + { + init_executor(ref, exec, comm); + } + + void SetUp() override { ASSERT_GT(comm.size(), 0); } + + std::shared_ptr ref; + std::shared_ptr exec; + + gko::mpi::communicator comm; + + gko::dim<2> local_size; + gko::dim<2> size; + + std::default_random_engine engine; +}; + + +TEST_F(VectorCreation, CanCreateFromLocalVectorAndSize) +{ + auto local_vec = gko::test::generate_random_matrix( + local_size[0], local_size[1], + std::uniform_int_distribution(0, local_size[1]), + std::normal_distribution(), engine, ref); + auto dlocal_vec = gko::clone(exec, local_vec); + + auto vec = dist_vec_type::create(ref, comm, size, local_vec.get()); + auto dvec = dist_vec_type::create(exec, comm, size, local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, dvec); + GKO_ASSERT_MTX_NEAR(vec->get_local(), dvec->get_local(), 0); +} + + +TEST_F(VectorCreation, CanCreateFromLocalVectorWithoutSize) +{ + auto local_vec = gko::test::generate_random_matrix( + local_size[0], local_size[1], + std::uniform_int_distribution(0, local_size[1]), + std::normal_distribution(), engine, ref); + auto dlocal_vec = gko::clone(exec, local_vec); + + auto vec = dist_vec_type::create(ref, comm, local_vec.get()); + auto dvec = dist_vec_type::create(exec, comm, local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, dvec); + GKO_ASSERT_MTX_NEAR(vec->get_local(), dvec->get_local(), 0); +} + + class VectorReductions : public ::testing::Test { public: using value_type = float; From 3ac773ebe164a18ed75a66efa424fbfb100b2622 Mon Sep 17 00:00:00 2001 From: 
Marcel Koch Date: Mon, 28 Feb 2022 10:22:54 +0100 Subject: [PATCH 30/38] review updates - formating - test updates - documentation Co-authored-by: Tobias Ribizel Co-authored-by: Pratik Nayak --- core/test/mpi/distributed/vector.cpp | 45 ++++++++++++++++-------- include/ginkgo/core/distributed/base.hpp | 4 ++- test/mpi/distributed/vector.cpp | 10 +++--- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index addf60c92b3..d5ae5ee80c5 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -74,7 +74,7 @@ class VectorCreation : public ::testing::Test { md_localized{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}} {} - void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } + void SetUp() override { ASSERT_GE(this->comm.size(), 3); } std::shared_ptr ref; gko::mpi::communicator comm; @@ -315,7 +315,7 @@ class VectorReductions : public ::testing::Test { vec_b->read_distributed(md_b, part.get()); } - void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } + void SetUp() override { ASSERT_GE(this->comm.size(), 3); } std::shared_ptr ref; gko::mpi::communicator comm; @@ -352,10 +352,12 @@ TYPED_TEST(VectorReductions, ComputesConjDot) using dist_vec_type = typename TestFixture::dist_vec_type; auto md_a = gko::test::generate_random_matrix_data( 6, 2, std::uniform_int_distribution(2, 2), - std::normal_distribution(0, 1), std::ranlux48{42}); + std::normal_distribution(0, 1), + std::default_random_engine{42}); auto md_b = gko::test::generate_random_matrix_data( 6, 2, std::uniform_int_distribution(2, 2), - std::normal_distribution(0, 1), std::ranlux48{42}); + std::normal_distribution(0, 1), + std::default_random_engine{42}); auto dist_vec_a = dist_vec_type::create(this->ref, this->comm); auto dist_vec_b = dist_vec_type::create(this->ref, this->comm); auto dense_vec_a = dense_type::create(this->ref); @@ -374,7 +376,7 @@ 
TYPED_TEST(VectorReductions, ComputesConjDot) } -TYPED_TEST(VectorReductions, ComputesNorm) +TYPED_TEST(VectorReductions, ComputesNorm2) { using dense_type = typename TestFixture::dense_type; using value_type = typename TestFixture::value_type; @@ -390,6 +392,20 @@ TYPED_TEST(VectorReductions, ComputesNorm) } +TYPED_TEST(VectorReductions, ComputesNorm1) +{ + using dense_type = typename TestFixture::dense_type; + using value_type = typename TestFixture::value_type; + auto res = dense_type::absolute_type::create(this->ref, gko::dim<2>{1, 2}); + auto ref_res = gko::initialize( + {{30, 36}}, this->ref); + + this->vec_a->compute_norm1(res.get()); + + GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); +} + + template class VectorLocalOp : public ::testing::Test { public: @@ -418,7 +434,7 @@ class VectorLocalOp : public ::testing::Test { vec_b->read_distributed(md_b, part.get()); } - void SetUp() override { ASSERT_EQ(this->comm.size(), 3); } + void SetUp() override { ASSERT_GE(this->comm.size(), 3); } auto generate_local_and_global_pair(gko::dim<2> local_size) { @@ -455,11 +471,11 @@ TYPED_TEST(VectorLocalOp, ApplyNotSupported) { using dist_vec_type = typename TestFixture::dist_vec_type; auto a = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{}); + gko::dim<2>{2, 2}); auto b = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{}); + gko::dim<2>{2, 2}); auto c = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{}); + gko::dim<2>{2, 2}); ASSERT_THROW(a->apply(b.get(), c.get()), gko::NotSupported); } @@ -469,15 +485,16 @@ TYPED_TEST(VectorLocalOp, AdvancedApplyNotSupported) { using dist_vec_type = typename TestFixture::dist_vec_type; auto a = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{}); + gko::dim<2>{2, 2}); auto b = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{1, 1}, - gko::dim<2>{}); + gko::dim<2>{1, 1}); auto c = dist_vec_type::create(this->ref, 
this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{}); + gko::dim<2>{2, 2}); auto d = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{1, 1}, - gko::dim<2>{}); + gko::dim<2>{1, 1}); auto e = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{}); + gko::dim<2>{2, 2}); + ASSERT_THROW(a->apply(b.get(), c.get(), d.get(), e.get()), gko::NotSupported); } diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp index 4fc0c816944..2be0e917163 100644 --- a/include/ginkgo/core/distributed/base.hpp +++ b/include/ginkgo/core/distributed/base.hpp @@ -50,7 +50,9 @@ namespace distributed { /** * A base class for distributed objects. * - * This class gives access to the used mpi::communicator object. + * This class stores and gives access to the used mpi::communicator object. + * + * @note The communicator is not changed on assignment. * * @ingroup distributed */ diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index 6511ac1e8ba..b9da3a24545 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -204,14 +204,14 @@ class VectorReductions : public ::testing::Test { dy = gko::clone(exec, y); } - void SetUp() override { ASSERT_EQ(comm.size(), 3); } + void SetUp() override { ASSERT_GT(comm.size(), 0); } void init_result() { res = dense_type::create(ref, gko::dim<2>{1, size[1]}); dres = dense_type::create(exec, gko::dim<2>{1, size[1]}); - res->fill(0.); - dres->fill(0.); + res->fill(0.0); + dres->fill(0.0); } std::shared_ptr ref; @@ -369,7 +369,7 @@ class VectorLocalOps : public ::testing::Test { part = part_type::build_from_mapping(ref, mapping, num_parts); } - void SetUp() override { ASSERT_EQ(comm.size(), 3); } + void SetUp() override { ASSERT_GT(comm.size(), 0); } template void generate_vector_pair(std::unique_ptr& host, @@ -617,7 +617,7 @@ TEST_F(VectorLocalOps, CreateRealViewSameAsLocal) EXPECT_EQ(rv->get_size()[1], drv->get_size()[1]); 
EXPECT_EQ(rv->get_const_local()->get_stride(), drv->get_const_local()->get_stride()); - GKO_ASSERT_MTX_NEAR(rv->get_const_local(), drv->get_const_local(), 0.); + GKO_ASSERT_MTX_NEAR(rv->get_const_local(), drv->get_const_local(), 0.0); } From ba75555d980fdbcbcd28d71002eec4f435347d3c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 28 Feb 2022 15:19:25 +0100 Subject: [PATCH 31/38] make dense::move_to adhere to move semantics will be superseeded by https://github.com/ginkgo-project/ginkgo/pull/753 --- core/matrix/dense.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 354e7220c7c..b87253820c7 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -488,7 +488,11 @@ void Dense::convert_to(Dense* result) const template void Dense::move_to(Dense* result) { - this->convert_to(result); + if (this != result) { + result->values_ = std::move(this->values_); + result->stride_ = this->stride_; + result->set_size(this->get_size()); + } } From c1f2eb1b466ec37cb2a9d16ad62286f4b03b3faf Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 28 Feb 2022 15:19:48 +0100 Subject: [PATCH 32/38] add test to check create_real_view behaviour --- core/test/mpi/distributed/vector.cpp | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index d5ae5ee80c5..39318519d8c 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -820,4 +820,45 @@ TYPED_TEST(VectorLocalOp, CreateRealViewSameAsLocal) } +class AllocCounter : public gko::log::Logger { +public: + void on_allocation_started(const gko::Executor* exec, + const gko::size_type& num_bytes) const override + { + count_++; + } + + int get_count() const { return count_; } + + static std::unique_ptr create( + std::shared_ptr exec) + { + return std::unique_ptr(new AllocCounter(std::move(exec))); + } + +private: + 
explicit AllocCounter(std::shared_ptr exec) + : gko::log::Logger(std::move(exec), + gko::log::Logger::allocation_started_mask), + count_(0) + {} + + mutable int count_; +}; + + +TYPED_TEST(VectorLocalOp, CreateRealViewIsView) +{ + using value_type = typename TestFixture::value_type; + using real_type = gko::remove_complex; + auto log = gko::share(AllocCounter::create(this->ref)); + + this->ref->add_logger(log); + auto real_view = this->vec_a->create_real_view(); + this->ref->remove_logger(log.get()); + + ASSERT_EQ(log->get_count(), 0); +} + + } // namespace From 2578a9ca46570afe32ce908f314d2f4e7d7b17a8 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 2 Mar 2022 14:10:02 +0100 Subject: [PATCH 33/38] review updates - small rename - documentation - cmake - tests Co-authored-by: Yuhsiang Tsai --- cmake/create_test.cmake | 2 +- common/unified/matrix/dense_kernels.cpp | 9 +++-- core/CMakeLists.txt | 2 +- core/base/{cache.cpp => dense_cache.cpp} | 4 +- core/device_hooks/common_kernels.inc.cpp | 2 +- core/distributed/vector.cpp | 37 ++----------------- core/matrix/dense.cpp | 2 +- core/matrix/dense_kernels.hpp | 10 ++--- core/test/base/CMakeLists.txt | 2 +- core/test/base/{cache.cpp => dense_cache.cpp} | 4 +- core/test/mpi/distributed/vector.cpp | 23 ++++++------ .../core/base/{cache.hpp => dense_cache.hpp} | 6 +-- include/ginkgo/core/distributed/base.hpp | 12 ++++++ include/ginkgo/core/distributed/vector.hpp | 2 +- include/ginkgo/ginkgo.hpp | 2 +- reference/matrix/dense_kernels.cpp | 9 +++-- reference/test/matrix/dense_kernels.cpp | 28 +++----------- test/matrix/dense_kernels.cpp | 11 ++++-- test/mpi/distributed/vector.cpp | 4 +- 19 files changed, 67 insertions(+), 104 deletions(-) rename core/base/{cache.cpp => dense_cache.cpp} (98%) rename core/test/base/{cache.cpp => dense_cache.cpp} (99%) rename include/ginkgo/core/base/{cache.hpp => dense_cache.hpp} (96%) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index b318243da5a..baab8c510c9 100644 --- 
a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -221,7 +221,7 @@ function(ginkgo_internal_create_common_test_template test_name) elseif(${test_type} STREQUAL mpi) ginkgo_internal_add_mpi_test(${test_name}_${exec} ${test_target_name} ${common_test_ADDITIONAL_TEST_PARAMETERS}) else() - message(WARNING "Encountered unrecognized test type ${test_type} during common test creation.") + message(FATAL_ERROR "Encountered unrecognized test type ${test_type} during common test creation.") endif() endforeach() endfunction(ginkgo_internal_create_common_test_template) diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.cpp index ef105ba7993..5e35cc42898 100644 --- a/common/unified/matrix/dense_kernels.cpp +++ b/common/unified/matrix/dense_kernels.cpp @@ -381,9 +381,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_norm2_sqr(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result) +void compute_squared_norm2(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) { run_kernel_col_reduction( exec, @@ -392,7 +392,8 @@ void compute_norm2_sqr(std::shared_ptr exec, x->get_size(), x); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); template diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index b7760ca47fe..16156993a0a 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -4,7 +4,7 @@ add_library(ginkgo "") target_sources(ginkgo PRIVATE base/array.cpp - base/cache.cpp + base/dense_cache.cpp base/combination.cpp base/composition.cpp base/device_matrix_data.cpp diff --git a/core/base/cache.cpp b/core/base/dense_cache.cpp similarity index 98% rename from core/base/cache.cpp rename to core/base/dense_cache.cpp index 222b94ac9f8..cb426f0ee65 100644 --- a/core/base/cache.cpp +++ b/core/base/dense_cache.cpp @@ -30,9 +30,7 @@ THEORY OF LIABILITY, WHETHER IN 
CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include - - +#include #include diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 4a861aa3356..404062445d6 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -280,7 +280,7 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL); diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index b8855eac835..7e7e03cd866 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -43,7 +43,7 @@ namespace vector { namespace { -GKO_REGISTER_OPERATION(compute_norm2_sqr, dense::compute_norm2_sqr); +GKO_REGISTER_OPERATION(compute_squared_norm2, dense::compute_squared_norm2); GKO_REGISTER_OPERATION(compute_sqrt, dense::compute_sqrt); GKO_REGISTER_OPERATION(outplace_absolute_dense, dense::outplace_absolute_dense); GKO_REGISTER_OPERATION(build_local, distributed_vector::build_local); @@ -53,37 +53,6 @@ GKO_REGISTER_OPERATION(build_local, distributed_vector::build_local); } // namespace vector -namespace detail { - - -template -void read_distributed_impl( - const device_matrix_data& data, - const Partition* partition, - Vector* result) -{ - auto exec = result->get_executor(); - - auto rank = result->get_communicator().rank(); - 
result->get_local()->fill(zero()); - exec->run(vector::make_build_local( - data, make_temporary_clone(exec, partition).get(), rank, - result->get_local())); -} - -#define GKO_DECLARE_DISTRIBUTED_READ_DISTRIBUTED_IMPL( \ - ValueType, LocalIndexType, GlobalIndexType) \ - void read_distributed_impl( \ - const device_matrix_data& data, \ - const Partition* partition, \ - Vector* storage) -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_DISTRIBUTED_READ_DISTRIBUTED_IMPL); - - -} // namespace detail - - dim<2> compute_global_size(mpi::communicator comm, dim<2> local_size) { size_type num_global_rows = local_size[0]; @@ -397,8 +366,8 @@ void Vector::compute_norm2(LinOp* result) const auto exec = this->get_executor(); const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); - exec->run(vector::make_compute_norm2_sqr(this->get_const_local(), - dense_res.get())); + exec->run(vector::make_compute_squared_norm2(this->get_const_local(), + dense_res.get())); exec->synchronize(); auto use_host_buffer = exec->get_master() != exec && !gko::mpi::is_gpu_aware(); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index b87253820c7..2e776d89a7e 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -80,7 +80,7 @@ GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot_dispatch); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot_dispatch); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2_dispatch); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); -GKO_REGISTER_OPERATION(compute_norm2_sqr, dense::compute_norm2_sqr); +GKO_REGISTER_OPERATION(compute_squared_norm2, dense::compute_squared_norm2); GKO_REGISTER_OPERATION(compute_sqrt, dense::compute_sqrt); GKO_REGISTER_OPERATION(compute_max_nnz_per_row, dense::compute_max_nnz_per_row); GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs, diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 
f8c7fcb8aa0..530b641c56c 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -151,10 +151,10 @@ namespace kernels { const device_matrix_data<_type, _prec>& data, \ matrix::Dense<_type>* output) -#define GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL(_type) \ - void compute_norm2_sqr(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - matrix::Dense>* result) +#define GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(_type) \ + void compute_squared_norm2(std::shared_ptr exec, \ + const matrix::Dense<_type>* x, \ + matrix::Dense>* result) #define GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(_type) \ void compute_sqrt(std::shared_ptr exec, \ @@ -351,7 +351,7 @@ namespace kernels { template \ GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL(ValueType); \ + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(ValueType); \ template \ diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index 56c983f1481..f200d0eb583 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -1,7 +1,7 @@ ginkgo_create_test(abstract_factory) ginkgo_create_test(allocator) ginkgo_create_test(array) -ginkgo_create_test(cache) +ginkgo_create_test(dense_cache) ginkgo_create_test(combination) ginkgo_create_test(composition) ginkgo_create_test(dim) diff --git a/core/test/base/cache.cpp b/core/test/base/dense_cache.cpp similarity index 99% rename from core/test/base/cache.cpp rename to core/test/base/dense_cache.cpp index 1f0a8c38a90..6097e3df8e3 100644 --- a/core/test/base/cache.cpp +++ b/core/test/base/dense_cache.cpp @@ -30,12 +30,10 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#include - - #include +#include #include diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp index 39318519d8c..c4683206798 100644 --- a/core/test/mpi/distributed/vector.cpp +++ b/core/test/mpi/distributed/vector.cpp @@ -84,7 +84,6 @@ class VectorCreation : public ::testing::Test { md_type md_localized[3]; }; - TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); @@ -94,18 +93,18 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) using value_type = typename TestFixture::value_type; auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; vec->read_distributed(this->md, this->part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), gko::dim<2>(2, 2)); - I> ref_data[3] = { - {{0, 1}, {2, 3}}, - {{4, 5}, {6, 7}}, - {{8, 9}, {10, 11}}, - }; - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); } @@ -127,7 +126,7 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) GKO_ASSERT_MTX_NEAR( vec->get_local(), l({{0., 1.}, {2., 3.}, {4., 5.}, {6., 7.}, {8., 9.}, {10., 11.}}), - r::value); + 0.0); } else { GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), gko::dim<2>(0, 2)); @@ -161,7 +160,7 @@ TYPED_TEST(VectorCreation, CanReadGlobalDeviceMatrixData) GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); } @@ -186,7 +185,7 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataScattered) GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), 
gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), ref_size[rank]); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); } @@ -214,7 +213,7 @@ TYPED_TEST(VectorCreation, CanReadLocalMatrixData) GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], r::value); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); } @@ -249,7 +248,7 @@ TYPED_TEST(VectorCreation, CanReadLocalMatrixDataSomeEmpty) vec->get_local(), I>( {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}), - r::value); + 0.0); } else { GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), gko::dim<2>(0, 2)); diff --git a/include/ginkgo/core/base/cache.hpp b/include/ginkgo/core/base/dense_cache.hpp similarity index 96% rename from include/ginkgo/core/base/cache.hpp rename to include/ginkgo/core/base/dense_cache.hpp index 19c56700fbe..ea2a29ddf3c 100644 --- a/include/ginkgo/core/base/cache.hpp +++ b/include/ginkgo/core/base/dense_cache.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#ifndef GKO_PUBLIC_CORE_BASE_CACHE_HPP_ -#define GKO_PUBLIC_CORE_BASE_CACHE_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_DENSE_CACHE_HPP_ +#define GKO_PUBLIC_CORE_BASE_DENSE_CACHE_HPP_ #include @@ -123,4 +123,4 @@ struct DenseCache { } // namespace gko -#endif // GKO_PUBLIC_CORE_BASE_CACHE_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_DENSE_CACHE_HPP_ diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp index 2be0e917163..cc3263bf7a4 100644 --- a/include/ginkgo/core/distributed/base.hpp +++ b/include/ginkgo/core/distributed/base.hpp @@ -60,11 +60,23 @@ class DistributedBase { public: virtual ~DistributedBase() = default; + /** + * Copy assignment that doesn't change the used mpi::communicator. + * @return unmodified *this + */ DistributedBase& operator=(const DistributedBase&) { return *this; } + /** + * Access the used mpi::communicator. + * @return used mpi::communicator + */ mpi::communicator get_communicator() const { return comm_; } protected: + /** + * Creates a new DistributedBase with the specified mpi::communicator. + * @param comm used mpi::communicator + */ explicit DistributedBase(mpi::communicator comm) : comm_{std::move(comm)} {} private: diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index ff047d64b15..be08f6c1677 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if GINKGO_BUILD_MPI -#include +#include #include #include #include diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 671cd033ece..499531d8d2c 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -39,9 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include -#include #include #include +#include #include #include #include diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 309fa473521..125be200089 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -413,9 +413,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void compute_norm2_sqr(std::shared_ptr exec, - const matrix::Dense* x, - matrix::Dense>* result) +void compute_squared_norm2(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) { for (size_type j = 0; j < x->get_size()[1]; ++j) { result->at(0, j) = zero>(); @@ -427,7 +427,8 @@ void compute_norm2_sqr(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_SQR_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); template diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 48f9f69cfe6..1fe9ce08cc0 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -614,13 +614,14 @@ TYPED_TEST(Dense, ComputesNorm2) TYPED_TEST(Dense, ComputesNorm2Mixed) { + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; using MixedMtx = typename TestFixture::MixedMtx; using MixedT = typename MixedMtx::value_type; using MixedT_nc = gko::remove_complex; using MixedNormVector = gko::matrix::Dense; - auto mtx(gko::initialize( - {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, - this->exec)); + auto mtx(gko::initialize( + {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, this->exec)); auto result = MixedNormVector::create(this->exec, gko::dim<2>{1, 2}); mtx->compute_norm2(result.get()); @@ -640,7 +641,7 @@ TYPED_TEST(Dense, ComputesNorm2Squared) {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, this->exec)); auto result = NormVector::create(this->exec, gko::dim<2>{1, 2}); - gko::kernels::reference::dense::compute_norm2_sqr( + 
gko::kernels::reference::dense::compute_squared_norm2( gko::as(this->exec), mtx.get(), result.get()); EXPECT_EQ(result->at(0, 0), T_nc{9.0}); @@ -648,25 +649,6 @@ TYPED_TEST(Dense, ComputesNorm2Squared) } -TYPED_TEST(Dense, ComputesNorm2SquaredMixed) -{ - using MixedMtx = typename TestFixture::MixedMtx; - using MixedT = typename MixedMtx::value_type; - using MixedT_nc = gko::remove_complex; - using MixedNormVector = gko::matrix::Dense; - auto mtx(gko::initialize( - {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, - this->exec)); - auto result = MixedNormVector::create(this->exec, gko::dim<2>{1, 2}); - - gko::kernels::reference::dense::compute_norm2_sqr( - gko::as(this->exec), mtx.get(), result.get()); - - EXPECT_EQ(result->at(0, 0), MixedT_nc{9.0}); - EXPECT_EQ(result->at(0, 1), MixedT_nc{25.0}); -} - - TYPED_TEST(Dense, ComputesSqrt) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index 43178920308..6183cfcc487 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1339,9 +1339,9 @@ TEST_F(Dense, ComputeNorm2SquaredIsEquivalentToRef) auto norm_expected = NormVector::create(ref, norm_size); auto dnorm = NormVector::create(exec, norm_size); - gko::kernels::reference::dense::compute_norm2_sqr(ref, x.get(), + gko::kernels::reference::dense::compute_squared_norm2(ref, x.get(), norm_expected.get()); - gko::kernels::EXEC_NAMESPACE::dense::compute_norm2_sqr(exec, dx.get(), + gko::kernels::EXEC_NAMESPACE::dense::compute_squared_norm2(exec, dx.get(), dnorm.get()); GKO_ASSERT_MTX_NEAR(dnorm, norm_expected, r::value); @@ -1350,8 +1350,11 @@ TEST_F(Dense, ComputeNorm2SquaredIsEquivalentToRef) TEST_F(Dense, ComputesSqrt) { - auto mtx(gko::initialize(I>>{{9.0, 25.0}}, ref)); - auto dmtx(gko::initialize(I>>{{9.0, 25.0}}, exec)); + auto mtx = gko::test::generate_random_matrix( + 1, 7, std::uniform_int_distribution(7, 7), + std::uniform_real_distribution>(0, 10), + rand_engine, ref); + auto dmtx 
= gko::clone(exec, mtx); gko::kernels::reference::dense::compute_sqrt(ref, mtx.get()); gko::kernels::EXEC_NAMESPACE::dense::compute_sqrt(exec, dmtx.get()); diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index b9da3a24545..dc190f142a7 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -126,7 +126,7 @@ TEST_F(VectorCreation, CanCreateFromLocalVectorAndSize) auto dlocal_vec = gko::clone(exec, local_vec); auto vec = dist_vec_type::create(ref, comm, size, local_vec.get()); - auto dvec = dist_vec_type::create(exec, comm, size, local_vec.get()); + auto dvec = dist_vec_type::create(exec, comm, size, dlocal_vec.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec, dvec); GKO_ASSERT_MTX_NEAR(vec->get_local(), dvec->get_local(), 0); @@ -142,7 +142,7 @@ TEST_F(VectorCreation, CanCreateFromLocalVectorWithoutSize) auto dlocal_vec = gko::clone(exec, local_vec); auto vec = dist_vec_type::create(ref, comm, local_vec.get()); - auto dvec = dist_vec_type::create(exec, comm, local_vec.get()); + auto dvec = dist_vec_type::create(exec, comm, dlocal_vec.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec, dvec); GKO_ASSERT_MTX_NEAR(vec->get_local(), dvec->get_local(), 0); From 1fecf6250747772a9c49aca7e57c9b0da8e6f5a4 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 2 Mar 2022 17:03:29 +0100 Subject: [PATCH 34/38] merge reference and common mpi tests Co-authored-by: Yuhsiang Tsai Co-authored-by: Tobias Ribizel --- cmake/create_test.cmake | 15 +- core/test/mpi/CMakeLists.txt | 1 - core/test/mpi/distributed/CMakeLists.txt | 1 - core/test/mpi/distributed/vector.cpp | 863 ----------------------- test/base/device_matrix_data_kernels.cpp | 4 +- test/mpi/distributed/CMakeLists.txt | 2 +- test/mpi/distributed/vector.cpp | 771 +++++++++++++------- 7 files changed, 549 insertions(+), 1108 deletions(-) delete mode 100644 core/test/mpi/distributed/CMakeLists.txt delete mode 100644 core/test/mpi/distributed/vector.cpp diff --git a/cmake/create_test.cmake 
b/cmake/create_test.cmake index baab8c510c9..0cafb0c0f35 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -241,7 +241,20 @@ function(ginkgo_create_common_and_reference_test test_name) add_executable(${test_target_name} ${test_name}.cpp) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) - target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference) + target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference GKO_COMPILING_REFERENCE) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) ginkgo_internal_add_test(${test_name}_reference ${test_target_name}) endfunction() + + +function(ginkgo_create_common_and_reference_mpi_test test_name num_mpi_procs) + ginkgo_create_common_mpi_test(${test_name} ${num_mpi_procs}) + ginkgo_build_test_name(${test_name} test_target_name) + set(test_target_name ${test_target_name}_reference) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference GKO_COMPILING_REFERENCE) + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + ginkgo_internal_add_mpi_test(${test_name}_reference ${test_target_name} ${num_mpi_procs}) +endfunction() diff --git a/core/test/mpi/CMakeLists.txt b/core/test/mpi/CMakeLists.txt index eb2c9192ebc..1ad6a5575b2 100644 --- a/core/test/mpi/CMakeLists.txt +++ b/core/test/mpi/CMakeLists.txt @@ -1,2 +1 @@ add_subdirectory(base) -add_subdirectory(distributed) diff --git a/core/test/mpi/distributed/CMakeLists.txt b/core/test/mpi/distributed/CMakeLists.txt deleted file mode 100644 index d48945a104a..00000000000 --- a/core/test/mpi/distributed/CMakeLists.txt +++ 
/dev/null @@ -1 +0,0 @@ -ginkgo_create_mpi_test(vector 3) diff --git a/core/test/mpi/distributed/vector.cpp b/core/test/mpi/distributed/vector.cpp deleted file mode 100644 index c4683206798..00000000000 --- a/core/test/mpi/distributed/vector.cpp +++ /dev/null @@ -1,863 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include -#include - - -#include "core/test/utils.hpp" - - -namespace { - - -template -class VectorCreation : public ::testing::Test { -public: - using value_type = - typename std::tuple_element<0, decltype( - ValueLocalGlobalIndexType())>::type; - using local_index_type = - typename std::tuple_element<1, decltype( - ValueLocalGlobalIndexType())>::type; - using global_index_type = - typename std::tuple_element<2, decltype( - ValueLocalGlobalIndexType())>::type; - using part_type = - gko::distributed::Partition; - using md_type = gko::matrix_data; - using d_md_type = gko::device_matrix_data; - using dist_vec_type = gko::distributed::Vector; - using dense_type = gko::matrix::Dense; - - VectorCreation() - : ref(gko::ReferenceExecutor::create()), - comm(MPI_COMM_WORLD), - part(gko::share(part_type::build_from_contiguous( - this->ref, {ref, {0, 2, 4, 6}}))), - md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, - md_localized{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}} - {} - - void SetUp() override { ASSERT_GE(this->comm.size(), 3); } - - std::shared_ptr ref; - gko::mpi::communicator comm; - std::shared_ptr part; - - md_type md; - md_type md_localized[3]; -}; - -TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); - - -TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) -{ - using part_type = typename TestFixture::part_type; - using value_type = typename TestFixture::value_type; - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); - auto rank = this->comm.rank(); - I> ref_data[3] = { - {{0, 1}, {2, 3}}, - {{4, 5}, {6, 7}}, - {{8, 9}, {10, 11}}, - }; - - vec->read_distributed(this->md, this->part.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), - gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); -} - - 
-TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) -{ - using part_type = typename TestFixture::part_type; - using value_type = typename TestFixture::value_type; - auto part = gko::share( - part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); - auto rank = this->comm.rank(); - - vec->read_distributed(this->md, part.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - if (rank == 1) { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), - gko::dim<2>(6, 2)); - GKO_ASSERT_MTX_NEAR( - vec->get_local(), - l({{0., 1.}, {2., 3.}, {4., 5.}, {6., 7.}, {8., 9.}, {10., 11.}}), - 0.0); - } else { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), - gko::dim<2>(0, 2)); - } -} - - -TYPED_TEST(VectorCreation, CanReadGlobalDeviceMatrixData) -{ - using it = typename TestFixture::global_index_type; - using d_md_type = typename TestFixture::d_md_type; - using part_type = typename TestFixture::part_type; - using vt = typename TestFixture::value_type; - d_md_type md{ - this->ref, gko::dim<2>{6, 2}, - gko::Array{this->ref, I{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}}, - gko::Array{this->ref, I{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}, - gko::Array{this->ref, I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; - auto part = gko::share( - part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); - auto rank = this->comm.rank(); - I> ref_data[3] = { - {{0, 1}, {2, 3}}, - {{4, 5}, {6, 7}}, - {{8, 9}, {10, 11}}, - }; - - vec->read_distributed(md, part.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), - gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); -} - - -TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataScattered) -{ - using md_type = typename 
TestFixture::md_type; - using part_type = typename TestFixture::part_type; - using value_type = typename TestFixture::value_type; - md_type md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; - auto part = gko::share(part_type::build_from_mapping( - this->ref, {this->ref, {0, 1, 2, 0, 2, 0}}, 3)); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); - auto rank = this->comm.rank(); - gko::dim<2> ref_size[3] = {{3, 2}, {1, 2}, {2, 2}}; - I> ref_data[3] = { - {{0, 1}, {6, 7}, {10, 11}}, - {{2, 3}}, - {{4, 5}, {8, 9}}, - }; - - vec->read_distributed(md, part.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), ref_size[rank]); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); -} - - -TYPED_TEST(VectorCreation, CanReadLocalMatrixData) -{ - using md_type = typename TestFixture::md_type; - using part_type = typename TestFixture::part_type; - using value_type = typename TestFixture::value_type; - md_type md[3] = { - {gko::dim<2>{6, 2}, {{0, 0, 0}, {0, 1, 1}, {1, 0, 2}, {1, 1, 3}}}, - {gko::dim<2>{6, 2}, {{2, 0, 4}, {2, 1, 5}, {3, 0, 6}, {3, 1, 7}}}, - {gko::dim<2>{6, 2}, {{4, 0, 8}, {4, 1, 9}, {5, 0, 10}, {5, 1, 11}}}}; - auto part = gko::share( - part_type::build_from_contiguous(this->ref, {this->ref, {0, 2, 4, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); - auto rank = this->comm.rank(); - I> ref_data[3] = { - {{0, 1}, {2, 3}}, - {{4, 5}, {6, 7}}, - {{8, 9}, {10, 11}}, - }; - - vec->read_distributed(md[rank], part.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), - gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); -} - - -TYPED_TEST(VectorCreation, CanReadLocalMatrixDataSomeEmpty) -{ - using md_type = typename TestFixture::md_type; - using part_type = typename TestFixture::part_type; - using value_type = 
typename TestFixture::value_type; - md_type md[3] = {{gko::dim<2>{6, 2}, {}}, - {gko::dim<2>{6, 2}, - // clang-format off - {{0, 0, 0}, {0, 1, 1}, - {1, 0, 2}, {1, 1, 3}, - {2, 0, 4}, {2, 1, 5}, - {3, 0, 6}, {3, 1, 7}, - {4, 0, 8}, {4, 1, 9}, - {5, 0, 10}, {5, 1, 11}}}, - // clang-format on - {gko::dim<2>{6, 2}, {}}}; - auto part = gko::share( - part_type::build_from_contiguous(this->ref, {this->ref, {0, 0, 6, 6}})); - auto vec = TestFixture::dist_vec_type::create(this->ref, this->comm); - auto rank = this->comm.rank(); - - vec->read_distributed(md[rank], part.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - if (rank == 1) { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), - gko::dim<2>(6, 2)); - GKO_ASSERT_MTX_NEAR( - vec->get_local(), - I>( - {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}), - 0.0); - } else { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), - gko::dim<2>(0, 2)); - } -} - - -TYPED_TEST(VectorCreation, CanCreateFromLocalVectorAndSize) -{ - using dist_vec_type = typename TestFixture::dist_vec_type; - using dense_type = typename TestFixture::dense_type; - auto local_vec = dense_type::create(this->ref); - local_vec->read(this->md_localized[this->comm.rank()]); - auto clone_local_vec = gko::clone(local_vec); - - auto vec = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{6, 2}, - local_vec.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); -} - - -TYPED_TEST(VectorCreation, CanCreateFromLocalVectorWithoutSize) -{ - using dist_vec_type = typename TestFixture::dist_vec_type; - using dense_type = typename TestFixture::dense_type; - auto local_vec = dense_type::create(this->ref); - local_vec->read(this->md_localized[this->comm.rank()]); - auto clone_local_vec = gko::clone(local_vec); - - auto vec = dist_vec_type::create(this->ref, this->comm, local_vec.get()); - - GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); - 
GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); -} - - -template -class VectorReductions : public ::testing::Test { -public: - using value_type = ValueType; - using local_index_type = gko::int32; - using global_index_type = gko::int64; - using part_type = - gko::distributed::Partition; - using md_type = gko::matrix_data; - using dist_vec_type = gko::distributed::Vector; - using dense_type = gko::matrix::Dense; - - VectorReductions() - : ref(gko::ReferenceExecutor::create()), - comm(MPI_COMM_WORLD), - part(gko::share(part_type::build_from_contiguous( - this->ref, {ref, {0, 2, 4, 6}}))), - vec_a(dist_vec_type::create(ref, comm)), - vec_b(dist_vec_type::create(ref, comm)) - { - md_type md_a{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; - md_type md_b{{10, -11}, {8, -9}, {-6, 7}, {4, -5}, {2, -3}, {0, 1}}; - - vec_a->read_distributed(md_a, part.get()); - vec_b->read_distributed(md_b, part.get()); - } - - void SetUp() override { ASSERT_GE(this->comm.size(), 3); } - - std::shared_ptr ref; - gko::mpi::communicator comm; - std::shared_ptr part; - - std::unique_ptr vec_a; - std::unique_ptr vec_b; -}; - - -TYPED_TEST_SUITE(VectorReductions, gko::test::ValueTypes); - - -TYPED_TEST(VectorReductions, ComputesDotProduct) -{ - using dense_type = typename TestFixture::dense_type; - using value_type = typename TestFixture::value_type; - auto res = dense_type::create(this->ref, gko::dim<2>{1, 2}); - auto ref_res = - gko::initialize(I>{{32, -54}}, this->ref); - - this->vec_a->compute_dot(this->vec_b.get(), res.get()); - - GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); -} - - -TYPED_TEST(VectorReductions, ComputesConjDot) -{ - using dense_type = typename TestFixture::dense_type; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::global_index_type; - using real_type = typename gko::remove_complex; - using dist_vec_type = typename TestFixture::dist_vec_type; - auto md_a = gko::test::generate_random_matrix_data( - 6, 2, 
std::uniform_int_distribution(2, 2), - std::normal_distribution(0, 1), - std::default_random_engine{42}); - auto md_b = gko::test::generate_random_matrix_data( - 6, 2, std::uniform_int_distribution(2, 2), - std::normal_distribution(0, 1), - std::default_random_engine{42}); - auto dist_vec_a = dist_vec_type::create(this->ref, this->comm); - auto dist_vec_b = dist_vec_type::create(this->ref, this->comm); - auto dense_vec_a = dense_type::create(this->ref); - auto dense_vec_b = dense_type::create(this->ref); - dist_vec_a->read_distributed(md_a, this->part.get()); - dist_vec_b->read_distributed(md_b, this->part.get()); - dense_vec_a->read(md_a); - dense_vec_b->read(md_b); - auto res = dense_type::create(this->ref, gko::dim<2>{1, 2}); - auto ref_res = dense_type::create(this->ref, gko::dim<2>{1, 2}); - - dist_vec_a->compute_dot(dist_vec_b.get(), res.get()); - dense_vec_a->compute_dot(dense_vec_b.get(), ref_res.get()); - - GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); -} - - -TYPED_TEST(VectorReductions, ComputesNorm2) -{ - using dense_type = typename TestFixture::dense_type; - using value_type = typename TestFixture::value_type; - auto res = dense_type::absolute_type::create(this->ref, gko::dim<2>{1, 2}); - auto ref_res = gko::initialize( - {{static_cast>(std::sqrt(220)), - static_cast>(std::sqrt(286))}}, - this->ref); - - this->vec_a->compute_norm2(res.get()); - - GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); -} - - -TYPED_TEST(VectorReductions, ComputesNorm1) -{ - using dense_type = typename TestFixture::dense_type; - using value_type = typename TestFixture::value_type; - auto res = dense_type::absolute_type::create(this->ref, gko::dim<2>{1, 2}); - auto ref_res = gko::initialize( - {{30, 36}}, this->ref); - - this->vec_a->compute_norm1(res.get()); - - GKO_ASSERT_MTX_NEAR(res, ref_res, r::value); -} - - -template -class VectorLocalOp : public ::testing::Test { -public: - using value_type = ValueType; - using local_index_type = gko::int32; - using global_index_type = 
gko::int64; - using part_type = - gko::distributed::Partition; - using md_type = gko::matrix_data; - using dist_vec_type = gko::distributed::Vector; - using dense_type = gko::matrix::Dense; - - VectorLocalOp() - : ref(gko::ReferenceExecutor::create()), - comm(MPI_COMM_WORLD), - part(gko::share(part_type::build_from_contiguous( - this->ref, {ref, {0, 2, 4, 6}}))), - vec_a(dist_vec_type::create(ref, comm)), - vec_b(dist_vec_type::create(ref, comm)), - engine(42) - { - md_type md_a{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; - md_type md_b{{10, -11}, {8, -9}, {-6, 7}, {4, -5}, {2, -3}, {0, 1}}; - - vec_a->read_distributed(md_a, part.get()); - vec_b->read_distributed(md_b, part.get()); - } - - void SetUp() override { ASSERT_GE(this->comm.size(), 3); } - - auto generate_local_and_global_pair(gko::dim<2> local_size) - { - auto local_vec = gko::test::generate_random_matrix( - local_size[0], local_size[1], - std::uniform_int_distribution(0, local_size[1] - 1), - std::normal_distribution>(), - this->engine, this->ref); - auto dist_vec = dist_vec_type::create( - this->ref, this->comm, - gko::dim<2>{local_size[0] * this->comm.size(), local_size[1]}, - local_size); - dist_vec->get_local()->copy_from(local_vec.get()); - - return std::make_pair(std::move(dist_vec), std::move(local_vec)); - } - - - std::shared_ptr ref; - gko::mpi::communicator comm; - std::shared_ptr part; - - std::unique_ptr vec_a; - std::unique_ptr vec_b; - - std::default_random_engine engine; -}; - - -TYPED_TEST_SUITE(VectorLocalOp, gko::test::ValueTypes); - - -TYPED_TEST(VectorLocalOp, ApplyNotSupported) -{ - using dist_vec_type = typename TestFixture::dist_vec_type; - auto a = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{2, 2}); - auto b = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{2, 2}); - auto c = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{2, 2}); - - ASSERT_THROW(a->apply(b.get(), c.get()), 
gko::NotSupported); -} - - -TYPED_TEST(VectorLocalOp, AdvancedApplyNotSupported) -{ - using dist_vec_type = typename TestFixture::dist_vec_type; - auto a = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{2, 2}); - auto b = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{1, 1}, - gko::dim<2>{1, 1}); - auto c = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{2, 2}); - auto d = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{1, 1}, - gko::dim<2>{1, 1}); - auto e = dist_vec_type::create(this->ref, this->comm, gko::dim<2>{2, 2}, - gko::dim<2>{2, 2}); - - ASSERT_THROW(a->apply(b.get(), c.get(), d.get(), e.get()), - gko::NotSupported); -} - - -TYPED_TEST(VectorLocalOp, ConvertsToPrecision) -{ - using Vector = typename TestFixture::dist_vec_type; - using T = typename TestFixture::value_type; - using OtherT = typename gko::next_precision; - using OtherVector = typename gko::distributed::Vector; - auto tmp = OtherVector::create(this->ref, this->comm); - auto res = Vector::create(this->ref, this->comm); - // If OtherT is more precise: 0, otherwise r - auto residual = r::value < r::value - ? gko::remove_complex{0} - : gko::remove_complex{r::value}; - - this->vec_a->convert_to(tmp.get()); - tmp->convert_to(res.get()); - - GKO_ASSERT_MTX_NEAR(this->vec_a->get_local(), res->get_local(), residual); -} - - -TYPED_TEST(VectorLocalOp, MovesToPrecision) -{ - using Vector = typename TestFixture::dist_vec_type; - using T = typename TestFixture::value_type; - using OtherT = typename gko::next_precision; - using OtherVector = typename gko::distributed::Vector; - auto tmp = OtherVector::create(this->ref, this->comm); - auto res = Vector::create(this->ref, this->comm); - auto clone_vec_a = gko::clone(this->vec_a); - // If OtherT is more precise: 0, otherwise r - auto residual = r::value < r::value - ? 
gko::remove_complex{0} - : gko::remove_complex{r::value}; - - clone_vec_a->move_to(tmp.get()); - tmp->move_to(res.get()); - - GKO_ASSERT_MTX_NEAR(this->vec_a->get_local(), res->get_local(), residual); -} - - -TYPED_TEST(VectorLocalOp, ComputeAbsoluteSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - - auto dist_absolute = dist->compute_absolute(); - auto local_absolute = local->compute_absolute(); - - GKO_ASSERT_MTX_NEAR(dist_absolute->get_const_local(), local_absolute, 0); -} - - -TYPED_TEST(VectorLocalOp, ComputeAbsoluteInplaceSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - - dist->compute_absolute_inplace(); - local->compute_absolute_inplace(); - - GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); -} - - -TYPED_TEST(VectorLocalOp, MakeComplexSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - - auto dist_complex = dist->make_complex(); - auto local_complex = local->make_complex(); - - GKO_ASSERT_MTX_NEAR(dist_complex->get_const_local(), local_complex, 0); -} - - -TYPED_TEST(VectorLocalOp, MakeComplexInplaceSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto dist_complex = dist->make_complex(); - dist_complex->fill(0); - auto local_complex = local->make_complex(); - local_complex->fill(0); - - dist->make_complex(dist_complex.get()); - 
local->make_complex(local_complex.get()); - - GKO_ASSERT_MTX_NEAR(dist_complex->get_const_local(), local_complex, 0); -} - - -TYPED_TEST(VectorLocalOp, GetRealSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - - auto dist_real = dist->get_real(); - auto local_real = local->get_real(); - - GKO_ASSERT_MTX_NEAR(dist_real->get_const_local(), local_real, 0); -} - - -TYPED_TEST(VectorLocalOp, GetRealInplaceSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto dist_real = dist->get_real(); - dist_real->fill(0); - auto local_real = local->get_real(); - local_real->fill(0); - - dist->get_real(dist_real.get()); - local->get_real(local_real.get()); - - GKO_ASSERT_MTX_NEAR(dist_real->get_const_local(), local_real, 0); -} - - -TYPED_TEST(VectorLocalOp, GetImagSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - - auto dist_imag = dist->get_imag(); - auto local_imag = local->get_imag(); - - GKO_ASSERT_MTX_NEAR(dist_imag->get_const_local(), local_imag, 0); -} - - -TYPED_TEST(VectorLocalOp, GetImagInplaceSameAsLocal) -{ - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto dist_imag = dist->get_imag(); - dist_imag->fill(0); - auto local_imag = local->get_imag(); - local_imag->fill(0); - - dist->get_imag(dist_imag.get()); - local->get_imag(local_imag.get()); - - 
GKO_ASSERT_MTX_NEAR(dist_imag->get_const_local(), local_imag, 0); -} - - -TYPED_TEST(VectorLocalOp, FillSameAsLocal) -{ - using value_type = typename TestFixture::value_type; - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto value = gko::test::detail::get_rand_value( - std::normal_distribution>(), - this->engine); - - dist->fill(value); - local->fill(value); - - GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); -} - - -TYPED_TEST(VectorLocalOp, ScaleSameAsLocal) -{ - using value_type = typename TestFixture::value_type; - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto value = gko::test::generate_random_matrix( - 1, num_cols, - std::uniform_int_distribution(num_cols, num_cols), - std::normal_distribution>(), - this->engine, this->ref); - - dist->scale(value.get()); - local->scale(value.get()); - - GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); -} - - -TYPED_TEST(VectorLocalOp, InvScaleSameAsLocal) -{ - using value_type = typename TestFixture::value_type; - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto value = gko::test::generate_random_matrix( - 1, num_cols, - std::uniform_int_distribution(num_cols, num_cols), - std::uniform_real_distribution>(1.0, - 2.0), - this->engine, this->ref); - - dist->inv_scale(value.get()); - local->inv_scale(value.get()); - - GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); -} - - -TYPED_TEST(VectorLocalOp, AddScaleSameAsLocal) -{ - using value_type = typename TestFixture::value_type; - gko::size_type local_size = 20; - 
gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto pair_b = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto& dist_b = pair_b.first; - auto& local_b = pair_b.second; - auto value = gko::test::generate_random_matrix( - 1, num_cols, - std::uniform_int_distribution(num_cols, num_cols), - std::normal_distribution>(), - this->engine, this->ref); - - dist->add_scaled(value.get(), dist_b.get()); - local->add_scaled(value.get(), local_b.get()); - - GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); -} - - -TYPED_TEST(VectorLocalOp, SubScaleSameAsLocal) -{ - using value_type = typename TestFixture::value_type; - gko::size_type local_size = 20; - gko::size_type num_cols = 7; - auto pair = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto pair_b = - this->generate_local_and_global_pair(gko::dim<2>{local_size, num_cols}); - auto& dist = pair.first; - auto& local = pair.second; - auto& dist_b = pair_b.first; - auto& local_b = pair_b.second; - auto value = gko::test::generate_random_matrix( - 1, num_cols, - std::uniform_int_distribution(num_cols, num_cols), - std::normal_distribution>(), - this->engine, this->ref); - - dist->sub_scaled(value.get(), dist_b.get()); - local->sub_scaled(value.get(), local_b.get()); - - GKO_ASSERT_MTX_NEAR(dist->get_const_local(), local, 0); -} - - -TYPED_TEST(VectorLocalOp, CreateRealViewSameAsLocal) -{ - using value_type = typename TestFixture::value_type; - using real_type = gko::remove_complex; - - auto real_view = this->vec_a->create_real_view(); - auto local_real_view = this->vec_a->get_const_local()->create_real_view(); - - if (gko::is_complex()) { - EXPECT_EQ(real_view->get_size()[0], this->vec_a->get_size()[0]); - EXPECT_EQ(real_view->get_size()[1], 2 * this->vec_a->get_size()[1]); - EXPECT_EQ(real_view->get_const_local()->get_stride(), - 2 * 
this->vec_a->get_const_local()->get_stride()); - GKO_ASSERT_MTX_NEAR(real_view->get_const_local(), local_real_view, 0.); - } else { - EXPECT_EQ(real_view->get_size()[0], this->vec_a->get_size()[0]); - EXPECT_EQ(real_view->get_size()[1], this->vec_a->get_size()[1]); - GKO_ASSERT_MTX_NEAR(real_view->get_const_local(), local_real_view, 0.); - } -} - - -class AllocCounter : public gko::log::Logger { -public: - void on_allocation_started(const gko::Executor* exec, - const gko::size_type& num_bytes) const override - { - count_++; - } - - int get_count() const { return count_; } - - static std::unique_ptr create( - std::shared_ptr exec) - { - return std::unique_ptr(new AllocCounter(std::move(exec))); - } - -private: - explicit AllocCounter(std::shared_ptr exec) - : gko::log::Logger(std::move(exec), - gko::log::Logger::allocation_started_mask), - count_(0) - {} - - mutable int count_; -}; - - -TYPED_TEST(VectorLocalOp, CreateRealViewIsView) -{ - using value_type = typename TestFixture::value_type; - using real_type = gko::remove_complex; - auto log = gko::share(AllocCounter::create(this->ref)); - - this->ref->add_logger(log); - auto real_view = this->vec_a->create_real_view(); - this->ref->remove_logger(log.get()); - - ASSERT_EQ(log->get_count(), 0); -} - - -} // namespace diff --git a/test/base/device_matrix_data_kernels.cpp b/test/base/device_matrix_data_kernels.cpp index 3d55ae021a6..fd106a84231 100644 --- a/test/base/device_matrix_data_kernels.cpp +++ b/test/base/device_matrix_data_kernels.cpp @@ -65,7 +65,7 @@ class DeviceMatrixData : public ::testing::Test { DeviceMatrixData() : rand{82754} {} - void SetUp() + void SetUp() override { init_executor(gko::ReferenceExecutor::create(), exec); host_data.size = {100, 200}; @@ -115,7 +115,7 @@ class DeviceMatrixData : public ::testing::Test { deduplicated_data.sum_duplicates(); } - void TearDown() + void TearDown() override { if (exec != nullptr) { ASSERT_NO_THROW(exec->synchronize()); diff --git 
a/test/mpi/distributed/CMakeLists.txt b/test/mpi/distributed/CMakeLists.txt index f6e8195ebc6..c0d932cb776 100644 --- a/test/mpi/distributed/CMakeLists.txt +++ b/test/mpi/distributed/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_common_mpi_test(vector 3) +ginkgo_create_common_and_reference_mpi_test(vector 3) diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index dc190f142a7..bd82b594537 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -86,72 +86,274 @@ class HostToDeviceLogger : public gko::log::Logger { }; +template class VectorCreation : public ::testing::Test { public: - using value_type = float; + using value_type = typename std::tuple_element< + 0, decltype(ValueLocalGlobalIndexType())>::type; + using local_index_type = typename std::tuple_element< + 1, decltype(ValueLocalGlobalIndexType())>::type; + using global_index_type = typename std::tuple_element< + 2, decltype(ValueLocalGlobalIndexType())>::type; + using part_type = + gko::distributed::Partition; + using md_type = gko::matrix_data; + using d_md_type = gko::device_matrix_data; using dist_vec_type = gko::distributed::Vector; - using dense_type = dist_vec_type::local_vector_type; + using dense_type = gko::matrix::Dense; VectorCreation() : ref(gko::ReferenceExecutor::create()), - exec(), comm(MPI_COMM_WORLD), + part(gko::share(part_type::build_from_contiguous( + this->ref, {ref, {0, 2, 4, 6}}))), local_size{4, 11}, size{local_size[1] * comm.size(), 11}, - engine(42) + md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, + md_localized{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}} + {} + + void SetUp() override { - init_executor(ref, exec, comm); + ASSERT_GE(this->comm.size(), 3); + init_executor(gko::ReferenceExecutor::create(), exec); } - void SetUp() override { ASSERT_GT(comm.size(), 0); } + void TearDown() override + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } - std::shared_ptr ref; + std::shared_ptr 
ref; std::shared_ptr exec; - gko::mpi::communicator comm; + std::shared_ptr part; gko::dim<2> local_size; gko::dim<2> size; + md_type md; + md_type md_localized[3]; + std::default_random_engine engine; }; +TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); + + +#ifdef GKO_COMPILING_REFERENCE + -TEST_F(VectorCreation, CanCreateFromLocalVectorAndSize) +TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) { - auto local_vec = gko::test::generate_random_matrix( - local_size[0], local_size[1], - std::uniform_int_distribution(0, local_size[1]), - std::normal_distribution(), engine, ref); - auto dlocal_vec = gko::clone(exec, local_vec); + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(this->md, this->part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); +} - auto vec = dist_vec_type::create(ref, comm, size, local_vec.get()); - auto dvec = dist_vec_type::create(exec, comm, size, dlocal_vec.get()); - GKO_ASSERT_EQUAL_DIMENSIONS(vec, dvec); - GKO_ASSERT_MTX_NEAR(vec->get_local(), dvec->get_local(), 0); +TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) +{ + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 0, 6, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + + vec->read_distributed(this->md, part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + if (rank == 1) { 
+ GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR( + vec->get_local(), + l({{0., 1.}, {2., 3.}, {4., 5.}, {6., 7.}, {8., 9.}, {10., 11.}}), + 0.0); + } else { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(0, 2)); + } } -TEST_F(VectorCreation, CanCreateFromLocalVectorWithoutSize) +TYPED_TEST(VectorCreation, CanReadGlobalDeviceMatrixData) { - auto local_vec = gko::test::generate_random_matrix( - local_size[0], local_size[1], - std::uniform_int_distribution(0, local_size[1]), - std::normal_distribution(), engine, ref); - auto dlocal_vec = gko::clone(exec, local_vec); + using it = typename TestFixture::global_index_type; + using d_md_type = typename TestFixture::d_md_type; + using part_type = typename TestFixture::part_type; + using vt = typename TestFixture::value_type; + d_md_type md{ + this->exec, gko::dim<2>{6, 2}, + gko::Array{this->exec, I{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}}, + gko::Array{this->exec, I{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}, + gko::Array{this->exec, + I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 2, 4, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(md, part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); +} - auto vec = dist_vec_type::create(ref, comm, local_vec.get()); - auto dvec = dist_vec_type::create(exec, comm, dlocal_vec.get()); - GKO_ASSERT_EQUAL_DIMENSIONS(vec, dvec); - GKO_ASSERT_MTX_NEAR(vec->get_local(), dvec->get_local(), 0); +TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataScattered) +{ + using md_type = typename 
TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; + auto part = gko::share(part_type::build_from_mapping( + this->exec, {this->exec, {0, 1, 2, 0, 2, 0}}, 3)); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + gko::dim<2> ref_size[3] = {{3, 2}, {1, 2}, {2, 2}}; + I> ref_data[3] = { + {{0, 1}, {6, 7}, {10, 11}}, + {{2, 3}}, + {{4, 5}, {8, 9}}, + }; + + vec->read_distributed(md, part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), ref_size[rank]); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); } +TYPED_TEST(VectorCreation, CanReadLocalMatrixData) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md[3] = { + {gko::dim<2>{6, 2}, {{0, 0, 0}, {0, 1, 1}, {1, 0, 2}, {1, 1, 3}}}, + {gko::dim<2>{6, 2}, {{2, 0, 4}, {2, 1, 5}, {3, 0, 6}, {3, 1, 7}}}, + {gko::dim<2>{6, 2}, {{4, 0, 8}, {4, 1, 9}, {5, 0, 10}, {5, 1, 11}}}}; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 2, 4, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(md[rank], part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); +} + + +TYPED_TEST(VectorCreation, CanReadLocalMatrixDataSomeEmpty) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = 
typename TestFixture::value_type; + md_type md[3] = {{gko::dim<2>{6, 2}, {}}, + {gko::dim<2>{6, 2}, + // clang-format off + {{0, 0, 0}, {0, 1, 1}, + {1, 0, 2}, {1, 1, 3}, + {2, 0, 4}, {2, 1, 5}, + {3, 0, 6}, {3, 1, 7}, + {4, 0, 8}, {4, 1, 9}, + {5, 0, 10}, {5, 1, 11}}}, + // clang-format on + {gko::dim<2>{6, 2}, {}}}; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 0, 6, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + + vec->read_distributed(md[rank], part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + if (rank == 1) { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR( + vec->get_local(), + I>( + {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}), + 0.0); + } else { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + gko::dim<2>(0, 2)); + } +} + + +#endif + + +TYPED_TEST(VectorCreation, CanCreateFromLocalVectorAndSize) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + using dense_type = typename TestFixture::dense_type; + auto local_vec = dense_type::create(this->exec); + local_vec->read(this->md_localized[this->comm.rank()]); + auto clone_local_vec = gko::clone(local_vec); + + auto vec = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{6, 2}, + local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); +} + + +TYPED_TEST(VectorCreation, CanCreateFromLocalVectorWithoutSize) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + using dense_type = typename TestFixture::dense_type; + auto local_vec = dense_type::create(this->exec); + local_vec->read(this->md_localized[this->comm.rank()]); + auto clone_local_vec = gko::clone(local_vec); + + auto vec = dist_vec_type::create(this->exec, this->comm, local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, 
gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); +} + + +template class VectorReductions : public ::testing::Test { public: - using value_type = float; + using value_type = ValueType; using local_index_type = gko::int32; using global_index_type = gko::int64; using part_type = @@ -159,22 +361,29 @@ class VectorReductions : public ::testing::Test { using md_type = gko::matrix_data; using dist_vec_type = gko::distributed::Vector; using dense_type = gko::matrix::Dense; + using real_dense_type = typename dense_type::real_type; VectorReductions() : ref(gko::ReferenceExecutor::create()), exec(), comm(MPI_COMM_WORLD), size{53, 11}, - x(dist_vec_type::create(ref, comm)), - dx(dist_vec_type::create(exec, comm)), - y(dist_vec_type::create(ref, comm)), - dy(dist_vec_type::create(exec, comm)), - logger(gko::share(HostToDeviceLogger::create(exec))), engine(42) { init_executor(ref, exec, comm); + + logger = gko::share(HostToDeviceLogger::create(exec)); exec->add_logger(logger); + dense_x = dense_type::create(exec); + dense_y = dense_type::create(exec); + x = dist_vec_type::create(exec, comm); + y = dist_vec_type::create(exec, comm); + dense_res = dense_type ::create(exec); + res = dense_type ::create(exec); + dense_real_res = real_dense_type ::create(exec); + real_res = real_dense_type ::create(exec); + auto num_parts = static_cast(comm.size()); auto mapping = @@ -191,8 +400,10 @@ class VectorReductions : public ::testing::Test { std::uniform_int_distribution(size[1], size[1]), std::normal_distribution>(), engine); - x->read_distributed(md_x, part.get()); - dx = gko::clone(exec, x); + dense_x->read(md_x); + auto tmp_x = dist_vec_type::create(ref, comm); + tmp_x->read_distributed(md_x, part.get()); + x = gko::clone(exec, tmp_x); auto md_y = gko::test::generate_random_matrix_data( @@ -200,18 +411,35 @@ class VectorReductions : public ::testing::Test { std::uniform_int_distribution(size[1], size[1]), std::normal_distribution>(), engine); - 
y->read_distributed(md_y, part.get()); - dy = gko::clone(exec, y); + dense_y->read(md_y); + auto tmp_y = dist_vec_type::create(ref, comm); + tmp_y->read_distributed(md_y, part.get()); + y = gko::clone(exec, tmp_y); + } + + void SetUp() override + { + ASSERT_GT(comm.size(), 0); + init_executor(gko::ReferenceExecutor::create(), exec); } - void SetUp() override { ASSERT_GT(comm.size(), 0); } + void TearDown() override + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } void init_result() { - res = dense_type::create(ref, gko::dim<2>{1, size[1]}); - dres = dense_type::create(exec, gko::dim<2>{1, size[1]}); + res = dense_type::create(exec, gko::dim<2>{1, size[1]}); + dense_res = dense_type::create(exec, gko::dim<2>{1, size[1]}); + real_res = real_dense_type::create(exec, gko::dim<2>{1, size[1]}); + dense_real_res = real_dense_type::create(exec, gko::dim<2>{1, size[1]}); res->fill(0.0); - dres->fill(0.0); + dense_res->fill(0.0); + real_res->fill(0.0); + dense_real_res->fill(0.0); } std::shared_ptr ref; @@ -221,403 +449,468 @@ class VectorReductions : public ::testing::Test { gko::dim<2> size; + std::unique_ptr dense_x; + std::unique_ptr dense_y; std::unique_ptr x; - std::unique_ptr dx; std::unique_ptr y; - std::unique_ptr dy; + std::unique_ptr dense_res; std::unique_ptr res; - std::unique_ptr dres; + std::unique_ptr dense_real_res; + std::unique_ptr real_res; std::shared_ptr logger; std::default_random_engine engine; }; +TYPED_TEST_SUITE(VectorReductions, gko::test::ValueTypes); + -TEST_F(VectorReductions, ComputesDotProductIsSameAsRef) +TYPED_TEST(VectorReductions, ComputesDotProductIsSameAsDense) { - init_result(); + using value_type = typename TestFixture::value_type; + this->init_result(); - x->compute_dot(y.get(), res.get()); - dx->compute_dot(dy.get(), dres.get()); + this->x->compute_dot(this->y.get(), this->res.get()); + this->dense_x->compute_dot(this->dense_y.get(), this->dense_res.get()); - GKO_ASSERT_MTX_NEAR(res, dres, r::value); + 
GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); } -TEST_F(VectorReductions, ComputesConjDotProductIsSameAsRef) +TYPED_TEST(VectorReductions, ComputesConjDotProductIsSameAsDense) { - init_result(); + using value_type = typename TestFixture::value_type; + this->init_result(); - x->compute_conj_dot(y.get(), res.get()); - dx->compute_conj_dot(dy.get(), dres.get()); + this->x->compute_conj_dot(this->y.get(), this->res.get()); + this->dense_x->compute_conj_dot(this->dense_y.get(), this->dense_res.get()); - GKO_ASSERT_MTX_NEAR(res, dres, r::value); + GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); } -TEST_F(VectorReductions, ComputesNorm2IsSameAsRef) +TYPED_TEST(VectorReductions, ComputesNorm2IsSameAsDense) { - init_result(); + using value_type = typename TestFixture::value_type; + this->init_result(); - x->compute_norm2(res.get()); - dx->compute_norm2(dres.get()); + this->x->compute_norm2(this->real_res.get()); + this->dense_x->compute_norm2(this->dense_real_res.get()); - GKO_ASSERT_MTX_NEAR(res, dres, r::value); + GKO_ASSERT_MTX_NEAR(this->real_res, this->dense_real_res, + r::value); } -TEST_F(VectorReductions, ComputesNorm1IsSameAsRef) +TYPED_TEST(VectorReductions, ComputesNorm1IsSameAsDense) { - init_result(); + using value_type = typename TestFixture::value_type; + this->init_result(); - x->compute_norm1(res.get()); - dx->compute_norm1(dres.get()); + this->x->compute_norm1(this->real_res.get()); + this->dense_x->compute_norm1(this->dense_real_res.get()); - GKO_ASSERT_MTX_NEAR(res, dres, r::value); + GKO_ASSERT_MTX_NEAR(this->real_res, this->dense_real_res, + r::value); } -TEST_F(VectorReductions, ComputeDotCopiesToHostOnlyIfNecessary) +TYPED_TEST(VectorReductions, ComputeDotCopiesToHostOnlyIfNecessary) { - init_result(); - auto transfer_count_before = logger->get_transfer_count(); + using value_type = typename TestFixture::value_type; + this->init_result(); + auto transfer_count_before = this->logger->get_transfer_count(); - 
dx->compute_dot(dy.get(), dres.get()); + this->x->compute_dot(this->y.get(), this->res.get()); - ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, - needs_transfers(exec)); + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); } -TEST_F(VectorReductions, ComputeConjDotCopiesToHostOnlyIfNecessary) +TYPED_TEST(VectorReductions, ComputeConjDotCopiesToHostOnlyIfNecessary) { - init_result(); - auto transfer_count_before = logger->get_transfer_count(); + using value_type = typename TestFixture::value_type; + this->init_result(); + auto transfer_count_before = this->logger->get_transfer_count(); - dx->compute_conj_dot(dy.get(), dres.get()); + this->x->compute_conj_dot(this->y.get(), this->res.get()); - ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, - needs_transfers(exec)); + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); } -TEST_F(VectorReductions, ComputeNorm2CopiesToHostOnlyIfNecessary) +TYPED_TEST(VectorReductions, ComputeNorm2CopiesToHostOnlyIfNecessary) { - init_result(); - auto transfer_count_before = logger->get_transfer_count(); + using value_type = typename TestFixture::value_type; + this->init_result(); + auto transfer_count_before = this->logger->get_transfer_count(); - dx->compute_norm2(dres.get()); + this->x->compute_norm2(this->real_res.get()); - ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, - needs_transfers(exec)); + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); } -TEST_F(VectorReductions, ComputeNorm1CopiesToHostOnlyIfNecessary) +TYPED_TEST(VectorReductions, ComputeNorm1CopiesToHostOnlyIfNecessary) { - init_result(); - auto transfer_count_before = logger->get_transfer_count(); + using value_type = typename TestFixture::value_type; + this->init_result(); + auto transfer_count_before = this->logger->get_transfer_count(); - 
dx->compute_norm1(dres.get()); + this->x->compute_norm1(this->real_res.get()); - ASSERT_EQ(logger->get_transfer_count() > transfer_count_before, - needs_transfers(exec)); + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); } - +template class VectorLocalOps : public ::testing::Test { public: - using value_type = float; - using mixed_type = double; + using value_type = ValueType; + using mixed_type = gko::next_precision; using local_index_type = gko::int32; using global_index_type = gko::int64; using part_type = gko::distributed::Partition; - using md_type = gko::matrix_data; using dist_vec_type = gko::distributed::Vector; - using complex_dist_vec_type = - gko::distributed::Vector::complex_type; + using complex_dist_vec_type = typename dist_vec_type::complex_type; + using real_dist_vec_type = typename dist_vec_type ::real_type; using dense_type = gko::matrix::Dense; + using complex_dense_type = typename dense_type::complex_type; + using real_dense_type = typename dense_type ::real_type; VectorLocalOps() : ref(gko::ReferenceExecutor::create()), exec(), comm(MPI_COMM_WORLD), - size{53, 11}, + local_size{4, 11}, + size{local_size[0] * comm.size(), 11}, engine(42) { init_executor(ref, exec, comm); - x = dist_vec_type::create(ref, comm); - dx = dist_vec_type::create(exec, comm); - y = dist_vec_type::create(ref, comm); - dy = dist_vec_type::create(exec, comm); - alpha = dense_type ::create(ref); - dalpha = dense_type ::create(exec); - complex = complex_dist_vec_type::create(ref, comm); - dcomplex = complex_dist_vec_type::create(exec, comm); + x = dist_vec_type::create(exec, comm); + y = dist_vec_type::create(exec, comm); + alpha = dense_type ::create(exec); + local_complex = complex_dense_type ::create(exec); + complex = complex_dist_vec_type::create(exec, comm); + } - auto num_parts = - static_cast(comm.size()); - auto mapping = - gko::test::generate_random_array( - size[0], - std::uniform_int_distribution< - 
gko::distributed::comm_index_type>(0, num_parts - 1), - engine, ref); - part = part_type::build_from_mapping(ref, mapping, num_parts); + void SetUp() override + { + ASSERT_GT(comm.size(), 0); + init_executor(gko::ReferenceExecutor::create(), exec); } - void SetUp() override { ASSERT_GT(comm.size(), 0); } + void TearDown() override + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } - template - void generate_vector_pair(std::unique_ptr& host, - std::unique_ptr& device) + template + void generate_vector_pair(std::unique_ptr& local, + std::unique_ptr& dist) { - using vtype = typename VectorType::value_type; - auto md = - gko::test::generate_random_matrix_data( - size[0], size[1], - std::uniform_int_distribution(size[1], size[1]), - std::normal_distribution>(), engine); - host->read_distributed(md, part.get()); - device = gko::clone(exec, host); + using vtype = typename LocalVectorType::value_type; + local = gko::test::generate_random_matrix( + local_size[0], local_size[1], + std::uniform_int_distribution(local_size[1], + local_size[1]), + std::normal_distribution>(), engine, + exec); + dist = + DistVectorType::create(exec, comm, size, gko::clone(local).get()); } void init_vectors() { - generate_vector_pair(x, dx); - generate_vector_pair(y, dy); + generate_vector_pair(local_x, x); + generate_vector_pair(local_y, y); alpha = gko::test::generate_random_matrix( 1, size[1], std::uniform_int_distribution(size[1], size[1]), - std::normal_distribution(), engine, ref); - dalpha = gko::clone(exec, alpha); + std::normal_distribution>(), engine, + exec); } - void init_complex_vectors() { generate_vector_pair(complex, dcomplex); } + void init_complex_vectors() + { + generate_vector_pair(local_real, real); + generate_vector_pair(local_complex, complex); + } std::shared_ptr ref; std::shared_ptr exec; gko::mpi::communicator comm; + gko::dim<2> local_size; gko::dim<2> size; - std::unique_ptr part; - + std::unique_ptr local_x; + std::unique_ptr local_y; + 
std::unique_ptr local_complex; + std::unique_ptr local_real; std::unique_ptr x; - std::unique_ptr dx; std::unique_ptr y; - std::unique_ptr dy; std::unique_ptr alpha; - std::unique_ptr dalpha; std::unique_ptr complex; - std::unique_ptr dcomplex; + std::unique_ptr real; std::default_random_engine engine; }; +TYPED_TEST_SUITE(VectorLocalOps, gko::test::ValueTypes); + -TEST_F(VectorLocalOps, ConvertsToPrecision) +TYPED_TEST(VectorLocalOps, ApplyNotSupported) { - using OtherVector = typename gko::distributed::Vector; - auto tmp = OtherVector::create(ref, comm); - auto dtmp = OtherVector::create(exec, comm); - init_vectors(); + using dist_vec_type = typename TestFixture::dist_vec_type; + auto a = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + auto b = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + auto c = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + + ASSERT_THROW(a->apply(b.get(), c.get()), gko::NotSupported); +} - x->convert_to(tmp.get()); - dx->convert_to(dtmp.get()); - GKO_ASSERT_MTX_NEAR(tmp->get_local(), dtmp->get_local(), - r::value); +TYPED_TEST(VectorLocalOps, AdvancedApplyNotSupported) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + auto a = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + auto b = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{1, 1}, + gko::dim<2>{1, 1}); + auto c = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + auto d = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{1, 1}, + gko::dim<2>{1, 1}); + auto e = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + + ASSERT_THROW(a->apply(b.get(), c.get(), d.get(), e.get()), + gko::NotSupported); } -TEST_F(VectorLocalOps, MovesToPrecision) +TYPED_TEST(VectorLocalOps, ConvertsToPrecision) { - using OtherVector = 
typename gko::distributed::Vector; - auto tmp = OtherVector::create(ref, comm); - auto dtmp = OtherVector::create(exec, comm); - init_vectors(); + using Vector = typename TestFixture::dist_vec_type; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherVector = typename gko::distributed::Vector; + auto local_tmp = OtherVector::local_vector_type::create(this->exec); + auto tmp = OtherVector::create(this->exec, this->comm); + this->init_vectors(); + + this->local_x->convert_to(local_tmp.get()); + this->x->convert_to(tmp.get()); + + GKO_ASSERT_MTX_NEAR(tmp->get_local(), local_tmp, 0.0); +} - x->move_to(tmp.get()); - dx->move_to(dtmp.get()); - GKO_ASSERT_MTX_NEAR(tmp->get_local(), dtmp->get_local(), - r::value); +TYPED_TEST(VectorLocalOps, MovesToPrecision) +{ + using Vector = typename TestFixture::dist_vec_type; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherVector = typename gko::distributed::Vector; + auto local_tmp = OtherVector::local_vector_type::create(this->exec); + auto tmp = OtherVector::create(this->exec, this->comm); + this->init_vectors(); + + this->local_x->move_to(local_tmp.get()); + this->x->move_to(tmp.get()); + + GKO_ASSERT_MTX_NEAR(tmp->get_local(), local_tmp, 0.0); } -TEST_F(VectorLocalOps, ComputeAbsoluteSameAsLocal) +TYPED_TEST(VectorLocalOps, ComputeAbsoluteSameAsLocal) { - init_vectors(); + using value_type = typename TestFixture::value_type; + this->init_vectors(); - auto abs = x->compute_absolute(); - auto dabs = dx->compute_absolute(); + auto local_abs = this->local_x->compute_absolute(); + auto abs = this->x->compute_absolute(); - GKO_ASSERT_MTX_NEAR(abs->get_local(), dabs->get_local(), - r::value); + GKO_ASSERT_MTX_NEAR(abs->get_local(), local_abs, r::value); } -TEST_F(VectorLocalOps, ComputeAbsoluteInplaceSameAsLocal) +TYPED_TEST(VectorLocalOps, ComputeAbsoluteInplaceSameAsLocal) { - init_vectors(); + using value_type = typename 
TestFixture::value_type; + this->init_vectors(); - x->compute_absolute_inplace(); - dx->compute_absolute_inplace(); + this->local_x->compute_absolute_inplace(); + this->x->compute_absolute_inplace(); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + r::value); } -TEST_F(VectorLocalOps, MakeComplexSameAsLocal) +TYPED_TEST(VectorLocalOps, MakeComplexSameAsLocal) { - init_vectors(); - init_complex_vectors(); + this->init_vectors(); + this->init_complex_vectors(); - complex = x->make_complex(); - dcomplex = dx->make_complex(); + this->complex = this->x->make_complex(); + this->local_complex = this->local_x->make_complex(); - GKO_ASSERT_MTX_NEAR(complex->get_local(), dcomplex->get_local(), - r::value); + GKO_ASSERT_MTX_NEAR(this->complex->get_local(), this->local_complex, 0.0); } -TEST_F(VectorLocalOps, MakeComplexInplaceSameAsLocal) +TYPED_TEST(VectorLocalOps, MakeComplexInplaceSameAsLocal) { - init_vectors(); - init_complex_vectors(); + this->init_vectors(); + this->init_complex_vectors(); - x->make_complex(complex.get()); - dx->make_complex(dcomplex.get()); + this->x->make_complex(this->complex.get()); + this->local_x->make_complex(this->local_complex.get()); - GKO_ASSERT_MTX_NEAR(complex->get_local(), dcomplex->get_local(), - r::value); + GKO_ASSERT_MTX_NEAR(this->complex->get_local(), this->local_complex, 0.0); } -TEST_F(VectorLocalOps, GetRealSameAsLocal) +TYPED_TEST(VectorLocalOps, GetRealSameAsLocal) { - init_vectors(); - init_complex_vectors(); + this->init_vectors(); + this->init_complex_vectors(); - x = complex->get_real(); - dx = dcomplex->get_real(); + this->real = this->complex->get_real(); + this->local_real = this->local_complex->get_real(); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); } -TEST_F(VectorLocalOps, GetRealInplaceSameAsLocal) +TYPED_TEST(VectorLocalOps, 
GetRealInplaceSameAsLocal) { - init_vectors(); - init_complex_vectors(); + this->init_vectors(); + this->init_complex_vectors(); - complex->get_real(x.get()); - dcomplex->get_real(dx.get()); + this->complex->get_real(this->real.get()); + this->local_complex->get_real(this->local_real.get()); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); } -TEST_F(VectorLocalOps, GetImagSameAsLocal) +TYPED_TEST(VectorLocalOps, GetImagSameAsLocal) { - init_vectors(); - init_complex_vectors(); + this->init_complex_vectors(); - x = complex->get_imag(); - dx = dcomplex->get_imag(); + this->real = this->complex->get_imag(); + this->local_real = this->local_complex->get_imag(); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); } -TEST_F(VectorLocalOps, GetImagInplaceSameAsLocal) +TYPED_TEST(VectorLocalOps, GetImagInplaceSameAsLocal) { - init_vectors(); - init_complex_vectors(); + this->init_complex_vectors(); - complex->get_imag(x.get()); - dcomplex->get_imag(dx.get()); + this->complex->get_imag(this->real.get()); + this->local_complex->get_imag(this->local_real.get()); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); } -TEST_F(VectorLocalOps, FillSameAsLocal) +TYPED_TEST(VectorLocalOps, FillSameAsLocal) { - init_vectors(); + using value_type = typename TestFixture::value_type; auto value = gko::test::detail::get_rand_value( - std::normal_distribution>(), engine); + std::normal_distribution>(), + this->engine); + this->init_vectors(); - x->fill(value); - dx->fill(value); + this->x->fill(value); + this->local_x->fill(value); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, 0.0); } -TEST_F(VectorLocalOps, ScaleSameAsLocal) +TYPED_TEST(VectorLocalOps, 
ScaleSameAsLocal) { - init_vectors(); + using value_type = typename TestFixture::value_type; + this->init_vectors(); - x->scale(alpha.get()); - dx->scale(dalpha.get()); + this->x->scale(this->alpha.get()); + this->local_x->scale(this->alpha.get()); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + r::value); } -TEST_F(VectorLocalOps, InvScaleSameAsLocal) +TYPED_TEST(VectorLocalOps, InvScaleSameAsLocal) { - init_vectors(); + using value_type = typename TestFixture::value_type; + this->init_vectors(); - x->inv_scale(alpha.get()); - dx->inv_scale(dalpha.get()); + this->x->inv_scale(this->alpha.get()); + this->local_x->inv_scale(this->alpha.get()); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + r::value); } -TEST_F(VectorLocalOps, AddScaleSameAsLocal) +TYPED_TEST(VectorLocalOps, AddScaleSameAsLocal) { - init_vectors(); + using value_type = typename TestFixture::value_type; + this->init_vectors(); - x->add_scaled(alpha.get(), y.get()); - dx->add_scaled(dalpha.get(), dy.get()); + this->x->add_scaled(this->alpha.get(), this->y.get()); + this->local_x->add_scaled(this->alpha.get(), this->local_y.get()); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + r::value); } -TEST_F(VectorLocalOps, SubScaleSameAsLocal) +TYPED_TEST(VectorLocalOps, SubScaleSameAsLocal) { - init_vectors(); + using value_type = typename TestFixture::value_type; + this->init_vectors(); - x->sub_scaled(alpha.get(), y.get()); - dx->sub_scaled(dalpha.get(), dy.get()); + this->x->sub_scaled(this->alpha.get(), this->y.get()); + this->local_x->sub_scaled(this->alpha.get(), this->local_y.get()); - GKO_ASSERT_MTX_NEAR(x->get_local(), dx->get_local(), r::value); + GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + r::value); } -TEST_F(VectorLocalOps, 
CreateRealViewSameAsLocal) +TYPED_TEST(VectorLocalOps, CreateRealViewSameAsLocal) { - using real_type = gko::remove_complex; - init_vectors(); + this->init_vectors(); - auto rv = x->create_real_view(); - auto drv = dx->create_real_view(); + auto rv = this->x->create_real_view(); + auto local_rv = this->local_x->create_real_view(); - EXPECT_EQ(rv->get_size()[0], drv->get_size()[0]); - EXPECT_EQ(rv->get_size()[1], drv->get_size()[1]); - EXPECT_EQ(rv->get_const_local()->get_stride(), - drv->get_const_local()->get_stride()); - GKO_ASSERT_MTX_NEAR(rv->get_const_local(), drv->get_const_local(), 0.0); + GKO_ASSERT_EQUAL_ROWS(rv, this->x); + GKO_ASSERT_EQUAL_ROWS(rv->get_local(), local_rv); + GKO_ASSERT_EQUAL_COLS(rv->get_local(), local_rv); + EXPECT_EQ(rv->get_const_local()->get_stride(), local_rv->get_stride()); + GKO_ASSERT_MTX_NEAR(rv->get_const_local(), local_rv, 0.0); } From 40234e7121471439107ec4c0038ed73100a8b7ee Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 3 Mar 2022 17:46:39 +0100 Subject: [PATCH 35/38] review updates - formatting - fix DenseCache::init_from - fix tests if comm.size != 3 Co-authored-by: Yuhsiang Tsai --- core/CMakeLists.txt | 2 +- core/base/dense_cache.cpp | 3 +- core/distributed/vector.cpp | 15 +++++----- core/test/base/dense_cache.cpp | 10 +++++-- include/ginkgo/core/distributed/base.hpp | 2 +- include/ginkgo/core/distributed/vector.hpp | 3 +- omp/test/distributed/vector_kernels.cpp | 2 +- reference/test/distributed/vector_kernels.cpp | 28 ++++++++----------- test/mpi/distributed/vector.cpp | 3 +- 9 files changed, 33 insertions(+), 35 deletions(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 16156993a0a..251af568862 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -4,9 +4,9 @@ add_library(ginkgo "") target_sources(ginkgo PRIVATE base/array.cpp - base/dense_cache.cpp base/combination.cpp base/composition.cpp + base/dense_cache.cpp base/device_matrix_data.cpp base/executor.cpp base/index_set.cpp diff --git
a/core/base/dense_cache.cpp b/core/base/dense_cache.cpp index cb426f0ee65..91e4a4247cd 100644 --- a/core/base/dense_cache.cpp +++ b/core/base/dense_cache.cpp @@ -31,6 +31,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include + + #include @@ -56,7 +58,6 @@ void DenseCache::init_from( vec->get_executor() != template_vec->get_executor()) { vec = matrix::Dense::create_with_config_of(template_vec); } - vec->copy_from(template_vec); } diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 7e7e03cd866..180839587c6 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -428,9 +428,9 @@ Vector::create_real_view() const auto num_cols = is_complex() ? 2 * this->get_size()[1] : this->get_size()[1]; - return real_type ::create(this->get_executor(), this->get_communicator(), - dim<2>{num_global_rows, num_cols}, - local_.create_real_view().get()); + return real_type::create(this->get_executor(), this->get_communicator(), + dim<2>{num_global_rows, num_cols}, + local_.create_real_view().get()); } @@ -442,11 +442,10 @@ Vector::create_real_view() const const auto num_cols = is_complex() ? 2 * this->get_size()[1] : this->get_size()[1]; - return real_type ::create( - this->get_executor(), this->get_communicator(), - dim<2>{num_global_rows, num_cols}, - const_cast( - local_.create_real_view().get())); + return real_type::create(this->get_executor(), this->get_communicator(), + dim<2>{num_global_rows, num_cols}, + const_cast( + local_.create_real_view().get())); } diff --git a/core/test/base/dense_cache.cpp b/core/test/base/dense_cache.cpp index 6097e3df8e3..41bac8c01c6 100644 --- a/core/test/base/dense_cache.cpp +++ b/core/test/base/dense_cache.cpp @@ -30,10 +30,12 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ +#include + + #include -#include #include @@ -204,7 +206,8 @@ TYPED_TEST(DenseCache, VectorIsNotCopyAssigned) { using value_type = typename TestFixture::value_type; this->gen_cache({1, 1}); - gko::detail::DenseCache cache = this->cache; + gko::detail::DenseCache cache; + cache = this->cache; ASSERT_EQ(cache.get(), nullptr); GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); @@ -215,7 +218,8 @@ TYPED_TEST(DenseCache, VectorIsNotMoveAssigned) { using value_type = typename TestFixture::value_type; this->gen_cache({1, 1}); - gko::detail::DenseCache cache = std::move(this->cache); + gko::detail::DenseCache cache; + cache = std::move(this->cache); ASSERT_EQ(cache.get(), nullptr); GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp index cc3263bf7a4..6bbdbdf7a2d 100644 --- a/include/ginkgo/core/distributed/base.hpp +++ b/include/ginkgo/core/distributed/base.hpp @@ -88,7 +88,7 @@ class DistributedBase { } // namespace gko -#endif +#endif // GINKGO_BUILD_MPI #endif // GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index be08f6c1677..885edd2e776 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -131,7 +131,6 @@ class Vector const matrix_data& data, const Partition* partition); - void convert_to(Vector>* result) const override; void move_to(Vector>* result) override; @@ -369,7 +368,7 @@ class Vector } // namespace gko -#endif +#endif // GINKGO_BUILD_MPI #endif // GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ diff --git a/omp/test/distributed/vector_kernels.cpp b/omp/test/distributed/vector_kernels.cpp index ecd06a7e551..5ee65cfb24c 100644 --- a/omp/test/distributed/vector_kernels.cpp +++ b/omp/test/distributed/vector_kernels.cpp @@ 
-49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { + using comm_index_type = gko::distributed::comm_index_type; @@ -121,7 +122,6 @@ gko::device_matrix_data generate_random_matrix_data_array( md); } - TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); diff --git a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp index ad0bf5710a0..53f4d60b2e8 100644 --- a/reference/test/distributed/vector_kernels.cpp +++ b/reference/test/distributed/vector_kernels.cpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { + using comm_index_type = gko::distributed::comm_index_type; @@ -64,24 +65,18 @@ class Vector : public ::testing::Test { using global_index_type = typename std::tuple_element<2, decltype( ValueLocalGlobalIndexType())>::type; - using global_entry = gko::matrix_data_entry; using mtx = gko::matrix::Dense; - Vector() : ref(gko::ReferenceExecutor::create()), mapping{ref} {} + Vector() : ref(gko::ReferenceExecutor::create()) {} void validate( const gko::dim<2> size, const gko::distributed::Partition* partition, - std::initializer_list input_rows, - std::initializer_list input_cols, - std::initializer_list input_vals, - std::initializer_list< - std::initializer_list>> - output_entries) + I input_rows, I input_cols, + I input_vals, I>> output_entries) { - std::vector>> - ref_outputs; + std::vector>> ref_outputs; auto input = gko::device_matrix_data{ ref, size, input_rows, input_cols, input_vals}; for (auto entry : output_entries) { @@ -102,7 +97,6 @@ class Vector : public ::testing::Test { } std::shared_ptr ref; - gko::Array mapping; }; TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); @@ -112,11 +106,11 @@ TYPED_TEST(Vector, BuildsLocalEmpty) { using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; - this->mapping = {this->ref, {1, 0, 
2, 2, 0, 1, 1, 2}}; + gko::Array mapping{this->ref, {1, 0, 2, 2, 0, 1, 1, 2}}; comm_index_type num_parts = 3; auto partition = gko::distributed::Partition< local_index_type, global_index_type>::build_from_mapping(this->ref, - this->mapping, + mapping, num_parts); this->validate(gko::dim<2>{0, 0}, partition.get(), {}, {}, {}, @@ -128,11 +122,11 @@ TYPED_TEST(Vector, BuildsLocalSmall) { using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; - this->mapping = {this->ref, {1, 0}}; + gko::Array mapping{this->ref, {1, 0}}; comm_index_type num_parts = 2; auto partition = gko::distributed::Partition< local_index_type, global_index_type>::build_from_mapping(this->ref, - this->mapping, + mapping, num_parts); this->validate(gko::dim<2>{2, 2}, partition.get(), {0, 0, 1, 1}, @@ -144,11 +138,11 @@ TYPED_TEST(Vector, BuildsLocal) { using local_index_type = typename TestFixture::local_index_type; using global_index_type = typename TestFixture::global_index_type; - this->mapping = {this->ref, {1, 2, 0, 0, 2, 1}}; + gko::Array mapping{this->ref, {1, 2, 0, 0, 2, 1}}; comm_index_type num_parts = 3; auto partition = gko::distributed::Partition< local_index_type, global_index_type>::build_from_mapping(this->ref, - this->mapping, + mapping, num_parts); this->validate(gko::dim<2>{6, 8}, partition.get(), {0, 0, 1, 1, 2, 3, 4, 5}, diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index bd82b594537..5dd2fbff85f 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -115,7 +115,7 @@ class VectorCreation : public ::testing::Test { void SetUp() override { - ASSERT_GE(this->comm.size(), 3); + ASSERT_EQ(this->comm.size(), 3); init_executor(gko::ReferenceExecutor::create(), exec); } @@ -567,6 +567,7 @@ TYPED_TEST(VectorReductions, ComputeNorm1CopiesToHostOnlyIfNecessary) needs_transfers(this->exec)); } + template class VectorLocalOps : public ::testing::Test { 
public: From c5514f542e4ace5cecc409f9e871544e57d15fa5 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 4 Mar 2022 15:48:39 +0100 Subject: [PATCH 36/38] only allow const access to local vector this adds in turn mutable access through get_local_values and at_local Co-authored-by: Tobias Ribizel --- core/distributed/vector.cpp | 110 +++++++++++---------- include/ginkgo/core/distributed/vector.hpp | 68 ++++++++++--- test/mpi/distributed/vector.cpp | 70 ++++++------- 3 files changed, 147 insertions(+), 101 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 180839587c6..6607a6bc67f 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -133,10 +133,9 @@ void Vector::read_distributed( global_cols)); auto rank = this->get_communicator().rank(); - this->get_local()->fill(zero()); + local_.fill(zero()); exec->run(vector::make_build_local( - data, make_temporary_clone(exec, partition).get(), rank, - this->get_local())); + data, make_temporary_clone(exec, partition).get(), rank, &local_)); } @@ -157,7 +156,7 @@ void Vector::read_distributed( template void Vector::fill(const ValueType value) { - this->get_local()->fill(value); + local_.fill(value); } @@ -168,7 +167,7 @@ void Vector::convert_to( GKO_ASSERT(this->get_communicator().size() == result->get_communicator().size()); result->set_size(this->get_size()); - this->get_const_local()->convert_to(result->get_local()); + this->get_local_vector()->convert_to(&result->local_); } @@ -187,10 +186,10 @@ Vector::compute_absolute() const auto result = absolute_type::create(exec, this->get_communicator(), this->get_size(), - this->get_const_local()->get_size()); + this->get_local_vector()->get_size()); - exec->run(vector::make_outplace_absolute_dense(this->get_const_local(), - result->get_local())); + exec->run(vector::make_outplace_absolute_dense(this->get_local_vector(), + &result->local_)); return result; } @@ -199,20 +198,13 @@ Vector::compute_absolute() const 
template void Vector::compute_absolute_inplace() { - this->get_local()->compute_absolute_inplace(); + local_.compute_absolute_inplace(); } template const typename Vector::local_vector_type* -Vector::get_const_local() const -{ - return &local_; -} - - -template -typename Vector::local_vector_type* Vector::get_local() +Vector::get_local_vector() const { return &local_; } @@ -224,8 +216,8 @@ Vector::make_complex() const { auto result = complex_type::create( this->get_executor(), this->get_communicator(), this->get_size(), - this->get_const_local()->get_size(), - this->get_const_local()->get_stride()); + this->get_local_vector()->get_size(), + this->get_local_vector()->get_stride()); this->make_complex(result.get()); return result; } @@ -234,7 +226,7 @@ Vector::make_complex() const template void Vector::make_complex(Vector::complex_type* result) const { - this->get_const_local()->make_complex(result->get_local()); + this->get_local_vector()->make_complex(&result->local_); } @@ -242,10 +234,10 @@ template std::unique_ptr::real_type> Vector::get_real() const { - auto result = - real_type::create(this->get_executor(), this->get_communicator(), - this->get_size(), this->get_const_local()->get_size(), - this->get_const_local()->get_stride()); + auto result = real_type::create(this->get_executor(), + this->get_communicator(), this->get_size(), + this->get_local_vector()->get_size(), + this->get_local_vector()->get_stride()); this->get_real(result.get()); return result; } @@ -254,7 +246,7 @@ Vector::get_real() const template void Vector::get_real(Vector::real_type* result) const { - this->get_const_local()->get_real(result->get_local()); + this->get_local_vector()->get_real(&result->local_); } @@ -262,10 +254,10 @@ template std::unique_ptr::real_type> Vector::get_imag() const { - auto result = - real_type::create(this->get_executor(), this->get_communicator(), - this->get_size(), this->get_const_local()->get_size(), - this->get_const_local()->get_stride()); + auto result = 
real_type::create(this->get_executor(), + this->get_communicator(), this->get_size(), + this->get_local_vector()->get_size(), + this->get_local_vector()->get_stride()); this->get_imag(result.get()); return result; } @@ -274,21 +266,21 @@ Vector::get_imag() const template void Vector::get_imag(Vector::real_type* result) const { - this->get_const_local()->get_imag(result->get_local()); + this->get_local_vector()->get_imag(&result->local_); } template void Vector::scale(const LinOp* alpha) { - this->get_local()->scale(alpha); + local_.scale(alpha); } template void Vector::inv_scale(const LinOp* alpha) { - this->get_local()->inv_scale(alpha); + local_.inv_scale(alpha); } @@ -296,7 +288,7 @@ template void Vector::add_scaled(const LinOp* alpha, const LinOp* b) { auto dense_b = as>(b); - this->get_local()->add_scaled(alpha, dense_b->get_const_local()); + local_.add_scaled(alpha, dense_b->get_local_vector()); } @@ -304,7 +296,7 @@ template void Vector::sub_scaled(const LinOp* alpha, const LinOp* b) { auto dense_b = as>(b); - this->get_local()->sub_scaled(alpha, dense_b->get_const_local()); + local_.sub_scaled(alpha, dense_b->get_local_vector()); } @@ -315,8 +307,8 @@ void Vector::compute_dot(const LinOp* b, LinOp* result) const const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as>(result)); - this->get_const_local()->compute_dot(as(b)->get_const_local(), - dense_res.get()); + this->get_local_vector()->compute_dot(as(b)->get_local_vector(), + dense_res.get()); exec->synchronize(); auto use_host_buffer = exec->get_master() != exec && !gko::mpi::is_gpu_aware(); @@ -340,8 +332,8 @@ void Vector::compute_conj_dot(const LinOp* b, LinOp* result) const const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as>(result)); - this->get_const_local()->compute_conj_dot(as(b)->get_const_local(), - dense_res.get()); + this->get_local_vector()->compute_conj_dot( + as(b)->get_local_vector(), dense_res.get()); 
exec->synchronize(); auto use_host_buffer = exec->get_master() != exec && !gko::mpi::is_gpu_aware(); @@ -366,7 +358,7 @@ void Vector::compute_norm2(LinOp* result) const auto exec = this->get_executor(); const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); - exec->run(vector::make_compute_squared_norm2(this->get_const_local(), + exec->run(vector::make_compute_squared_norm2(this->get_local_vector(), dense_res.get())); exec->synchronize(); auto use_host_buffer = @@ -393,7 +385,7 @@ void Vector::compute_norm1(LinOp* result) const auto exec = this->get_executor(); const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); - this->get_const_local()->compute_norm1(dense_res.get()); + this->get_local_vector()->compute_norm1(dense_res.get()); exec->synchronize(); auto use_host_buffer = exec->get_master() != exec && !gko::mpi::is_gpu_aware(); @@ -411,26 +403,38 @@ void Vector::compute_norm1(LinOp* result) const template -void Vector::resize(dim<2> global_size, dim<2> local_size) +ValueType& Vector::at_local(size_type row, size_type col) noexcept { - if (this->get_size() != global_size) { - this->set_size(global_size); - } - this->get_local()->resize(local_size); + return local_.at(row, col); } +template +ValueType Vector::at_local(size_type row, + size_type col) const noexcept +{ + return local_.at(row, col); +} template -std::unique_ptr::real_type> -Vector::create_real_view() +ValueType& Vector::at_local(size_type idx) noexcept { - const auto num_global_rows = this->get_size()[0]; - const auto num_cols = - is_complex() ? 
2 * this->get_size()[1] : this->get_size()[1]; + return local_.at(idx); +} - return real_type::create(this->get_executor(), this->get_communicator(), - dim<2>{num_global_rows, num_cols}, - local_.create_real_view().get()); +template +ValueType Vector::at_local(size_type idx) const noexcept +{ + return local_.at(idx); +} + + +template +void Vector::resize(dim<2> global_size, dim<2> local_size) +{ + if (this->get_size() != global_size) { + this->set_size(global_size); + } + local_.resize(local_size); } diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 885edd2e776..9897834e92e 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -66,7 +66,7 @@ namespace distributed { * Using this approach the size of the global vectors, as well as the size of * the local vectors, will be automatically inferred. It is possible to create a * vector with specified global and local sizes and fill the local vectors using - * the accessor get_local. + * the accessor get_local_vector. * * @note Operations between two vectors (axpy, dot product, etc.) are only valid * if both vectors where created using the same partition. @@ -86,6 +86,7 @@ class Vector friend class EnableCreateMethod>; friend class EnablePolymorphicObject, LinOp>; friend class Vector>; + friend class Vector>; friend class Vector>; public: @@ -267,19 +268,65 @@ class Vector void compute_norm1(LinOp* result) const; /** - * Direct (read) access to the underlying local local_vector_type vectors. + * Returns a single element of the multi-vector. * - * @return a constant pointer to the underlying local_vector_type vectors + * @param row the local row of the requested element + * @param col the local column of the requested element + * + * @note the method has to be called on the same Executor the multi-vector + * is stored at (e.g. 
trying to call this method on a GPU multi-vector from + * the OMP results in a runtime error) */ - const local_vector_type* get_const_local() const; + value_type& at_local(size_type row, size_type col) noexcept; - /* - * Direct (read/write) access to the underlying local_vector_type Dense - * vectors. + /** + * @copydoc Vector::at_local(size_type, size_type) + */ + value_type at_local(size_type row, size_type col) const noexcept; + + /** + * Returns a single element of the multi-vector. + * + * Useful for iterating across all elements of the multi-vector. + * However, it is less efficient than the two-parameter variant of this + * method. + * + * @param idx a linear index of the requested element + * (ignoring the stride) + * + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU matrix from + * the OMP results in a runtime error) + */ + ValueType& at_local(size_type idx) noexcept; + + /** + * @copydoc Vector::at_local(size_type) + */ + ValueType at_local(size_type idx) const noexcept; + + /** + * Returns a pointer to the array of local values of the multi-vector. + * + * @return the pointer to the array of local values + */ + value_type* get_local_values(); + + /** + * @copydoc get_local_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_local_values(); + + /** + * Direct (read) access to the underlying local local_vector_type vectors. * * @return a constant pointer to the underlying local_vector_type vectors */ - local_vector_type* get_local(); + const local_vector_type* get_local_vector() const; /** * Create a real view of the (potentially) complex original multi-vector. @@ -288,11 +335,6 @@ class Vector * real with a reinterpret_cast with twice the number of columns and * double the stride.
*/ - std::unique_ptr create_real_view(); - - /** - * @copydoc create_real_view() - */ std::unique_ptr create_real_view() const; protected: diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index 5dd2fbff85f..0ef7f103c4e 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -161,9 +161,9 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) vec->read_distributed(this->md, this->part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); } @@ -180,14 +180,14 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); if (rank == 1) { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_MTX_NEAR( - vec->get_local(), + vec->get_local_vector(), l({{0., 1.}, {2., 3.}, {4., 5.}, {6., 7.}, {8., 9.}, {10., 11.}}), 0.0); } else { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), gko::dim<2>(0, 2)); } } @@ -218,9 +218,9 @@ TYPED_TEST(VectorCreation, CanReadGlobalDeviceMatrixData) vec->read_distributed(md, part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); } @@ -244,8 +244,8 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataScattered) vec->read_distributed(md, part.get()); 
GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), ref_size[rank]); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), ref_size[rank]); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); } @@ -271,9 +271,9 @@ TYPED_TEST(VectorCreation, CanReadLocalMatrixData) vec->read_distributed(md[rank], part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), gko::dim<2>(2, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), ref_data[rank], 0.0); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); } @@ -302,15 +302,15 @@ TYPED_TEST(VectorCreation, CanReadLocalMatrixDataSomeEmpty) GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); if (rank == 1) { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), gko::dim<2>(6, 2)); GKO_ASSERT_MTX_NEAR( - vec->get_local(), + vec->get_local_vector(), I>( {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}), 0.0); } else { - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local()->get_size(), + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), gko::dim<2>(0, 2)); } } @@ -331,7 +331,7 @@ TYPED_TEST(VectorCreation, CanCreateFromLocalVectorAndSize) local_vec.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), clone_local_vec, 0); } @@ -346,7 +346,7 @@ TYPED_TEST(VectorCreation, CanCreateFromLocalVectorWithoutSize) auto vec = dist_vec_type::create(this->exec, this->comm, local_vec.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); - GKO_ASSERT_MTX_NEAR(vec->get_local(), clone_local_vec, 0); + 
GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), clone_local_vec, 0); } @@ -717,7 +717,7 @@ TYPED_TEST(VectorLocalOps, ConvertsToPrecision) this->local_x->convert_to(local_tmp.get()); this->x->convert_to(tmp.get()); - GKO_ASSERT_MTX_NEAR(tmp->get_local(), local_tmp, 0.0); + GKO_ASSERT_MTX_NEAR(tmp->get_local_vector(), local_tmp, 0.0); } @@ -734,7 +734,7 @@ TYPED_TEST(VectorLocalOps, MovesToPrecision) this->local_x->move_to(local_tmp.get()); this->x->move_to(tmp.get()); - GKO_ASSERT_MTX_NEAR(tmp->get_local(), local_tmp, 0.0); + GKO_ASSERT_MTX_NEAR(tmp->get_local_vector(), local_tmp, 0.0); } @@ -746,7 +746,7 @@ TYPED_TEST(VectorLocalOps, ComputeAbsoluteSameAsLocal) auto local_abs = this->local_x->compute_absolute(); auto abs = this->x->compute_absolute(); - GKO_ASSERT_MTX_NEAR(abs->get_local(), local_abs, r::value); + GKO_ASSERT_MTX_NEAR(abs->get_local_vector(), local_abs, r::value); } @@ -758,7 +758,7 @@ TYPED_TEST(VectorLocalOps, ComputeAbsoluteInplaceSameAsLocal) this->local_x->compute_absolute_inplace(); this->x->compute_absolute_inplace(); - GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, r::value); } @@ -771,7 +771,7 @@ TYPED_TEST(VectorLocalOps, MakeComplexSameAsLocal) this->complex = this->x->make_complex(); this->local_complex = this->local_x->make_complex(); - GKO_ASSERT_MTX_NEAR(this->complex->get_local(), this->local_complex, 0.0); + GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, 0.0); } @@ -783,7 +783,7 @@ TYPED_TEST(VectorLocalOps, MakeComplexInplaceSameAsLocal) this->x->make_complex(this->complex.get()); this->local_x->make_complex(this->local_complex.get()); - GKO_ASSERT_MTX_NEAR(this->complex->get_local(), this->local_complex, 0.0); + GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, 0.0); } @@ -795,7 +795,7 @@ TYPED_TEST(VectorLocalOps, GetRealSameAsLocal) this->real = this->complex->get_real(); this->local_real = 
this->local_complex->get_real(); - GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); } @@ -807,7 +807,7 @@ TYPED_TEST(VectorLocalOps, GetRealInplaceSameAsLocal) this->complex->get_real(this->real.get()); this->local_complex->get_real(this->local_real.get()); - GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); } @@ -818,7 +818,7 @@ TYPED_TEST(VectorLocalOps, GetImagSameAsLocal) this->real = this->complex->get_imag(); this->local_real = this->local_complex->get_imag(); - GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); } @@ -829,7 +829,7 @@ TYPED_TEST(VectorLocalOps, GetImagInplaceSameAsLocal) this->complex->get_imag(this->real.get()); this->local_complex->get_imag(this->local_real.get()); - GKO_ASSERT_MTX_NEAR(this->real->get_local(), this->local_real, 0.0); + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); } @@ -844,7 +844,7 @@ TYPED_TEST(VectorLocalOps, FillSameAsLocal) this->x->fill(value); this->local_x->fill(value); - GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, 0.0); + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, 0.0); } @@ -856,7 +856,7 @@ TYPED_TEST(VectorLocalOps, ScaleSameAsLocal) this->x->scale(this->alpha.get()); this->local_x->scale(this->alpha.get()); - GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, r::value); } @@ -869,7 +869,7 @@ TYPED_TEST(VectorLocalOps, InvScaleSameAsLocal) this->x->inv_scale(this->alpha.get()); this->local_x->inv_scale(this->alpha.get()); - GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, r::value); } @@ -882,7 +882,7 @@ 
TYPED_TEST(VectorLocalOps, AddScaleSameAsLocal) this->x->add_scaled(this->alpha.get(), this->y.get()); this->local_x->add_scaled(this->alpha.get(), this->local_y.get()); - GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, r::value); } @@ -895,7 +895,7 @@ TYPED_TEST(VectorLocalOps, SubScaleSameAsLocal) this->x->sub_scaled(this->alpha.get(), this->y.get()); this->local_x->sub_scaled(this->alpha.get(), this->local_y.get()); - GKO_ASSERT_MTX_NEAR(this->x->get_local(), this->local_x, + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, r::value); } @@ -908,10 +908,10 @@ TYPED_TEST(VectorLocalOps, CreateRealViewSameAsLocal) auto local_rv = this->local_x->create_real_view(); GKO_ASSERT_EQUAL_ROWS(rv, this->x); - GKO_ASSERT_EQUAL_ROWS(rv->get_local(), local_rv); - GKO_ASSERT_EQUAL_COLS(rv->get_local(), local_rv); - EXPECT_EQ(rv->get_const_local()->get_stride(), local_rv->get_stride()); - GKO_ASSERT_MTX_NEAR(rv->get_const_local(), local_rv, 0.0); + GKO_ASSERT_EQUAL_ROWS(rv->get_local_vector(), local_rv); + GKO_ASSERT_EQUAL_COLS(rv->get_local_vector(), local_rv); + EXPECT_EQ(rv->get_local_vector()->get_stride(), local_rv->get_stride()); + GKO_ASSERT_MTX_NEAR(rv->get_local_vector(), local_rv, 0.0); } From ca079576c38e58bcb603fdd10de79b9c5170fd24 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 9 Mar 2022 10:55:29 +0100 Subject: [PATCH 37/38] fixes sonarcloud issues where it makes sense --- core/distributed/vector.cpp | 4 +-- include/ginkgo/core/distributed/base.hpp | 10 ++++++ include/ginkgo/core/distributed/vector.hpp | 8 +++++ omp/distributed/vector_kernels.cpp | 6 ++-- reference/distributed/vector_kernels.cpp | 6 ++-- test/mpi/distributed/vector.cpp | 40 +++++++++------------- 6 files changed, 45 insertions(+), 29 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 6607a6bc67f..322676c6ec0 100644 --- a/core/distributed/vector.cpp +++ 
b/core/distributed/vector.cpp @@ -409,8 +409,8 @@ ValueType& Vector::at_local(size_type row, size_type col) noexcept } template -ValueType Vector::at_local(size_type row, - size_type col) const noexcept +ValueType Vector::at_local(size_type row, size_type col) const + noexcept { return local_.at(row, col); } diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp index 6bbdbdf7a2d..217b80d3fcc 100644 --- a/include/ginkgo/core/distributed/base.hpp +++ b/include/ginkgo/core/distributed/base.hpp @@ -60,12 +60,22 @@ class DistributedBase { public: virtual ~DistributedBase() = default; + DistributedBase(const DistributedBase& other) = default; + + DistributedBase(DistributedBase&& other) = default; + /** * Copy assignment that doesn't change the used mpi::communicator. * @return unmodified *this */ DistributedBase& operator=(const DistributedBase&) { return *this; } + /** + * Move assignment that doesn't change the used mpi::communicator. + * @return unmodified *this + */ + DistributedBase& operator=(DistributedBase&&) noexcept { return *this; } + /** * Access the used mpi::communicator. * @return used mpi::communicator diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 9897834e92e..6f9c9ea44ff 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -370,6 +370,10 @@ class Vector /** * Creates a distributed vector from local vectors with a specified size. * + * @note The data from the local_vector will be moved into the new + * distributed vector. This means, access to local_vector + * will be invalid after this call. + * * @param exec Executor associated with this vector * @param comm Communicator associated with this vector * @param global_size The global size of the vector @@ -384,6 +388,10 @@ class Vector * be deduced from the local sizes, which will incur a collective * communication.
* + * @note The data from the local_vector will be moved into the new + * distributed vector. This means, access to local_vector + * will be invalid after this call. + * * @param exec Executor associated with this vector * @param comm Communicator associated with this vector * @param local_vector The underlying local vector, the data will be moved diff --git a/omp/distributed/vector_kernels.cpp b/omp/distributed/vector_kernels.cpp index aa716e4f778..8aaba548912 100644 --- a/omp/distributed/vector_kernels.cpp +++ b/omp/distributed/vector_kernels.cpp @@ -57,7 +57,8 @@ void build_local( auto range_starting_indices = partition->get_range_starting_indices(); auto num_ranges = partition->get_num_ranges(); - auto find_range = [&](GlobalIndexType idx, size_type hint) { + auto find_range = [range_bounds, num_ranges](GlobalIndexType idx, + size_type hint) { if (range_bounds[hint] <= idx && idx < range_bounds[hint + 1]) { return hint; } else { @@ -66,7 +67,8 @@ void build_local( return static_cast(std::distance(range_bounds + 1, it)); } }; - auto map_to_local = [&](GlobalIndexType idx, + auto map_to_local = [range_bounds, range_starting_indices]( + GlobalIndexType idx, size_type range_id) -> LocalIndexType { return static_cast(idx - range_bounds[range_id]) + range_starting_indices[range_id]; diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp index d9eb90f80ff..a818b73be04 100644 --- a/reference/distributed/vector_kernels.cpp +++ b/reference/distributed/vector_kernels.cpp @@ -57,7 +57,8 @@ void build_local( auto range_starting_indices = partition->get_range_starting_indices(); auto num_ranges = partition->get_num_ranges(); - auto find_range = [&](GlobalIndexType idx, size_type hint) { + auto find_range = [range_bounds, num_ranges](GlobalIndexType idx, + size_type hint) { if (range_bounds[hint] <= idx && idx < range_bounds[hint + 1]) { return hint; } else { @@ -66,7 +67,8 @@ void build_local( return 
static_cast(std::distance(range_bounds + 1, it)); } }; - auto map_to_local = [&](GlobalIndexType idx, + auto map_to_local = [range_bounds, range_starting_indices]( + GlobalIndexType idx, size_type range_id) -> LocalIndexType { return static_cast(idx - range_bounds[range_id]) + range_starting_indices[range_id]; diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index 0ef7f103c4e..0200ffbfee2 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -148,7 +148,6 @@ TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) { - using part_type = typename TestFixture::part_type; using value_type = typename TestFixture::value_type; auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); auto rank = this->comm.rank(); @@ -170,7 +169,6 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) { using part_type = typename TestFixture::part_type; - using value_type = typename TestFixture::value_type; auto part = gko::share(part_type::build_from_contiguous( this->exec, {this->exec, {0, 0, 6, 6}})); auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); @@ -195,21 +193,23 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) TYPED_TEST(VectorCreation, CanReadGlobalDeviceMatrixData) { - using it = typename TestFixture::global_index_type; + using index_type = typename TestFixture::global_index_type; using d_md_type = typename TestFixture::d_md_type; using part_type = typename TestFixture::part_type; - using vt = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; d_md_type md{ this->exec, gko::dim<2>{6, 2}, - gko::Array{this->exec, I{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}}, - gko::Array{this->exec, I{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}, - gko::Array{this->exec, - I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; + gko::Array{ + 
this->exec, I{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}}, + gko::Array{ + this->exec, I{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}, + gko::Array{ + this->exec, I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; auto part = gko::share(part_type::build_from_contiguous( this->exec, {this->exec, {0, 2, 4, 6}})); auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); auto rank = this->comm.rank(); - I> ref_data[3] = { + I> ref_data[3] = { {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}, @@ -244,7 +244,8 @@ TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataScattered) vec->read_distributed(md, part.get()); GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); - GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), ref_size[rank]); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + ref_size[rank]); GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); } @@ -358,7 +359,6 @@ class VectorReductions : public ::testing::Test { using global_index_type = gko::int64; using part_type = gko::distributed::Partition; - using md_type = gko::matrix_data; using dist_vec_type = gko::distributed::Vector; using dense_type = gko::matrix::Dense; using real_dense_type = typename dense_type::real_type; @@ -518,7 +518,6 @@ TYPED_TEST(VectorReductions, ComputesNorm1IsSameAsDense) TYPED_TEST(VectorReductions, ComputeDotCopiesToHostOnlyIfNecessary) { - using value_type = typename TestFixture::value_type; this->init_result(); auto transfer_count_before = this->logger->get_transfer_count(); @@ -531,7 +530,6 @@ TYPED_TEST(VectorReductions, ComputeDotCopiesToHostOnlyIfNecessary) TYPED_TEST(VectorReductions, ComputeConjDotCopiesToHostOnlyIfNecessary) { - using value_type = typename TestFixture::value_type; this->init_result(); auto transfer_count_before = this->logger->get_transfer_count(); @@ -544,7 +542,6 @@ TYPED_TEST(VectorReductions, ComputeConjDotCopiesToHostOnlyIfNecessary) TYPED_TEST(VectorReductions, 
ComputeNorm2CopiesToHostOnlyIfNecessary) { - using value_type = typename TestFixture::value_type; this->init_result(); auto transfer_count_before = this->logger->get_transfer_count(); @@ -557,7 +554,6 @@ TYPED_TEST(VectorReductions, ComputeNorm2CopiesToHostOnlyIfNecessary) TYPED_TEST(VectorReductions, ComputeNorm1CopiesToHostOnlyIfNecessary) { - using value_type = typename TestFixture::value_type; this->init_result(); auto transfer_count_before = this->logger->get_transfer_count(); @@ -572,11 +568,8 @@ template class VectorLocalOps : public ::testing::Test { public: using value_type = ValueType; - using mixed_type = gko::next_precision; using local_index_type = gko::int32; using global_index_type = gko::int64; - using part_type = - gko::distributed::Partition; using dist_vec_type = gko::distributed::Vector; using complex_dist_vec_type = typename dist_vec_type::complex_type; using real_dist_vec_type = typename dist_vec_type ::real_type; @@ -706,7 +699,6 @@ TYPED_TEST(VectorLocalOps, AdvancedApplyNotSupported) TYPED_TEST(VectorLocalOps, ConvertsToPrecision) { - using Vector = typename TestFixture::dist_vec_type; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; using OtherVector = typename gko::distributed::Vector; @@ -723,7 +715,6 @@ TYPED_TEST(VectorLocalOps, ConvertsToPrecision) TYPED_TEST(VectorLocalOps, MovesToPrecision) { - using Vector = typename TestFixture::dist_vec_type; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; using OtherVector = typename gko::distributed::Vector; @@ -746,7 +737,8 @@ TYPED_TEST(VectorLocalOps, ComputeAbsoluteSameAsLocal) auto local_abs = this->local_x->compute_absolute(); auto abs = this->x->compute_absolute(); - GKO_ASSERT_MTX_NEAR(abs->get_local_vector(), local_abs, r::value); + GKO_ASSERT_MTX_NEAR(abs->get_local_vector(), local_abs, + r::value); } @@ -771,7 +763,8 @@ TYPED_TEST(VectorLocalOps, MakeComplexSameAsLocal) this->complex = 
this->x->make_complex(); this->local_complex = this->local_x->make_complex(); - GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, 0.0); + GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, + 0.0); } @@ -783,7 +776,8 @@ TYPED_TEST(VectorLocalOps, MakeComplexInplaceSameAsLocal) this->x->make_complex(this->complex.get()); this->local_x->make_complex(this->local_complex.get()); - GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, 0.0); + GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, + 0.0); } From 5edadcf7f9d9252cefd61f39483a267d0a5eceff Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 21 Apr 2022 11:29:56 +0000 Subject: [PATCH 38/38] Format files Co-authored-by: Marcel Koch --- test/matrix/dense_kernels.cpp | 4 ++-- test/mpi/distributed/vector.cpp | 15 +++++++++------ test/utils/executor.hpp | 12 ++++++++---- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index 6183cfcc487..b00a9045085 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1340,9 +1340,9 @@ TEST_F(Dense, ComputeNorm2SquaredIsEquivalentToRef) auto dnorm = NormVector::create(exec, norm_size); gko::kernels::reference::dense::compute_squared_norm2(ref, x.get(), - norm_expected.get()); + norm_expected.get()); gko::kernels::EXEC_NAMESPACE::dense::compute_squared_norm2(exec, dx.get(), - dnorm.get()); + dnorm.get()); GKO_ASSERT_MTX_NEAR(dnorm, norm_expected, r::value); } diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index 0200ffbfee2..3f8abcc8422 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -89,12 +89,15 @@ class HostToDeviceLogger : public gko::log::Logger { template class VectorCreation : public ::testing::Test { public: - using value_type = typename std::tuple_element< - 0, decltype(ValueLocalGlobalIndexType())>::type; - 
using local_index_type = typename std::tuple_element< - 1, decltype(ValueLocalGlobalIndexType())>::type; - using global_index_type = typename std::tuple_element< - 2, decltype(ValueLocalGlobalIndexType())>::type; + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; using part_type = gko::distributed::Partition; using md_type = gko::matrix_data; diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 41b41460b40..9e28237f625 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -115,7 +115,8 @@ void init_executor(std::shared_ptr ref, gko::mpi::communicator comm) { ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); - auto device_id = comm.node_local_rank() % gko::CudaExecutor::get_num_devices(); + auto device_id = + comm.node_local_rank() % gko::CudaExecutor::get_num_devices(); exec = gko::CudaExecutor::create(device_id, ref); } @@ -125,7 +126,8 @@ void init_executor(std::shared_ptr ref, gko::mpi::communicator comm) { ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - auto device_id = comm.node_local_rank() % gko::HipExecutor::get_num_devices(); + auto device_id = + comm.node_local_rank() % gko::HipExecutor::get_num_devices(); exec = gko::HipExecutor::create(device_id, ref); } @@ -135,10 +137,12 @@ void init_executor(std::shared_ptr ref, gko::mpi::communicator comm) { if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { - auto device_id = comm.node_local_rank() % gko::DpcppExecutor::get_num_devices("gpu"); + auto device_id = + comm.node_local_rank() % gko::DpcppExecutor::get_num_devices("gpu"); exec = gko::DpcppExecutor::create(device_id, ref); } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { - auto device_id = comm.node_local_rank() % 
gko::DpcppExecutor::get_num_devices("cpu"); + auto device_id = + comm.node_local_rank() % gko::DpcppExecutor::get_num_devices("cpu"); exec = gko::DpcppExecutor::create(device_id, ref); } else { FAIL() << "No suitable DPC++ devices";