diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index de5fd2bbcda..32162604869 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -163,6 +163,7 @@ ginkgo_build_test_name(${test_name} test_target_name) endfunction(ginkgo_create_hip_test) function(ginkgo_create_common_test test_name) + cmake_parse_arguments(PARSE_ARGV 1 common_test "" "" "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES") set(executors) if(GINKGO_BUILD_OMP) list(APPEND executors omp) @@ -176,6 +177,9 @@ function(ginkgo_create_common_test test_name) if(GINKGO_BUILD_DPCPP) list(APPEND executors dpcpp) endif() + foreach(disabled_exec ${common_test_DISABLE_EXECUTORS}) + list(REMOVE_ITEM executors ${disabled_exec}) + endforeach() foreach(exec ${executors}) ginkgo_build_test_name(${test_name} test_target_name) # build executor typename out of shorthand @@ -189,7 +193,7 @@ function(ginkgo_create_common_test test_name) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec}) - target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + target_link_libraries(${test_target_name} PRIVATE ${common_test_ADDITIONAL_LIBRARIES}) # use float for DPC++ if necessary if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) @@ -209,4 +213,4 @@ function(ginkgo_create_common_and_reference_test test_name) target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) ginkgo_set_test_target_properties(${test_name}_reference ${test_target_name}) -endfunction() \ No newline at end of file +endfunction() diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 26115b184c0..cafffb708f6 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -5,6 +5,7 @@ set(UNIFIED_SOURCES components/fill_array_kernels.cpp components/precision_conversion_kernels.cpp components/reduce_array_kernels.cpp + distributed/partition_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/common/cuda_hip/distributed/partition_kernels.hpp.inc b/common/cuda_hip/distributed/partition_kernels.hpp.inc new file mode 100644 index 00000000000..e8c0359533f --- /dev/null +++ b/common/cuda_hip/distributed/partition_kernels.hpp.inc @@ -0,0 +1,144 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +namespace kernel { + + +template +void setup_sizes_ids_permutation( + std::shared_ptr exec, size_type num_ranges, + comm_index_type num_parts, const GlobalIndexType* range_offsets, + const comm_index_type* range_parts, Array& range_sizes, + Array& part_ids, Array& permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, + auto range_offsets, auto range_parts, auto range_sizes, + auto part_ids, auto permutation) { + if (i == 0) { + // set sentinel value at the end + part_ids[num_ranges] = num_parts; + } + range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; + part_ids[i] = range_parts[i]; + permutation[i] = static_cast(i); + }, + num_ranges, num_ranges, num_parts, range_offsets, range_parts, + range_sizes.get_data(), part_ids.get_data(), permutation.get_data()); +} + + +template +void compute_part_sizes_and_starting_indices( + std::shared_ptr exec, size_type num_ranges, + const Array& range_sizes, + const Array& part_ids, + const Array& permutation, LocalIndexType* starting_indices, + LocalIndexType* part_sizes) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto grouped_starting_indices, + auto grouped_part_ids, auto orig_idxs, + auto starting_indices, auto part_sizes) { + auto prev_part = i > 0 ? grouped_part_ids[i - 1] + : invalid_index(); + auto cur_part = grouped_part_ids[i]; + auto next_part = + grouped_part_ids[i + 1]; // last element has to be num_parts + if (cur_part != next_part) { + part_sizes[cur_part] = grouped_starting_indices[i]; + } + // write result shifted by one entry to get exclusive prefix sum + starting_indices[orig_idxs[i]] = + prev_part == cur_part ? grouped_starting_indices[i - 1] + : LocalIndexType{}; + }, + num_ranges, range_sizes.get_const_data(), part_ids.get_const_data(), + permutation.get_const_data(), starting_indices, part_sizes); +} + + +} // namespace kernel + + +template +void build_starting_indices(std::shared_ptr exec, + const GlobalIndexType* range_offsets, + const comm_index_type* range_parts, + size_type num_ranges, comm_index_type num_parts, + comm_index_type& num_empty_parts, + LocalIndexType* starting_indices, + LocalIndexType* part_sizes) +{ + if (num_ranges > 0) { + Array range_sizes{exec, num_ranges}; + // num_parts sentinel at the end + Array tmp_part_ids{exec, num_ranges + 1}; + Array permutation{exec, num_ranges}; + // set part_sizes to 0 in case of empty parts + components::fill_array(exec, part_sizes, num_parts, LocalIndexType{}); + + kernel::setup_sizes_ids_permutation( + exec, num_ranges, num_parts, range_offsets, range_parts, + range_sizes, tmp_part_ids, permutation); + + auto tmp_part_id_ptr = + thrust::device_pointer_cast(tmp_part_ids.get_data()); + auto range_sizes_ptr = + thrust::device_pointer_cast(range_sizes.get_data()); + auto permutation_ptr = + thrust::device_pointer_cast(permutation.get_data()); + auto value_it = thrust::make_zip_iterator( + thrust::make_tuple(range_sizes_ptr, permutation_ptr)); + // group range_sizes by part ID + thrust::stable_sort_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, value_it); + // compute inclusive prefix sum for each part + thrust::inclusive_scan_by_key(thrust::device, tmp_part_id_ptr, + tmp_part_id_ptr + num_ranges, + range_sizes_ptr, range_sizes_ptr); + // write back the results + kernel::compute_part_sizes_and_starting_indices( + exec, num_ranges, range_sizes, tmp_part_ids, permutation, + starting_indices, part_sizes); + num_empty_parts = thrust::count(thrust::device, part_sizes, + part_sizes + num_parts, 0); + } else { + num_empty_parts = num_parts; + } +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..08d79e17f43 --- /dev/null +++ b/common/unified/distributed/partition_kernels.cpp @@ -0,0 +1,182 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +#include "common/unified/base/kernel_launch.hpp" +#include "common/unified/base/kernel_launch_reduction.hpp" +#include "core/components/prefix_sum_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +namespace partition { + + +using distributed::comm_index_type; + +void count_ranges(std::shared_ptr exec, + const Array& mapping, size_type& num_ranges) +{ + Array result{exec, 1}; + run_kernel_reduction( + exec, + [] GKO_KERNEL(auto i, auto mapping) { + auto cur_part = mapping[i]; + auto prev_part = i == 0 ? comm_index_type{-1} : mapping[i - 1]; + return cur_part != prev_part ? 1 : 0; + }, + [] GKO_KERNEL(auto a, auto b) { return a + b; }, + [] GKO_KERNEL(auto a) { return a; }, size_type{}, result.get_data(), + mapping.get_num_elems(), mapping); + num_ranges = exec->copy_val_to_host(result.get_const_data()); +} + + +template +void build_from_contiguous(std::shared_ptr exec, + const Array& ranges, + GlobalIndexType* range_bounds, + comm_index_type* part_ids) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto ranges, auto bounds, auto ids) { + if (i == 0) { + bounds[0] = 0; + } + bounds[i + 1] = ranges[i + 1]; + ids[i] = i; + }, + ranges.get_num_elems() - 1, ranges, range_bounds, part_ids); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); + + +template +void build_from_mapping(std::shared_ptr exec, + const Array& mapping, + GlobalIndexType* range_bounds, + comm_index_type* part_ids) +{ + Array range_starting_index{exec, mapping.get_num_elems() + 1}; + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto mapping, auto range_starting_index) { + const auto prev_part = + i > 0 ? mapping[i - 1] : invalid_index(); + const auto cur_part = mapping[i]; + range_starting_index[i] = cur_part != prev_part ? 1 : 0; + }, + mapping.get_num_elems(), mapping, range_starting_index); + components::prefix_sum(exec, range_starting_index.get_data(), + mapping.get_num_elems() + 1); + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto size, auto mapping, + auto range_starting_index, auto ranges, + auto range_parts) { + const auto prev_part = + i > 0 ? mapping[i - 1] : invalid_index(); + const auto cur_part = + i < size ? mapping[i] : invalid_index(); + if (cur_part != prev_part) { + auto out_idx = range_starting_index[i]; + ranges[out_idx] = i; + if (i < size) { + range_parts[out_idx] = cur_part; + } + } + }, + mapping.get_num_elems() + 1, mapping.get_num_elems(), mapping, + range_starting_index, range_bounds, part_ids); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); + + +template +void build_ranges_from_global_size(std::shared_ptr exec, + comm_index_type num_parts, + GlobalIndexType global_size, + Array& ranges) +{ + const auto size_per_part = global_size / num_parts; + const auto rest = global_size - (num_parts * size_per_part); + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto size_per_part, auto rest, auto ranges) { + ranges[i] = size_per_part + (i < rest ? 1 : 0); + }, + ranges.get_num_elems() - 1, size_per_part, rest, ranges.get_data()); + components::prefix_sum(exec, ranges.get_data(), ranges.get_num_elems()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); + + +template +void has_ordered_parts( + std::shared_ptr exec, + const distributed::Partition* partition, + bool* result) +{ + const auto part_ids = partition->get_part_ids(); + const auto num_ranges = partition->get_num_ranges(); + // it is necessary to use uint32 as a temporary result, since + // bool can't be used with suffles + Array result_uint32{exec, 1}; + run_kernel_reduction( + exec, + [] GKO_KERNEL(auto i, const auto part_ids) { + return static_cast(part_ids[i] < part_ids[i + 1]); + }, + [] GKO_KERNEL(const auto a, const auto b) { + return static_cast(a && b); + }, + [] GKO_KERNEL(const auto a) { return a; }, uint32(1), + result_uint32.get_data(), num_ranges - 1, part_ids); + *result = static_cast( + exec->copy_val_to_host(result_uint32.get_const_data())); +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_IS_ORDERED); + + +} // namespace partition +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 68b116315a4..3115e7a928b 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(ginkgo base/mtx_io.cpp base/perturbation.cpp base/version.cpp + distributed/partition.cpp factorization/ic.cpp factorization/ilu.cpp factorization/par_ic.cpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index fa7221e2d09..bbd9a43161a 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/precision_conversion_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "core/components/reduce_array_kernels.hpp" +#include "core/distributed/partition_kernels.hpp" #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/ic_kernels.hpp" #include "core/factorization/ilu_kernels.hpp" @@ -111,6 +112,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. _macro(IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(_macro) +#define GKO_STUB_LOCAL_GLOBAL_TYPE(_macro) \ + template \ + _macro(LocalIndexType, GlobalIndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ + GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(_macro) + #define GKO_STUB_NON_COMPLEX_VALUE_AND_INDEX_TYPE(_macro) \ template \ _macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ @@ -210,6 +216,20 @@ GKO_STUB_INDEX_TYPE(GKO_DECLARE_INDEX_SET_LOCAL_TO_GLOBAL_KERNEL); } // namespace index_set +namespace partition { + + +GKO_STUB(GKO_PARTITION_COUNT_RANGES); +GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); +GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); +GKO_STUB_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); +GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); +GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); + + +} // namespace partition + + namespace dense { diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp new file mode 100644 index 00000000000..7de36e380db --- /dev/null +++ b/core/distributed/partition.cpp @@ -0,0 +1,145 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include "core/distributed/partition_kernels.hpp" + + +namespace gko { +namespace distributed { +namespace partition { + + +GKO_REGISTER_OPERATION(count_ranges, partition::count_ranges); +GKO_REGISTER_OPERATION(build_from_mapping, partition::build_from_mapping); +GKO_REGISTER_OPERATION(build_from_contiguous, partition::build_from_contiguous); +GKO_REGISTER_OPERATION(build_ranges_from_global_size, + partition::build_ranges_from_global_size); +GKO_REGISTER_OPERATION(build_starting_indices, + partition::build_starting_indices); +GKO_REGISTER_OPERATION(has_ordered_parts, partition::has_ordered_parts); + + +} // namespace partition + + +template +std::unique_ptr> +Partition::build_from_mapping( + std::shared_ptr exec, const Array& mapping, + comm_index_type num_parts) +{ + auto local_mapping = make_temporary_clone(exec, &mapping); + size_type num_ranges{}; + exec->run(partition::make_count_ranges(*local_mapping.get(), num_ranges)); + auto result = Partition::create(exec, num_parts, num_ranges); + exec->run(partition::make_build_from_mapping(*local_mapping.get(), + result->offsets_.get_data(), + result->part_ids_.get_data())); + result->finalize_construction(); + return result; +} + + +template +std::unique_ptr> +Partition::build_from_contiguous( + std::shared_ptr exec, const Array& ranges) +{ + auto local_ranges = make_temporary_clone(exec, &ranges); + auto result = Partition::create( + exec, static_cast(ranges.get_num_elems() - 1), + ranges.get_num_elems() - 1); + exec->run(partition::make_build_from_contiguous( + *local_ranges.get(), result->offsets_.get_data(), + result->part_ids_.get_data())); + result->finalize_construction(); + return result; +} + + +template +std::unique_ptr> +Partition::build_from_global_size_uniform( + std::shared_ptr exec, comm_index_type num_parts, + GlobalIndexType global_size) +{ + Array ranges(exec, num_parts + 1); + exec->run(partition::make_build_ranges_from_global_size( + num_parts, global_size, ranges)); + return Partition::build_from_contiguous(exec, ranges); +} + + +template +void Partition::finalize_construction() +{ + auto exec = offsets_.get_executor(); + exec->run(partition::make_build_starting_indices( + offsets_.get_const_data(), part_ids_.get_const_data(), get_num_ranges(), + get_num_parts(), num_empty_parts_, starting_indices_.get_data(), + part_sizes_.get_data())); + size_ = offsets_.get_executor()->copy_val_to_host( + offsets_.get_const_data() + get_num_ranges()); +} + + +template +bool Partition::has_connected_parts() +{ + return this->get_num_parts() - this->get_num_empty_parts() == + this->get_num_ranges(); +} + + +template +bool Partition::has_ordered_parts() +{ + if (this->has_connected_parts()) { + auto exec = this->get_executor(); + bool has_ordered_parts; + exec->run(partition::make_has_ordered_parts(this, &has_ordered_parts)); + return has_ordered_parts; + } else { + return false; + } +} + + +#define GKO_DECLARE_PARTITION(_local, _global) class Partition<_local, _global> +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(GKO_DECLARE_PARTITION); + + +} // namespace distributed +} // namespace gko diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp new file mode 100644 index 00000000000..e3aa6c11a00 --- /dev/null +++ b/core/distributed/partition_kernels.hpp @@ -0,0 +1,113 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_DISTRIBUTED_PARTITION_KERNELS_HPP_ +#define GKO_CORE_DISTRIBUTED_PARTITION_KERNELS_HPP_ + + +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_PARTITION_COUNT_RANGES \ + void count_ranges(std::shared_ptr exec, \ + const Array& mapping, \ + size_type& num_ranges) + +#define GKO_PARTITION_BUILD_FROM_CONTIGUOUS(GlobalIndexType) \ + void build_from_contiguous(std::shared_ptr exec, \ + const Array& ranges, \ + GlobalIndexType* range_bounds, \ + comm_index_type* part_ids) + +#define GKO_PARTITION_BUILD_FROM_MAPPING(GlobalIndexType) \ + void build_from_mapping(std::shared_ptr exec, \ + const Array& mapping, \ + GlobalIndexType* range_bounds, \ + comm_index_type* part_ids) + +#define GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE(GlobalIndexType) \ + void build_ranges_from_global_size( \ + std::shared_ptr exec, \ + comm_index_type num_parts, GlobalIndexType global_size, \ + Array& ranges) + +#define GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType, \ + GlobalIndexType) \ + void build_starting_indices(std::shared_ptr exec, \ + const GlobalIndexType* range_offsets, \ + const int* range_parts, size_type num_ranges, \ + comm_index_type num_parts, \ + comm_index_type& num_empty_parts, \ + LocalIndexType* ranks, LocalIndexType* sizes) + +#define GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType, GlobalIndexType) \ + void has_ordered_parts( \ + std::shared_ptr exec, \ + const distributed::Partition* \ + partition, \ + bool* result) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + using comm_index_type = distributed::comm_index_type; \ + GKO_PARTITION_COUNT_RANGES; \ + template \ + GKO_PARTITION_BUILD_FROM_CONTIGUOUS(GlobalIndexType); \ + template \ + GKO_PARTITION_BUILD_FROM_MAPPING(GlobalIndexType); \ + template \ + GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE(GlobalIndexType); \ + template \ + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES(LocalIndexType, \ + GlobalIndexType); \ + template \ + GKO_DECLARE_PARTITION_IS_ORDERED(LocalIndexType, GlobalIndexType) + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_DISTRIBUTED_PARTITION_KERNELS_HPP_ diff --git a/core/test/utils.hpp b/core/test/utils.hpp index 2bcae20d065..a848d51765a 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -85,6 +85,12 @@ using RealValueTypes = using IndexTypes = ::testing::Types; + +using LocalGlobalIndexTypes = + ::testing::Types, std::tuple, + std::tuple>; + + using PODTypes = #if GINKGO_DPCPP_SINGLE_MODE ::testing::Types; diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index c5e8a7f44b9..fd1ed8ca3a4 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -75,6 +75,7 @@ target_sources(ginkgo_cuda base/version.cpp components/device_matrix_data_kernels.cu components/prefix_sum_kernels.cu + distributed/partition_kernels.cu factorization/factorization_kernels.cu factorization/ic_kernels.cu factorization/ilu_kernels.cu diff --git a/cuda/base/kernel_launch.cuh b/cuda/base/kernel_launch.cuh index 5179a5cc27d..7166a817e13 100644 --- a/cuda/base/kernel_launch.cuh +++ b/cuda/base/kernel_launch.cuh @@ -79,23 +79,27 @@ template void run_kernel(std::shared_ptr exec, KernelFunction fn, size_type size, KernelArgs&&... args) { - gko::cuda::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size, block_size); - generic_kernel_1d<<>>(static_cast(size), fn, - map_to_device(args)...); + if (size > 0) { + gko::cuda::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size, block_size); + generic_kernel_1d<<>>( + static_cast(size), fn, map_to_device(args)...); + } } template void run_kernel(std::shared_ptr exec, KernelFunction fn, dim<2> size, KernelArgs&&... args) { - gko::cuda::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); - generic_kernel_2d<<>>(static_cast(size[0]), - static_cast(size[1]), - fn, map_to_device(args)...); + if (size[0] * size[1] > 0) { + gko::cuda::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + generic_kernel_2d<<>>( + static_cast(size[0]), static_cast(size[1]), fn, + map_to_device(args)...); + } } diff --git a/cuda/base/kernel_launch_solver.cuh b/cuda/base/kernel_launch_solver.cuh index f4da60ddede..16f0540cff7 100644 --- a/cuda/base/kernel_launch_solver.cuh +++ b/cuda/base/kernel_launch_solver.cuh @@ -62,12 +62,14 @@ void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, KernelArgs&&... args) { - gko::cuda::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); - generic_kernel_2d_solver<<>>( - static_cast(size[0]), static_cast(size[1]), - static_cast(default_stride), fn, map_to_device(args)...); + if (size[0] * size[1] > 0) { + gko::cuda::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + generic_kernel_2d_solver<<>>( + static_cast(size[0]), static_cast(size[1]), + static_cast(default_stride), fn, map_to_device(args)...); + } } diff --git a/cuda/distributed/partition_kernels.cu b/cuda/distributed/partition_kernels.cu new file mode 100644 index 00000000000..2ab5bab2ebe --- /dev/null +++ b/cuda/distributed/partition_kernels.cu @@ -0,0 +1,59 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +#include +#include +#include +#include +#include + + +#include "common/unified/base/kernel_launch.hpp" +#include "core/components/fill_array_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +namespace partition { + + +#include "common/cuda_hip/distributed/partition_kernels.hpp.inc" + + +} // namespace partition +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 8122aec30cf..f896ac396a8 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -20,6 +20,7 @@ target_sources(ginkgo_dpcpp base/index_set_kernels.dp.cpp components/device_matrix_data_kernels.dp.cpp components/prefix_sum_kernels.dp.cpp + distributed/partition_kernels.dp.cpp factorization/ic_kernels.dp.cpp factorization/ilu_kernels.dp.cpp factorization/factorization_kernels.dp.cpp diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp new file mode 100644 index 00000000000..c52c31f5241 --- /dev/null +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -0,0 +1,60 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace partition { + + +// TODO: wait until https://github.com/oneapi-src/oneDPL/pull/388 is release to +// implement it similar to cuda/hip +template +void build_starting_indices(std::shared_ptr exec, + const GlobalIndexType* range_offsets, + const comm_index_type* range_parts, + size_type num_ranges, comm_index_type num_parts, + comm_index_type& num_empty_parts, + LocalIndexType* starting_indices, + LocalIndexType* part_sizes) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); + + +} // namespace partition +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 3124ab0775c..55b2c4605b8 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -160,6 +160,7 @@ set(GINKGO_HIP_SOURCES base/version.hip.cpp components/device_matrix_data_kernels.hip.cpp components/prefix_sum_kernels.hip.cpp + distributed/partition_kernels.hip.cpp factorization/factorization_kernels.hip.cpp factorization/ic_kernels.hip.cpp factorization/ilu_kernels.hip.cpp diff --git a/hip/base/kernel_launch.hip.hpp b/hip/base/kernel_launch.hip.hpp index 6c627838fea..7831b5925a2 100644 --- a/hip/base/kernel_launch.hip.hpp +++ b/hip/base/kernel_launch.hip.hpp @@ -82,23 +82,29 @@ template void run_kernel(std::shared_ptr exec, KernelFunction fn, size_type size, KernelArgs&&... args) { - gko::hip::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size, block_size); - hipLaunchKernelGGL(generic_kernel_1d, num_blocks, block_size, 0, 0, - static_cast(size), fn, map_to_device(args)...); + if (size > 0) { + gko::hip::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size, block_size); + hipLaunchKernelGGL(generic_kernel_1d, num_blocks, block_size, 0, 0, + static_cast(size), fn, + map_to_device(args)...); + } } template void run_kernel(std::shared_ptr exec, KernelFunction fn, dim<2> size, KernelArgs&&... args) { - gko::hip::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); - hipLaunchKernelGGL(generic_kernel_2d, num_blocks, block_size, 0, 0, - static_cast(size[0]), static_cast(size[1]), - fn, map_to_device(args)...); + if (size[0] * size[1] > 0) { + gko::hip::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + hipLaunchKernelGGL(generic_kernel_2d, num_blocks, block_size, 0, 0, + static_cast(size[0]), + static_cast(size[1]), fn, + map_to_device(args)...); + } } diff --git a/hip/base/kernel_launch_solver.hip.hpp b/hip/base/kernel_launch_solver.hip.hpp index 9798f6c4fbc..46abe82c415 100644 --- a/hip/base/kernel_launch_solver.hip.hpp +++ b/hip/base/kernel_launch_solver.hip.hpp @@ -65,14 +65,16 @@ void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, KernelArgs&&... args) { - gko::hip::device_guard guard{exec->get_device_id()}; - constexpr auto block_size = kernels::hip::default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); - hipLaunchKernelGGL(kernels::hip::generic_kernel_2d_solver, num_blocks, - block_size, 0, 0, static_cast(size[0]), - static_cast(size[1]), - static_cast(default_stride), fn, - kernels::hip::map_to_device(args)...); + if (size[0] * size[1] > 0) { + gko::hip::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = kernels::hip::default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + hipLaunchKernelGGL(kernels::hip::generic_kernel_2d_solver, num_blocks, + block_size, 0, 0, static_cast(size[0]), + static_cast(size[1]), + static_cast(default_stride), fn, + kernels::hip::map_to_device(args)...); + } } diff --git a/hip/distributed/partition_kernels.hip.cpp b/hip/distributed/partition_kernels.hip.cpp new file mode 100644 index 00000000000..c4d0044dc33 --- /dev/null +++ b/hip/distributed/partition_kernels.hip.cpp @@ -0,0 +1,59 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +#include +#include +#include +#include +#include + + +#include "common/unified/base/kernel_launch.hpp" +#include "core/components/fill_array_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +namespace partition { + + +#include "common/cuda_hip/distributed/partition_kernels.hpp.inc" + + +} // namespace partition +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/base/array.hpp b/include/ginkgo/core/base/array.hpp index bd3180cca85..e714697cc40 100644 --- a/include/ginkgo/core/base/array.hpp +++ b/include/ginkgo/core/base/array.hpp @@ -725,6 +725,25 @@ template void reduce_add(const Array& input_arr, Array& result); +/** + * Helper function to create an array view deducing the value type. + * + * @param exec the executor on which the array resides + * @param size the number of elements for the array + * @param data the pointer to the array we create a view on. + * + * @tparam ValueType the type of the array elements + * + * @return `Array::view(exec, size, data)` + */ +template +Array make_array_view(std::shared_ptr exec, + size_type size, ValueType* data) +{ + return Array::view(exec, size, data); +} + + namespace detail { diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index d30739d5161..4a46c3924c0 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -699,6 +699,33 @@ inline constexpr GKO_ATTRIBUTES IndexType invalid_index() } +namespace distributed { + + +/** + * Index type for enumerating processes in a distributed application + * + * Conforms to the MPI C interface of e.g. MPI rank or size + */ +using comm_index_type = int; + + +/** + * Instantiates a template for each valid combination of local and global index + * type + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take two arguments, where the first is replaced by the + * local index type and the second by the global index type. + */ +#define GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ + template _macro(int32, int32); \ + template _macro(int32, int64); \ + template _macro(int64, int64) + + +} // namespace distributed } // namespace gko diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp new file mode 100644 index 00000000000..36faf35bf37 --- /dev/null +++ b/include/ginkgo/core/distributed/partition.hpp @@ -0,0 +1,322 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HPP_ +#define GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HPP_ + + +#include +#include +#include + + +namespace gko { +namespace distributed { + + +/** + * Represents a partition of a range of indices [0, size) into a disjoint set of + * parts. The partition is stored as a set of consecutive ranges [begin, end) + * with an associated part ID and local index (number of indices in this part + * before `begin`). + * Global indices are stored as 64 bit signed integers (int64), part-local + * indices use LocalIndexType, Part IDs use 32 bit signed integers (int). + * + * For example, consider the interval [0, 13) that is partitioned into the + * following ranges: + * ``` + * [0,3), [3, 6), [6, 8), [8, 10), [10, 13). + * ``` + * These ranges are distributed on three part with: + * ``` + * p_0 = [0, 3) + [6, 8) + [10, 13), + * p_1 = [3, 6), + * p_2 = [8, 10). + * ``` + * The part ids can be queried from the @ref get_part_ids array, and the ranges + * are represented as offsets, accessed by @ref get_range_bounds, leading to the + * offset array: + * ``` + * r = [0, 3, 6, 8, 10, 13] + * ``` + * so that individual ranges are given by `[r[i], r[i + 1])`. + * Since each part may be associated with multiple ranges, it is possible to get + * the starting index for each range that is local to the owning part, see @ref + * get_range_starting_indices. These indices can be used to easily iterate over + * part local data. For example, the above partition has the following starting + * indices + * ``` + * starting_index[0] = 0, + * starting_index[1] = 0, + * starting_index[2] = 3, // second range of part 1 + * starting_index[3] = 0, + * starting_index[4] = 5, // third range of part 1 + * ``` + * which you can use to iterate only over the the second range of part 1 (the + * third global range) with + * ``` + * for(int i = 0; i < r[3] - r[2]; ++i){ + * data[starting_index[2] + i] = val; + * } + * + * @tparam LocalIndexType The index type used for part-local indices. + * To prevent overflows, no single part's size may + * exceed this index type's maximum value. + * @tparam GlobalIndexType The index type used for the global indices. Needs + * to be at least as large a type as LocalIndexType. + */ +template +class Partition + : public EnablePolymorphicObject< + Partition>, + public EnablePolymorphicAssignment< + Partition>, + public EnableCreateMethod> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + static_assert(sizeof(GlobalIndexType) >= sizeof(LocalIndexType), + "GlobalIndexType must be at least as large as " + "LocalIndexType"); + +public: + using EnableCreateMethod::create; + using EnablePolymorphicAssignment::convert_to; + using EnablePolymorphicAssignment::move_to; + + using local_index_type = LocalIndexType; + using global_index_type = GlobalIndexType; + + /** + * Returns the total number of elements represented by this partition. + * + * @return number elements. + */ + size_type get_size() const { return size_; } + + /** + * Returns the number of ranges stored by this partition. + * This size refers to the data returned by get_range_bounds(). + * + * @return number of ranges. + */ + size_type get_num_ranges() const noexcept + { + return offsets_.get_num_elems() - 1; + } + + /** + * Returns the number of parts represented in this partition. + * + * @return number of parts. + */ + comm_index_type get_num_parts() const noexcept { return num_parts_; } + + /** + * Returns the number of empty parts within this partition. + * + * @return number of empty parts. + */ + comm_index_type get_num_empty_parts() const noexcept + { + return num_empty_parts_; + } + + /** + * Returns the ranges boundary array stored by this partition. + * `range_bounds[i]` is the beginning (inclusive) and + * `range_bounds[i + 1]` is the end (exclusive) of the ith range. + * + * @return range boundaries array. + */ + const global_index_type* get_range_bounds() const noexcept + { + return offsets_.get_const_data(); + } + + /** + * Returns the part IDs of the ranges in this partition. + * For each range from get_range_bounds(), it stores the part ID in the + * interval [0, get_num_parts() - 1]. + * + * @return part ID array. + */ + const comm_index_type* get_part_ids() const noexcept + { + return part_ids_.get_const_data(); + } + + /** + * Returns the part-local starting index for each range in this partition. + * + * Consider the partition on `[0, 10)` with + * ``` + * p_1 = [0-4) + [7-10), + * p_2 = [4-7). + * ``` + * Then `range_starting_indices[0] = 0`, `range_starting_indices[1] = 0`, + * `range_starting_indices[2] = 4`. + * + * @return part-local starting index array. + */ + const local_index_type* get_range_starting_indices() const noexcept + { + return starting_indices_.get_const_data(); + } + + /** + * Returns the part size array. + * part_sizes[p] stores the total number of indices in part `p`. + * + * @return part size array. + */ + const local_index_type* get_part_sizes() const noexcept + { + return part_sizes_.get_const_data(); + } + + /** + * Returns the size of a part given by its part ID. + * @warning Triggers a copy from device to host. + * + * @param part the part ID. + * + * @return size of part. + */ + local_index_type get_part_size(comm_index_type part) const + { + return this->get_executor()->copy_val_to_host( + part_sizes_.get_const_data() + part); + } + + /** + * Checks if each part has no more than one contiguous range. + * + * @return true if each part has no more than one contiguous range. + */ + bool has_connected_parts(); + + /** + * Checks if the ranges are ordered by their part index. + * + * Implies that the partition is connected. + * + * @return true if the ranges are ordered by their part index. + */ + bool has_ordered_parts(); + + /** + * Builds a partition from a given mapping global_index -> part_id. + * + * @param exec the Executor on which the partition should be built + * @param mapping the mapping from global indices to part IDs. + * @param num_parts the number of parts used in the mapping. + * + * @return a Partition representing the given mapping as a set of ranges + */ + static std::unique_ptr build_from_mapping( + std::shared_ptr exec, + const Array& mapping, comm_index_type num_parts); + + /** + * Builds a partition consisting of contiguous ranges, one for each part. + * + * @param exec the Executor on which the partition should be built + * @param ranges the boundaries of the ranges representing each part. + * Part i contains the indices [ranges[i], ranges[i + 1]). + * Has to contain at least one element. + * The first element has to be 0. + * + * @return a Partition representing the given contiguous partitioning. + */ + static std::unique_ptr build_from_contiguous( + std::shared_ptr exec, + const Array& ranges); + + /** + * Builds a partition by evenly distributing the global range. + * + * @param exec the Executor on which the partition should be built + * @param num_parts the number of parst used in this partition + * @param global_size the global size of this partition + * + * @return a Partition where each range has either + * `floor(global_size/num_parts)` or `floor(global_size/num_parts) + 1` + * indices. + */ + static std::unique_ptr build_from_global_size_uniform( + std::shared_ptr exec, comm_index_type num_parts, + global_index_type global_size); + +private: + /** + * Creates a partition stored on the given executor with the given number of + * consecutive ranges and parts. + */ + Partition(std::shared_ptr exec, + comm_index_type num_parts = 0, size_type num_ranges = 0) + : EnablePolymorphicObject{exec}, + num_parts_{num_parts}, + num_empty_parts_{0}, + size_{0}, + offsets_{exec, num_ranges + 1}, + starting_indices_{exec, num_ranges}, + part_sizes_{exec, static_cast(num_parts)}, + part_ids_{exec, num_ranges} + { + offsets_.fill(0); + starting_indices_.fill(0); + part_sizes_.fill(0); + part_ids_.fill(0); + } + + /** + * Finalizes the construction in the create_* methods, by computing the + * range_starting_indices_ and part_sizes_ based on the current + * range_bounds_ and part_ids_, and setting size_ correctly. + */ + void finalize_construction(); + + comm_index_type num_parts_; + comm_index_type num_empty_parts_; + global_index_type size_; + Array offsets_; + Array starting_indices_; + Array part_sizes_; + Array part_ids_; +}; + + +} // namespace distributed +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index b4014ff6ccf..5c154644773 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -69,6 +69,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include + #include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 9492915892d..79b29e053c3 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -7,6 +7,7 @@ target_sources(ginkgo_omp base/version.cpp components/device_matrix_data_kernels.cpp components/prefix_sum_kernels.cpp + distributed/partition_kernels.cpp factorization/factorization_kernels.cpp factorization/ic_kernels.cpp factorization/ilu_kernels.cpp diff --git a/omp/distributed/partition_kernels.cpp b/omp/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..d237d9783cd --- /dev/null +++ b/omp/distributed/partition_kernels.cpp @@ -0,0 +1,106 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +#include + + +#include + + +#include "core/base/allocator.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +namespace partition { + + +template +void build_starting_indices(std::shared_ptr exec, + const GlobalIndexType* range_offsets, + const int* range_parts, size_type num_ranges, + int num_parts, int& num_empty_parts, + LocalIndexType* ranks, LocalIndexType* sizes) +{ + std::fill_n(sizes, num_parts, 0); + auto num_threads = static_cast(omp_get_max_threads()); + auto size_per_thread = + static_cast(ceildiv(num_ranges, num_threads)); + vector local_sizes(num_parts * num_threads, 0, {exec}); +#pragma omp parallel + { + auto thread_id = static_cast(omp_get_thread_num()); + auto thread_begin = size_per_thread * thread_id; + auto thread_end = std::min(num_ranges, thread_begin + size_per_thread); + auto base = num_parts * thread_id; + // local exclusive prefix sum + for (auto range = thread_begin; range < thread_end; range++) { + auto begin = range_offsets[range]; + auto end = range_offsets[range + 1]; + auto part = range_parts[range]; + ranks[range] = local_sizes[part + base]; + local_sizes[part + base] += end - begin; + } +#pragma omp barrier + // exclusive prefix sum over local sizes +#pragma omp for reduction(+ : num_empty_parts) + for (comm_index_type part = 0; part < num_parts; ++part) { + LocalIndexType size{}; + for (size_type thread = 0; thread < num_threads; ++thread) { + auto idx = num_parts * thread + part; + auto local_size = local_sizes[idx]; + local_sizes[idx] = size; + size += local_size; + } + sizes[part] = size; + num_empty_parts += size == 0 ? 1 : 0; + } + // add global baselines to local ranks + for (auto range = thread_begin; range < thread_end; range++) { + auto part = range_parts[range]; + ranks[range] += local_sizes[part + base]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); + + +} // namespace partition +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 11787edc228..83f770bc941 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -9,6 +9,7 @@ target_sources(ginkgo_reference components/reduce_array_kernels.cpp components/precision_conversion_kernels.cpp components/prefix_sum_kernels.cpp + distributed/partition_kernels.cpp factorization/factorization_kernels.cpp factorization/ic_kernels.cpp factorization/ilu_kernels.cpp diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..c2f411f3c22 --- /dev/null +++ b/reference/distributed/partition_kernels.cpp @@ -0,0 +1,162 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +namespace partition { + + +void count_ranges(std::shared_ptr exec, + const Array& mapping, size_type& num_ranges) +{ + num_ranges = 0; + comm_index_type prev_part{-1}; + for (size_type i = 0; i < mapping.get_num_elems(); i++) { + auto cur_part = mapping.get_const_data()[i]; + num_ranges += cur_part != prev_part; + prev_part = cur_part; + } +} + + +template +void build_from_contiguous(std::shared_ptr exec, + const Array& ranges, + GlobalIndexType* range_bounds, + comm_index_type* part_ids) +{ + range_bounds[0] = 0; + for (comm_index_type i = 0; i < ranges.get_num_elems() - 1; i++) { + auto end = ranges.get_const_data()[i + 1]; + range_bounds[i + 1] = end; + part_ids[i] = i; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); + + +template +void build_from_mapping(std::shared_ptr exec, + const Array& mapping, + GlobalIndexType* range_bounds, + comm_index_type* part_ids) +{ + size_type range_idx{}; + comm_index_type range_part{-1}; + for (size_type i = 0; i < mapping.get_num_elems(); i++) { + auto cur_part = mapping.get_const_data()[i]; + if (cur_part != range_part) { + range_bounds[range_idx] = i; + part_ids[range_idx] = cur_part; + range_idx++; + range_part = cur_part; + } + } + range_bounds[range_idx] = + static_cast(mapping.get_num_elems()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_MAPPING); + + +template +void build_ranges_from_global_size(std::shared_ptr exec, + comm_index_type num_parts, + GlobalIndexType global_size, + Array& ranges) +{ + const auto size_per_part = global_size / num_parts; + const auto rest = global_size - (num_parts * size_per_part); + + auto* ranges_ptr = ranges.get_data(); + + ranges_ptr[0] = 0; + for (int i = 1; i < num_parts + 1; ++i) { + ranges_ptr[i] = + ranges_ptr[i - 1] + size_per_part + ((i - 1) < rest ? 1 : 0); + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE); + + +template +void build_starting_indices(std::shared_ptr exec, + const GlobalIndexType* range_offsets, + const int* range_parts, size_type num_ranges, + int num_parts, int& num_empty_parts, + LocalIndexType* ranks, LocalIndexType* sizes) +{ + std::fill_n(sizes, num_parts, 0); + for (size_type range = 0; range < num_ranges; ++range) { + auto begin = range_offsets[range]; + auto end = range_offsets[range + 1]; + auto part = range_parts[range]; + auto rank = sizes[part]; + ranks[range] = rank; + sizes[part] += end - begin; + } + num_empty_parts = std::count(sizes, sizes + num_parts, 0); +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); + +template +void has_ordered_parts( + std::shared_ptr exec, + const distributed::Partition* partition, + bool* result) +{ + *result = true; + auto part_ids = partition->get_part_ids(); + + for (comm_index_type i = 1; i < partition->get_num_ranges(); ++i) { + if (part_ids[i] < part_ids[i - 1]) { + *result = false; + return; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_PARTITION_IS_ORDERED); + +} // namespace partition +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/test/CMakeLists.txt b/reference/test/CMakeLists.txt index e145507be4a..b7cb46408b5 100644 --- a/reference/test/CMakeLists.txt +++ b/reference/test/CMakeLists.txt @@ -2,6 +2,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) +add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(log) add_subdirectory(matrix) diff --git a/reference/test/distributed/CMakeLists.txt b/reference/test/distributed/CMakeLists.txt new file mode 100644 index 00000000000..78a626512af --- /dev/null +++ b/reference/test/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(partition_kernels) diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..521736d4b42 --- /dev/null +++ b/reference/test/distributed/partition_kernels.cpp @@ -0,0 +1,295 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include + + +#include +#include + + +#include + + +#include "core/distributed/partition_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using comm_index_type = gko::distributed::comm_index_type; + + +template +void assert_equal_data(const T* data, std::initializer_list reference_data) +{ + std::vector ref(std::move(reference_data)); + for (auto i = 0; i < ref.size(); ++i) { + EXPECT_EQ(data[i], ref[i]); + } +} + + +template +class Partition : public ::testing::Test { +protected: + using local_index_type = + typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; + using part_type = + gko::distributed::Partition; + + Partition() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + +TYPED_TEST_SUITE(Partition, gko::test::LocalGlobalIndexTypes); + + +TYPED_TEST(Partition, BuildsFromMapping) +{ + using part_type = typename TestFixture::part_type; + gko::Array mapping{ + this->ref, {2, 2, 0, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 2, 2, 0}}; + comm_index_type num_parts = 3; + gko::size_type num_ranges = 10; + + auto partition = + part_type::build_from_mapping(this->ref, mapping, num_parts); + + EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); + EXPECT_EQ(partition->get_num_ranges(), num_ranges); + EXPECT_EQ(partition->get_num_parts(), num_parts); + EXPECT_EQ(partition->get_num_empty_parts(), 0); + assert_equal_data(partition->get_range_bounds(), + {0, 2, 3, 5, 6, 8, 9, 10, 13, 15, 16}); + assert_equal_data(partition->get_part_ids(), + {2, 0, 1, 2, 0, 1, 0, 1, 2, 0}); + assert_equal_data(partition->get_range_starting_indices(), + {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); + assert_equal_data(partition->get_part_sizes(), {5, 6, 5}); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithEmptyParts) +{ + using part_type = typename TestFixture::part_type; + gko::Array mapping{ + this->ref, {3, 3, 0, 1, 1, 3, 0, 0, 1, 0, 1, 1, 1, 3, 3, 0}}; + comm_index_type num_parts = 5; + gko::size_type num_ranges = 10; + + auto partition = + part_type::build_from_mapping(this->ref, mapping, num_parts); + + EXPECT_EQ(partition->get_size(), mapping.get_num_elems()); + EXPECT_EQ(partition->get_num_ranges(), num_ranges); + EXPECT_EQ(partition->get_num_parts(), num_parts); + EXPECT_EQ(partition->get_num_empty_parts(), 2); + assert_equal_data(partition->get_range_bounds(), + {0, 2, 3, 5, 6, 8, 9, 10, 13, 15, 16}); + assert_equal_data(partition->get_part_ids(), + {3, 0, 1, 3, 0, 1, 0, 1, 3, 0}); + assert_equal_data(partition->get_range_starting_indices(), + {0, 0, 0, 2, 1, 2, 3, 3, 3, 4}); + assert_equal_data(partition->get_part_sizes(), {5, 6, 0, 5, 0}); +} + + +TYPED_TEST(Partition, BuildsFromRanges) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{this->ref, {0, 5, 5, 7, 9, 10}}; + + auto partition = part_type::build_from_contiguous(this->ref, ranges); + + EXPECT_EQ(partition->get_size(), + ranges.get_data()[ranges.get_num_elems() - 1]); + EXPECT_EQ(partition->get_num_ranges(), ranges.get_num_elems() - 1); + EXPECT_EQ(partition->get_num_parts(), ranges.get_num_elems() - 1); + EXPECT_EQ(partition->get_num_empty_parts(), 1); + assert_equal_data(partition->get_range_bounds(), {0, 5, 5, 7, 9, 10}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {5, 0, 2, 2, 1}); +} + + +TYPED_TEST(Partition, BuildsFromRangeWithSingleElement) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{this->ref, {0}}; + + auto partition = part_type::build_from_contiguous(this->ref, ranges); + + EXPECT_EQ(partition->get_size(), 0); + EXPECT_EQ(partition->get_num_ranges(), 0); + EXPECT_EQ(partition->get_num_parts(), 0); + EXPECT_EQ(partition->get_num_empty_parts(), 0); + assert_equal_data(partition->get_range_bounds(), {0}); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSize) +{ + using part_type = typename TestFixture::part_type; + + auto partition = + part_type::build_from_global_size_uniform(this->ref, 5, 13); + + EXPECT_EQ(partition->get_size(), 13); + EXPECT_EQ(partition->get_num_ranges(), 5); + EXPECT_EQ(partition->get_num_parts(), 5); + EXPECT_EQ(partition->get_num_empty_parts(), 0); + assert_equal_data(partition->get_range_bounds(), {0, 3, 6, 9, 11, 13}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {3, 3, 3, 2, 2}); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeEmptySize) +{ + using part_type = typename TestFixture::part_type; + + auto partition = part_type::build_from_global_size_uniform(this->ref, 5, 0); + + EXPECT_EQ(partition->get_size(), 0); + EXPECT_EQ(partition->get_num_ranges(), 5); + EXPECT_EQ(partition->get_num_parts(), 5); + EXPECT_EQ(partition->get_num_empty_parts(), 5); + assert_equal_data(partition->get_range_bounds(), {0, 0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {0, 0, 0, 0, 0}); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeWithEmptyParts) +{ + using part_type = typename TestFixture::part_type; + + auto partition = part_type::build_from_global_size_uniform(this->ref, 5, 3); + + EXPECT_EQ(partition->get_size(), 3); + EXPECT_EQ(partition->get_num_ranges(), 5); + EXPECT_EQ(partition->get_num_parts(), 5); + EXPECT_EQ(partition->get_num_empty_parts(), 2); + assert_equal_data(partition->get_range_bounds(), {0, 1, 2, 3, 3, 3}); + assert_equal_data(partition->get_part_ids(), {0, 1, 2, 3, 4}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {1, 1, 1, 0, 0}); +} + + +TYPED_TEST(Partition, IsConnected) +{ + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 0, 1, 1, 2}}, 3); + + ASSERT_TRUE(part->has_connected_parts()); +} + + +TYPED_TEST(Partition, IsConnectedWithEmptyParts) +{ + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 0, 2, 2, 5}}, 6); + + ASSERT_TRUE(part->has_connected_parts()); +} + + +TYPED_TEST(Partition, IsConnectedUnordered) +{ + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3); + + ASSERT_TRUE(part->has_connected_parts()); + ASSERT_FALSE(part->has_ordered_parts()); +} + + +TYPED_TEST(Partition, IsConnectedFail) +{ + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 1, 2, 0, 1}}, 3); + + ASSERT_FALSE(part->has_connected_parts()); +} + + +TYPED_TEST(Partition, IsOrdered) +{ + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 1, 1, 2, 2}}, 3); + + ASSERT_TRUE(part->has_ordered_parts()); +} + + +TYPED_TEST(Partition, IsOrderedWithEmptyParts) +{ + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {0, 2, 2, 5, 5}}, 6); + + ASSERT_TRUE(part->has_ordered_parts()); +} + + +TYPED_TEST(Partition, IsOrderedFail) +{ + using part_type = typename TestFixture::part_type; + auto part = part_type::build_from_mapping( + this->ref, gko::Array{this->ref, {1, 1, 0, 0, 2}}, 3); + + ASSERT_FALSE(part->has_ordered_parts()); +} + + +} // namespace diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e2a07ca43a2..a5f43b2faab 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,6 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(components) +add_subdirectory(distributed) add_subdirectory(matrix) -add_subdirectory(solver) \ No newline at end of file +add_subdirectory(solver) diff --git a/test/distributed/CMakeLists.txt b/test/distributed/CMakeLists.txt new file mode 100644 index 00000000000..b4e2fbff054 --- /dev/null +++ b/test/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_common_test(partition_kernels DISABLE_EXECUTORS dpcpp) diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp new file mode 100644 index 00000000000..bc3187ab1bc --- /dev/null +++ b/test/distributed/partition_kernels.cpp @@ -0,0 +1,393 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include + + +#include +#include + + +#include + + +#include "core/distributed/partition_kernels.hpp" +#include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + +namespace { + + +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Partition : public ::testing::Test { +protected: + using local_index_type = + typename std::tuple_element<0, decltype(LocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<1, decltype(LocalGlobalIndexType())>::type; + using part_type = + gko::distributed::Partition; + + Partition() : rand_engine(96457) {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + init_executor(ref, exec); + } + + void TearDown() + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } + + void assert_equal(std::unique_ptr& part, + std::unique_ptr& dpart) + { + ASSERT_EQ(part->get_size(), dpart->get_size()); + ASSERT_EQ(part->get_num_ranges(), dpart->get_num_ranges()); + ASSERT_EQ(part->get_num_parts(), dpart->get_num_parts()); + ASSERT_EQ(part->get_num_empty_parts(), dpart->get_num_empty_parts()); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view( + this->ref, part->get_num_ranges() + 1, + const_cast(part->get_range_bounds())), + gko::make_array_view( + this->exec, dpart->get_num_ranges() + 1, + const_cast(dpart->get_range_bounds()))); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view( + this->ref, part->get_num_ranges(), + const_cast(part->get_part_ids())), + gko::make_array_view( + this->exec, dpart->get_num_ranges(), + const_cast(dpart->get_part_ids()))); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view(this->ref, part->get_num_ranges(), + const_cast( + part->get_range_starting_indices())), + gko::make_array_view(this->exec, dpart->get_num_ranges(), + const_cast( + dpart->get_range_starting_indices()))); + GKO_ASSERT_ARRAY_EQ( + gko::make_array_view( + this->ref, part->get_num_parts(), + const_cast(part->get_part_sizes())), + gko::make_array_view( + this->exec, dpart->get_num_parts(), + const_cast(dpart->get_part_sizes()))); + } + + std::ranlux48 rand_engine; + + std::shared_ptr ref; + std::shared_ptr exec; +}; + +TYPED_TEST_SUITE(Partition, gko::test::LocalGlobalIndexTypes); + + +TYPED_TEST(Partition, BuildsFromMapping) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 7; + std::uniform_int_distribution part_dist{0, num_parts - 1}; + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); + gko::Array dmapping{this->exec, mapping}; + + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithEmptyPart) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 7; + // skip part 0 + std::uniform_int_distribution part_dist{1, num_parts - 1}; + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); + gko::Array dmapping{this->exec, mapping}; + + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithAlmostAllPartsEmpty) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 7; + // return only part 1 + std::uniform_int_distribution part_dist{1, 1}; + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); + gko::Array dmapping{this->exec, mapping}; + + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithAllPartsEmpty) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 7; + gko::Array mapping{this->ref, 0}; + gko::Array dmapping{this->exec, 0}; + + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromMappingWithOnePart) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 1; + gko::Array mapping{this->ref, 10000}; + mapping.fill(0); + gko::Array dmapping{this->exec, mapping}; + + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, dmapping, num_parts); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguous) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{this->ref, + {0, 1234, 3134, 4578, 16435, 60000}}; + gko::Array dranges{this->exec, ranges}; + + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithSomeEmptyParts) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{ + this->ref, {0, 1234, 3134, 3134, 4578, 16435, 16435, 60000}}; + gko::Array dranges{this->exec, ranges}; + + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithMostlyEmptyParts) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{ + this->ref, {0, 0, 3134, 4578, 4578, 4578, 4578, 4578}}; + gko::Array dranges{this->exec, ranges}; + + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithOnlyEmptyParts) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{this->ref, {0, 0, 0, 0, 0, 0, 0}}; + gko::Array dranges{this->exec, ranges}; + + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithOnlyOneEmptyPart) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{this->ref, {0, 0}}; + gko::Array dranges{this->exec, ranges}; + + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromContiguousWithSingleEntry) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::Array ranges{this->ref, {0}}; + gko::Array dranges{this->exec, ranges}; + + auto part = part_type::build_from_contiguous(this->ref, ranges); + auto dpart = part_type::build_from_contiguous(this->exec, dranges); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSize) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + const int num_parts = 7; + const global_index_type global_size = 708; + + auto part = part_type::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = part_type::build_from_global_size_uniform( + this->exec, num_parts, global_size); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeEmpty) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + const int num_parts = 7; + const global_index_type global_size = 0; + + auto part = part_type::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = part_type::build_from_global_size_uniform( + this->exec, num_parts, global_size); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, BuildsFromGlobalSizeMorePartsThanSize) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + const int num_parts = 77; + const global_index_type global_size = 13; + + auto part = part_type::build_from_global_size_uniform(this->ref, num_parts, + global_size); + auto dpart = part_type::build_from_global_size_uniform( + this->exec, num_parts, global_size); + + this->assert_equal(part, dpart); +} + + +TYPED_TEST(Partition, IsOrderedTrue) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 7; + gko::size_type size_per_part = 1000; + gko::size_type global_size = num_parts * size_per_part; + gko::Array mapping{this->ref, global_size}; + for (comm_index_type i = 0; i < num_parts; ++i) { + std::fill(mapping.get_data() + i * size_per_part, + mapping.get_data() + (i + 1) * size_per_part, i); + } + auto dpart = part_type::build_from_mapping(this->exec, mapping, num_parts); + + ASSERT_TRUE(dpart->has_ordered_parts()); +} + + +TYPED_TEST(Partition, IsOrderedFail) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 7; + gko::size_type size_per_part = 1000; + gko::size_type global_size = num_parts * size_per_part; + gko::Array mapping{this->ref, global_size}; + for (comm_index_type i = 0; i < num_parts; ++i) { + std::fill(mapping.get_data() + i * size_per_part, + mapping.get_data() + (i + 1) * size_per_part, + num_parts - 1 - i); + } + auto dpart = part_type::build_from_mapping(this->exec, mapping, num_parts); + + ASSERT_FALSE(dpart->has_ordered_parts()); +} + + +TYPED_TEST(Partition, IsOrderedRandom) +{ + using part_type = typename TestFixture::part_type; + comm_index_type num_parts = 7; + std::uniform_int_distribution part_dist{0, num_parts - 1}; + auto mapping = gko::test::generate_random_array( + 10000, part_dist, this->rand_engine, this->ref); + auto part = part_type::build_from_mapping(this->ref, mapping, num_parts); + auto dpart = part_type::build_from_mapping(this->exec, mapping, num_parts); + + ASSERT_EQ(part->has_ordered_parts(), dpart->has_ordered_parts()); +} + + +} // namespace