diff --git a/common/cuda_hip/distributed/vector_kernels.hpp.inc b/common/cuda_hip/distributed/vector_kernels.hpp.inc new file mode 100644 index 00000000000..28be6a512a7 --- /dev/null +++ b/common/cuda_hip/distributed/vector_kernels.hpp.inc @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +template +void build_local( + std::shared_ptr exec, + const device_matrix_data& input, + const distributed::Partition* partition, + comm_index_type local_part, matrix::Dense* local_mtx) +{ + const auto* range_bounds = partition->get_range_bounds(); + const auto* range_starting_indices = + partition->get_range_starting_indices(); + const auto* part_ids = partition->get_part_ids(); + const auto num_ranges = partition->get_num_ranges(); + + Array range_id{exec, input.get_num_elems()}; + thrust::upper_bound(thrust::device, range_bounds + 1, + range_bounds + num_ranges + 1, + input.get_const_row_idxs(), + input.get_const_row_idxs() + input.get_num_elems(), + range_id.get_data(), thrust::less()); + + // write values with local rows into the local matrix at the correct index + // this needs the following iterators: + // - local_row_it: (global_row, range_id) -> local row index + // - flat_idx_it: (local_row, col) -> flat index in local matrix values + // array + // the flat_idx_it is used by the scatter_if as an index map for the values + auto map_to_local_row = + [range_bounds, range_starting_indices] GKO_THRUST_LAMBDA( + const auto& idx_range_id) -> LocalIndexType { + const auto idx = thrust::get<0>(idx_range_id); + const auto rid = thrust::get<1>(idx_range_id); + return static_cast(idx - range_bounds[rid]) + + range_starting_indices[rid]; + }; + auto local_row_it = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(input.get_const_row_idxs(), + range_id.get_data())), + map_to_local_row); + + auto stride = local_mtx->get_stride(); + auto map_to_flat_idx = + [stride] GKO_THRUST_LAMBDA(const auto& row_col) -> size_type { + return thrust::get<0>(row_col) * stride + thrust::get<1>(row_col); + }; + auto flat_idx_it = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(local_row_it, input.get_const_col_idxs())), + map_to_flat_idx); + + auto is_local_row = [part_ids, + local_part] GKO_THRUST_LAMBDA(const auto rid) { + return part_ids[rid] == local_part; + }; + thrust::scatter_if(thrust::device, input.get_const_values(), + input.get_const_values() + input.get_num_elems(), + flat_idx_it, range_id.get_data(), + local_mtx->get_values(), is_local_row); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu index 46d834ee0ca..127bd2b06c3 100644 --- a/cuda/distributed/vector_kernels.cu +++ b/cuda/distributed/vector_kernels.cu @@ -36,22 +36,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include +#include +#include +#include +#include +#include + + namespace gko { namespace kernels { namespace cuda { namespace distributed_vector { -template -void build_local( - std::shared_ptr exec, - const device_matrix_data& input, - const distributed::Partition* partition, - comm_index_type local_part, - matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; +#define GKO_THRUST_LAMBDA __device__ + + +#include "common/cuda_hip/distributed/vector_kernels.hpp.inc" + -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); +#undef GKO_THRUST_LAMBDA } // namespace distributed_vector diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp index a51c9e22669..6b4a1a079ea 100644 --- a/dpcpp/distributed/vector_kernels.dp.cpp +++ b/dpcpp/distributed/vector_kernels.dp.cpp @@ -35,6 +35,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include +#include + namespace gko { namespace kernels { @@ -47,8 +50,56 @@ void build_local( std::shared_ptr exec, const device_matrix_data& input, const distributed::Partition* partition, - comm_index_type local_part, - matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; + comm_index_type local_part, matrix::Dense* local_mtx) +{ + const auto* range_bounds = partition->get_range_bounds(); + const auto* range_starting_indices = + partition->get_range_starting_indices(); + const auto* part_ids = partition->get_part_ids(); + const auto num_ranges = partition->get_num_ranges(); + auto policy = + oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + + Array range_id{exec, input.get_num_elems()}; + oneapi::dpl::upper_bound(policy, range_bounds + 1, + range_bounds + num_ranges + 1, + input.get_const_row_idxs(), + input.get_const_row_idxs() + input.get_num_elems(), + range_id.get_data()); + + // write values with local rows into the local matrix at the correct index + // this needs the following iterators: + // - local_row_it: (global_row, range_id) -> local row index + // - flat_idx_it: (index) -> flat index (row[index] * stride + col[index]) + // in local matrix values array + // the flat_idx_it is used by the copy_if as a permutation index for the + // values + auto map_to_local_row = [range_bounds, range_starting_indices]( + const auto& idx_range_id) -> LocalIndexType { + const auto [idx, rid] = idx_range_id; + return static_cast(idx - range_bounds[rid]) + + range_starting_indices[rid]; + }; + auto local_row_it = oneapi::dpl::make_transform_iterator( + oneapi::dpl::make_zip_iterator(input.get_const_row_idxs(), + range_id.get_data()), + map_to_local_row); + + auto flat_idx_it = oneapi::dpl::make_permutation_iterator( + local_mtx->get_values(), + [local_row_it, cols = input.get_const_col_idxs(), + stride = local_mtx->get_stride()](const auto i) { + return local_row_it[i] * stride + cols[i]; + }); + + auto is_local_row = [range_id = range_id.get_data(), part_ids, + local_part](const auto i) { + return part_ids[range_id[i]] == local_part; + }; + oneapi::dpl::copy_if(policy, input.get_const_values(), + input.get_const_values() + input.get_num_elems(), + flat_idx_it, is_local_row); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp index 1133317e4e4..58bac94d28f 100644 --- a/hip/distributed/vector_kernels.hip.cpp +++ b/hip/distributed/vector_kernels.hip.cpp @@ -36,22 +36,30 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + +#include +#include +#include +#include +#include +#include + + namespace gko { namespace kernels { namespace hip { namespace distributed_vector { -template -void build_local( - std::shared_ptr exec, - const device_matrix_data& input, - const distributed::Partition* partition, - comm_index_type local_part, - matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; +#define GKO_THRUST_LAMBDA __device__ __host__ + + +#include "common/cuda_hip/distributed/vector_kernels.hpp.inc" + -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( - GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); +#undef GKO_THRUST_LAMBDA } // namespace distributed_vector diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index 2ddf2808922..cf7723a11f1 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -2,7 +2,6 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) -add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(matrix) add_subdirectory(preconditioner) diff --git a/omp/test/distributed/CMakeLists.txt b/omp/test/distributed/CMakeLists.txt deleted file mode 100644 index 61e5d60cb39..00000000000 --- a/omp/test/distributed/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -ginkgo_create_test(vector_kernels) diff --git a/test/distributed/CMakeLists.txt b/test/distributed/CMakeLists.txt index b4e2fbff054..4efad911912 100644 --- a/test/distributed/CMakeLists.txt +++ b/test/distributed/CMakeLists.txt @@ -1 +1,2 @@ ginkgo_create_common_test(partition_kernels DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_test(vector_kernels) diff --git a/omp/test/distributed/vector_kernels.cpp b/test/distributed/vector_kernels.cpp similarity index 92% rename from omp/test/distributed/vector_kernels.cpp rename to test/distributed/vector_kernels.cpp index 5ee65cfb24c..616665e89d4 100644 --- a/omp/test/distributed/vector_kernels.cpp +++ b/test/distributed/vector_kernels.cpp @@ -47,6 +47,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + namespace { @@ -56,23 +59,26 @@ using comm_index_type = gko::distributed::comm_index_type; template class Vector : public ::testing::Test { protected: - using value_type = - typename std::tuple_element<0, decltype( - ValueLocalGlobalIndexType())>::type; - using local_index_type = - typename std::tuple_element<1, decltype( - ValueLocalGlobalIndexType())>::type; - using global_index_type = - typename std::tuple_element<2, decltype( - ValueLocalGlobalIndexType())>::type; + using value_type = typename std::tuple_element< + 0, decltype(ValueLocalGlobalIndexType())>::type; + using local_index_type = typename std::tuple_element< + 1, decltype(ValueLocalGlobalIndexType())>::type; + using global_index_type = typename std::tuple_element< + 2, decltype(ValueLocalGlobalIndexType())>::type; using global_entry = gko::matrix_data_entry; using mtx = gko::matrix::Dense; - Vector() - : ref(gko::ReferenceExecutor::create()), - exec(gko::OmpExecutor::create()), - engine(42) - {} + Vector() : ref(gko::ReferenceExecutor::create()), engine(42) {} + + + void SetUp() { init_executor(ref, exec); } + + void TearDown() + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } void validate( const gko::distributed::Partition* @@ -94,20 +100,23 @@ class Vector : public ::testing::Test { gko::kernels::reference::distributed_vector::build_local( ref, input, partition, part, output.get()); - gko::kernels::omp::distributed_vector::build_local( + gko::kernels::EXEC_NAMESPACE::distributed_vector::build_local( exec, d_input, d_partition, part, d_output.get()); GKO_ASSERT_MTX_NEAR(output, d_output, 0); } } - std::shared_ptr ref; - std::shared_ptr exec; + std::shared_ptr ref; + std::shared_ptr exec; std::default_random_engine engine; }; + +TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); + + template - gko::device_matrix_data generate_random_matrix_data_array( gko::size_type num_rows, gko::size_type num_cols, NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist, @@ -122,8 +131,6 @@ gko::device_matrix_data generate_random_matrix_data_array( md); } -TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); - TYPED_TEST(Vector, BuildsLocalEmptyIsEquivalentToRef) { diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp index 0200ffbfee2..10f487dbc0e 100644 --- a/test/mpi/distributed/vector.cpp +++ b/test/mpi/distributed/vector.cpp @@ -116,7 +116,7 @@ class VectorCreation : public ::testing::Test { void SetUp() override { ASSERT_EQ(this->comm.size(), 3); - init_executor(gko::ReferenceExecutor::create(), exec); + init_executor(gko::ReferenceExecutor::create(), exec, comm); } void TearDown() override @@ -143,7 +143,7 @@ class VectorCreation : public ::testing::Test { TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); -#ifdef GKO_COMPILING_REFERENCE +//#ifdef GKO_COMPILING_REFERENCE TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) @@ -317,7 +317,7 @@ TYPED_TEST(VectorCreation, CanReadLocalMatrixDataSomeEmpty) } -#endif +//#endif TYPED_TEST(VectorCreation, CanCreateFromLocalVectorAndSize) @@ -370,10 +370,7 @@ class VectorReductions : public ::testing::Test { size{53, 11}, engine(42) { - init_executor(ref, exec, comm); - - logger = gko::share(HostToDeviceLogger::create(exec)); - exec->add_logger(logger); + logger = gko::share(HostToDeviceLogger::create(ref)); dense_x = dense_type::create(exec); dense_y = dense_type::create(exec); @@ -420,7 +417,8 @@ class VectorReductions : public ::testing::Test { void SetUp() override { ASSERT_GT(comm.size(), 0); - init_executor(gko::ReferenceExecutor::create(), exec); + init_executor(gko::ReferenceExecutor::create(), exec, comm); + exec->add_logger(logger); } void TearDown() override @@ -597,7 +595,7 @@ class VectorLocalOps : public ::testing::Test { void SetUp() override { ASSERT_GT(comm.size(), 0); - init_executor(gko::ReferenceExecutor::create(), exec); + init_executor(gko::ReferenceExecutor::create(), exec, comm); } void TearDown() override diff --git a/third_party/gtest/CMakeLists.txt b/third_party/gtest/CMakeLists.txt index 77698fd7a9f..66b15b975ef 100644 --- a/third_party/gtest/CMakeLists.txt +++ b/third_party/gtest/CMakeLists.txt @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.11.0 + GIT_TAG main ) # need to set the variables in CACHE due to CMP0077 set(gtest_disable_pthreads ON CACHE INTERNAL "")