diff --git a/CMakeLists.txt b/CMakeLists.txt index 31175e23c9e..68cda285ec9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,8 @@ option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ bac option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON) option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." ON) option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." OFF) +option(GINKGO_FORCE_GPU_AWARE_MPI "Assert that the MPI library is GPU aware. This forces Ginkgo to assume that GPU aware functionality is available (OFF (default) or ON), but may fail + catastrophically in case the MPI implementation is not GPU Aware, and GPU aware functionality has been forced" OFF) set(GINKGO_CIRCULAR_DEPS_FLAGS "-Wl,--no-undefined") @@ -189,8 +191,14 @@ else() message(STATUS "HWLOC is being forcibly switched off") endif() +set(GINKGO_HAVE_GPU_AWARE_MPI OFF) if(GINKGO_BUILD_MPI) find_package(MPI REQUIRED) + if(GINKGO_FORCE_GPU_AWARE_MPI) + set(GINKGO_HAVE_GPU_AWARE_MPI ON) + else() + set(GINKGO_HAVE_GPU_AWARE_MPI OFF) + endif() endif() # We keep using NVCC/HCC for consistency with previous releases even if AMD diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 28685fdf232..0cafb0c0f35 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -3,11 +3,18 @@ function(ginkgo_build_test_name test_name target_name) ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) -endfunction() +endfunction(ginkgo_build_test_name) -function(ginkgo_set_test_target_properties test_name test_target_name) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +function(ginkgo_create_gtest_mpi_main) + add_library(gtest_mpi_main "") + target_sources(gtest_mpi_main + PRIVATE + ${PROJECT_SOURCE_DIR}/core/test/mpi/gtest/mpi_listener.cpp) + find_package(MPI REQUIRED) + target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) +endfunction(ginkgo_create_gtest_mpi_main) + +function(ginkgo_set_test_target_default_properties test_name test_target_name) set_target_properties(${test_target_name} PROPERTIES OUTPUT_NAME ${test_name}) if (GINKGO_FAST_TESTS) @@ -19,12 +26,33 @@ function(ginkgo_set_test_target_properties test_name test_target_name) if (GINKGO_CHECK_CIRCULAR_DEPS) target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") endif() + target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR}) + target_link_libraries(${test_target_name} PRIVATE ginkgo) +endfunction(ginkgo_set_test_target_default_properties) + +function(ginkgo_internal_add_test test_name test_target_name) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + ginkgo_set_test_target_default_properties(${test_name} ${test_target_name}) add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${test_target_name} WORKING_DIRECTORY "$") - target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR}) - target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::Main GTest::GTest) -endfunction() + target_link_libraries(${test_target_name} PRIVATE GTest::Main GTest::GTest) +endfunction(ginkgo_internal_add_test) + +function(ginkgo_internal_add_mpi_test test_name test_target_name num_mpi_procs) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} 
${CMAKE_CURRENT_BINARY_DIR}) + ginkgo_set_test_target_default_properties(${test_name} ${test_target_name}) + if(NOT TARGET gtest_mpi_main) + ginkgo_create_gtest_mpi_main() + endif() + target_link_libraries(${test_target_name} PRIVATE gtest_mpi_main GTest::GTest MPI::MPI_CXX) + set(test_param ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_procs} ${OPENMPI_RUN_AS_ROOT_FLAG} + ${CMAKE_BINARY_DIR}/${REL_BINARY_DIR}/${test_name}) + add_test(NAME ${REL_BINARY_DIR}/${test_name} + COMMAND ${MPIEXEC_EXECUTABLE} ${test_param}) +endfunction(ginkgo_internal_add_mpi_test) function(ginkgo_create_test test_name) ginkgo_build_test_name(${test_name} test_target_name) @@ -32,7 +60,7 @@ function(ginkgo_create_test test_name) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_test) function(ginkgo_create_dpcpp_test test_name) @@ -42,7 +70,7 @@ function(ginkgo_create_dpcpp_test test_name) target_compile_options(${test_target_name} PRIVATE "${GINKGO_DPCPP_FLAGS}") target_compile_options(${test_target_name} PRIVATE "${GINKGO_COMPILER_FLAGS}") target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. if (MKL_ENV) set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}") @@ -57,29 +85,16 @@ function(ginkgo_create_thread_test test_name) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_link_libraries(${test_target_name} PRIVATE Threads::Threads ${ARGN}) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_thread_test) function(ginkgo_create_mpi_test test_name num_mpi_procs) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES - OUTPUT_NAME ${test_name}) - if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif() - if("${GINKGO_MPI_EXEC_SUFFIX}" MATCHES ".openmpi" AND MPI_RUN_AS_ROOT) - set(OPENMPI_RUN_AS_ROOT_FLAG "--allow-run-as-root") - else() - set(OPENMPI_RUN_AS_ROOT_FLAG "") - endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo gtest_mpi_main GTest::GTest ${ARGN}) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE MPI::MPI_CXX) - set(test_param ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_procs} ${OPENMPI_RUN_AS_ROOT_FLAG} ${CMAKE_BINARY_DIR}/${REL_BINARY_DIR}/${test_name}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} - COMMAND ${MPIEXEC_EXECUTABLE} ${test_param}) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + 
ginkgo_internal_add_mpi_test(${test_name} ${test_target_name} ${num_mpi_procs}) endfunction(ginkgo_create_mpi_test) function(ginkgo_create_test_cpp_cuda_header test_name) @@ -89,7 +104,7 @@ function(ginkgo_create_test_cpp_cuda_header test_name) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) target_include_directories(${test_target_name} PRIVATE "${CUDA_INCLUDE_DIRS}") target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_test_cpp_cuda_header) function(ginkgo_create_cuda_test test_name) @@ -112,7 +127,7 @@ function(ginkgo_create_cuda_test test_name) if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF) endif() - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_cuda_test) function(ginkgo_create_hip_test test_name) @@ -159,11 +174,12 @@ ginkgo_build_test_name(${test_name} test_target_name) ${HIPSPARSE_INCLUDE_DIRS} ) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + ginkgo_internal_add_test(${test_name} ${test_target_name}) endfunction(ginkgo_create_hip_test) -function(ginkgo_create_common_test test_name) - cmake_parse_arguments(PARSE_ARGV 1 common_test "" "" "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES") +function(ginkgo_internal_create_common_test_template test_name) + cmake_parse_arguments(PARSE_ARGV 1 common_test "" "TEST_TYPE" "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES;ADDITIONAL_TEST_PARAMETERS") + string(TOLOWER ${common_test_TEST_TYPE} test_type) set(executors) if(GINKGO_BUILD_OMP) list(APPEND executors omp) @@ -200,10 +216,24 @@ function(ginkgo_create_common_test test_name) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) endif() - ginkgo_set_test_target_properties(${test_name}_${exec} ${test_target_name}) + if(${test_type} STREQUAL default) + ginkgo_internal_add_test(${test_name}_${exec} ${test_target_name}) + elseif(${test_type} STREQUAL mpi) + ginkgo_internal_add_mpi_test(${test_name}_${exec} ${test_target_name} ${common_test_ADDITIONAL_TEST_PARAMETERS}) + else() + message(FATAL_ERROR "Encountered unrecognized test type ${test_type} during common test creation.") + endif() endforeach() +endfunction(ginkgo_internal_create_common_test_template) + +function(ginkgo_create_common_test test_name) + ginkgo_internal_create_common_test_template(${test_name} TEST_TYPE default ${ARGN}) endfunction(ginkgo_create_common_test) +function(ginkgo_create_common_mpi_test test_name num_mpi_procs) + ginkgo_internal_create_common_test_template(${test_name} TEST_TYPE mpi ADDITIONAL_TEST_PARAMETERS ${num_mpi_procs} ${ARGN}) +endfunction(ginkgo_create_common_mpi_test) + function(ginkgo_create_common_and_reference_test test_name) ginkgo_create_common_test(${test_name}) ginkgo_build_test_name(${test_name} test_target_name) @@ -211,7 +241,20 @@ function(ginkgo_create_common_and_reference_test test_name) add_executable(${test_target_name} ${test_name}.cpp) target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) - target_compile_definitions(${test_target_name} PRIVATE 
EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference) + target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference GKO_COMPILING_REFERENCE) + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + ginkgo_internal_add_test(${test_name}_reference ${test_target_name}) +endfunction() + + +function(ginkgo_create_common_and_reference_mpi_test test_name num_mpi_procs) + ginkgo_create_common_mpi_test(${test_name} ${num_mpi_procs}) + ginkgo_build_test_name(${test_name} test_target_name) + set(test_target_name ${test_target_name}_reference) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=ReferenceExecutor EXEC_NAMESPACE=reference GKO_COMPILING_REFERENCE) target_link_libraries(${test_target_name} PRIVATE ${ARGN}) - ginkgo_set_test_target_properties(${test_name}_reference ${test_target_name}) + ginkgo_internal_add_mpi_test(${test_name}_reference ${test_target_name} ${num_mpi_procs}) endfunction() diff --git a/cmake/get_info.cmake b/cmake/get_info.cmake index 479b889aeaf..2cf8dd06c3f 100644 --- a/cmake/get_info.cmake +++ b/cmake/get_info.cmake @@ -130,7 +130,7 @@ foreach(log_type ${log_types}) "GINKGO_BUILD_OMP;GINKGO_BUILD_MPI;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP;GINKGO_BUILD_DPCPP") ginkgo_print_module_footer(${${log_type}} " Enabled features:") ginkgo_print_foreach_variable(${${log_type}} - "GINKGO_MIXED_PRECISION") + "GINKGO_MIXED_PRECISION;GINKGO_HAVE_GPU_AWARE_MPI") ginkgo_print_module_footer(${${log_type}} " Tests, benchmarks and examples:") ginkgo_print_foreach_variable(${${log_type}} "GINKGO_BUILD_TESTS;GINKGO_FAST_TESTS;GINKGO_BUILD_EXAMPLES;GINKGO_EXTLIB_EXAMPLE;GINKGO_BUILD_BENCHMARKS;GINKGO_BENCHMARK_ENABLE_TUNING") diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.cpp index c1d1422c483..8573e2717ca 100644 --- a/common/unified/matrix/dense_kernels.cpp +++ b/common/unified/matrix/dense_kernels.cpp @@ -380,6 +380,37 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL_SIZE_T); +template +void compute_squared_norm2(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + run_kernel_col_reduction( + exec, + [] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); }, + GKO_KERNEL_REDUCE_SUM(remove_complex), result->get_values(), + x->get_size(), x); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); + + +template +void compute_sqrt(std::shared_ptr exec, + matrix::Dense* x) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto x) { + x(row, col) = sqrt(x(row, col)); + }, + x->get_size(), x); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); + + template void symm_permute(std::shared_ptr exec, const array* permutation_indices, diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index eb6412d9d51..5d741e79272 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -6,6 +6,7 @@ target_sources(ginkgo base/array.cpp base/combination.cpp base/composition.cpp + base/dense_cache.cpp base/device_matrix_data.cpp base/executor.cpp base/index_set.cpp @@ -66,7 +67,9 @@ endif() if (GINKGO_BUILD_MPI) target_sources(ginkgo - PRIVATE mpi/exception.cpp) + PRIVATE + mpi/exception.cpp + distributed/vector.cpp) endif() 
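For orientation, a hedged sketch of how the reworked helpers from cmake/create_test.cmake above are meant to be invoked from a test CMakeLists.txt; the test names and rank count are illustrative, not taken from this diff:

    # plain test: linked against ginkgo and GTest, registered via add_test
    ginkgo_create_test(my_feature_test)
    # MPI test: launched through ${MPIEXEC_EXECUTABLE} with the given number of ranks
    ginkgo_create_mpi_test(my_distributed_test 3)
    # one variant per enabled device executor (omp/cuda/hip/dpcpp), each run under MPI
    ginkgo_create_common_mpi_test(my_common_distributed_test 3)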
ginkgo_compile_features(ginkgo) diff --git a/core/base/dense_cache.cpp b/core/base/dense_cache.cpp new file mode 100644 index 00000000000..91e4a4247cd --- /dev/null +++ b/core/base/dense_cache.cpp @@ -0,0 +1,69 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +namespace gko { +namespace detail { + + +template +void DenseCache::init(std::shared_ptr exec, + dim<2> size) const +{ + if (!vec || vec->get_size() != size || vec->get_executor() != exec) { + vec = matrix::Dense::create(exec, size); + } +} + + +template +void DenseCache::init_from( + const matrix::Dense* template_vec) const +{ + if (!vec || vec->get_size() != template_vec->get_size() || + vec->get_executor() != template_vec->get_executor()) { + vec = matrix::Dense::create_with_config_of(template_vec); + } +} + + +#define GKO_DECLARE_DENSE_CACHE(_type) class DenseCache<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CACHE); + + +} // namespace detail +} // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index e99a30bca88..2a4b7cfa1d4 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum_kernels.hpp" #include "core/components/reduce_array_kernels.hpp" #include "core/distributed/partition_kernels.hpp" +#include "core/distributed/vector_kernels.hpp" #include "core/factorization/cholesky_kernels.hpp" #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/ic_kernels.hpp" @@ -143,6 +144,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2(_macro) +#define GKO_STUB_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ + template \ + _macro(ValueType, LocalIndexType, GlobalIndexType) \ + GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ + GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) + #define GKO_STUB_TEMPLATE_TYPE(_macro) \ template \ _macro(IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ @@ -242,6 +250,16 @@ GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition +namespace distributed_vector { + + +GKO_STUB_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} + + namespace dense { @@ -263,6 +281,8 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL); diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp new file mode 100644 index 00000000000..322676c6ec0 --- /dev/null +++ b/core/distributed/vector.cpp @@ -0,0 +1,475 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include "core/distributed/vector_kernels.hpp" +#include "core/matrix/dense_kernels.hpp" + + +namespace gko { +namespace distributed { +namespace vector { +namespace { + + +GKO_REGISTER_OPERATION(compute_squared_norm2, dense::compute_squared_norm2); +GKO_REGISTER_OPERATION(compute_sqrt, dense::compute_sqrt); +GKO_REGISTER_OPERATION(outplace_absolute_dense, dense::outplace_absolute_dense); +GKO_REGISTER_OPERATION(build_local, distributed_vector::build_local); + + +} // namespace +} // namespace vector + + +dim<2> compute_global_size(mpi::communicator comm, dim<2> local_size) +{ + size_type num_global_rows = local_size[0]; + comm.all_reduce(&num_global_rows, 1, MPI_SUM); + return {num_global_rows, local_size[1]}; +} + + +template +void Vector::apply_impl(const LinOp* b, LinOp* x) const +{ + GKO_NOT_SUPPORTED(this); +} + + +template +void Vector::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const +{ + GKO_NOT_SUPPORTED(this); +} + +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, dim<2> global_size, + dim<2> local_size) + : Vector(exec, comm, global_size, local_size, local_size[1]) +{} + + +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, dim<2> global_size, + dim<2> local_size, size_type stride) + : EnableLinOp>{exec, global_size}, + DistributedBase{comm}, + local_{exec, local_size, stride} +{ + GKO_ASSERT_EQUAL_COLS(global_size, local_size); +} + +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, dim<2> global_size, + local_vector_type* local_vector) + : EnableLinOp>{exec, global_size}, + DistributedBase{comm}, + local_{exec} +{ + local_vector->move_to(&local_); +} + + +template +Vector::Vector(std::shared_ptr exec, + mpi::communicator comm, + local_vector_type* local_vector) + : EnableLinOp>{exec, {}}, + DistributedBase{comm}, + local_{exec} +{ + this->set_size(compute_global_size(comm, local_vector->get_size())); + local_vector->move_to(&local_); +} + + +template +template +void Vector::read_distributed( + const device_matrix_data& data, + const Partition* partition) +{ + auto exec = this->get_executor(); + auto global_cols = data.get_size()[1]; + this->resize( + dim<2>(partition->get_size(), global_cols), + dim<2>(partition->get_part_size(this->get_communicator().rank()), + global_cols)); + + auto rank = this->get_communicator().rank(); + local_.fill(zero()); + exec->run(vector::make_build_local( + data, make_temporary_clone(exec, partition).get(), rank, &local_)); +} + + +template +template +void Vector::read_distributed( + const matrix_data& data, + const Partition* partition) + +{ + this->read_distributed( + device_matrix_data::create_from_host( + this->get_executor(), data), + std::move(partition)); +} + + +template +void Vector::fill(const ValueType value) +{ + local_.fill(value); +} + + +template +void Vector::convert_to( + Vector>* result) const +{ + GKO_ASSERT(this->get_communicator().size() == + result->get_communicator().size()); + result->set_size(this->get_size()); + this->get_local_vector()->convert_to(&result->local_); +} + + +template +void Vector::move_to(Vector>* result) +{ + this->convert_to(result); +} + + +template +std::unique_ptr::absolute_type> +Vector::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto result = + absolute_type::create(exec, this->get_communicator(), this->get_size(), + this->get_local_vector()->get_size()); + + 
exec->run(vector::make_outplace_absolute_dense(this->get_local_vector(), + &result->local_)); + + return result; +} + + +template +void Vector::compute_absolute_inplace() +{ + local_.compute_absolute_inplace(); +} + + +template +const typename Vector::local_vector_type* +Vector::get_local_vector() const +{ + return &local_; +} + + +template +std::unique_ptr::complex_type> +Vector::make_complex() const +{ + auto result = complex_type::create( + this->get_executor(), this->get_communicator(), this->get_size(), + this->get_local_vector()->get_size(), + this->get_local_vector()->get_stride()); + this->make_complex(result.get()); + return result; +} + + +template +void Vector::make_complex(Vector::complex_type* result) const +{ + this->get_local_vector()->make_complex(&result->local_); +} + + +template +std::unique_ptr::real_type> +Vector::get_real() const +{ + auto result = real_type::create(this->get_executor(), + this->get_communicator(), this->get_size(), + this->get_local_vector()->get_size(), + this->get_local_vector()->get_stride()); + this->get_real(result.get()); + return result; +} + + +template +void Vector::get_real(Vector::real_type* result) const +{ + this->get_local_vector()->get_real(&result->local_); +} + + +template +std::unique_ptr::real_type> +Vector::get_imag() const +{ + auto result = real_type::create(this->get_executor(), + this->get_communicator(), this->get_size(), + this->get_local_vector()->get_size(), + this->get_local_vector()->get_stride()); + this->get_imag(result.get()); + return result; +} + + +template +void Vector::get_imag(Vector::real_type* result) const +{ + this->get_local_vector()->get_imag(&result->local_); +} + + +template +void Vector::scale(const LinOp* alpha) +{ + local_.scale(alpha); +} + + +template +void Vector::inv_scale(const LinOp* alpha) +{ + local_.inv_scale(alpha); +} + + +template +void Vector::add_scaled(const LinOp* alpha, const LinOp* b) +{ + auto dense_b = as>(b); + local_.add_scaled(alpha, dense_b->get_local_vector()); +} + + +template +void Vector::sub_scaled(const LinOp* alpha, const LinOp* b) +{ + auto dense_b = as>(b); + local_.sub_scaled(alpha, dense_b->get_local_vector()); +} + + +template +void Vector::compute_dot(const LinOp* b, LinOp* result) const +{ + auto exec = this->get_executor(); + const auto comm = this->get_communicator(); + auto dense_res = + make_temporary_clone(exec, as>(result)); + this->get_local_vector()->compute_dot(as(b)->get_local_vector(), + dense_res.get()); + exec->synchronize(); + auto use_host_buffer = + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + host_reduction_buffer_.init(exec->get_master(), dense_res->get_size()); + host_reduction_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_reduction_buffer_->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + dense_res->copy_from(host_reduction_buffer_.get()); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } +} + + +template +void Vector::compute_conj_dot(const LinOp* b, LinOp* result) const +{ + auto exec = this->get_executor(); + const auto comm = this->get_communicator(); + auto dense_res = + make_temporary_clone(exec, as>(result)); + this->get_local_vector()->compute_conj_dot( + as(b)->get_local_vector(), dense_res.get()); + exec->synchronize(); + auto use_host_buffer = + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + host_reduction_buffer_.init(exec->get_master(), dense_res->get_size()); + 
host_reduction_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_reduction_buffer_->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + dense_res->copy_from(host_reduction_buffer_.get()); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } +} + + +template +void Vector::compute_norm2(LinOp* result) const +{ + using NormVector = typename local_vector_type::absolute_type; + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + auto exec = this->get_executor(); + const auto comm = this->get_communicator(); + auto dense_res = make_temporary_clone(exec, as(result)); + exec->run(vector::make_compute_squared_norm2(this->get_local_vector(), + dense_res.get())); + exec->synchronize(); + auto use_host_buffer = + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + host_norm_buffer_.init(exec->get_master(), dense_res->get_size()); + host_norm_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_norm_buffer_->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + dense_res->copy_from(host_norm_buffer_.get()); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } + exec->run(vector::make_compute_sqrt(dense_res.get())); +} + + +template +void Vector::compute_norm1(LinOp* result) const +{ + using NormVector = typename local_vector_type::absolute_type; + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + auto exec = this->get_executor(); + const auto comm = this->get_communicator(); + auto dense_res = make_temporary_clone(exec, as(result)); + this->get_local_vector()->compute_norm1(dense_res.get()); + exec->synchronize(); + auto use_host_buffer = + exec->get_master() != exec && !gko::mpi::is_gpu_aware(); + if (use_host_buffer) { + host_norm_buffer_.init(exec->get_master(), dense_res->get_size()); + host_norm_buffer_->copy_from(dense_res.get()); + comm.all_reduce(host_norm_buffer_->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + dense_res->copy_from(host_norm_buffer_.get()); + } else { + comm.all_reduce(dense_res->get_values(), + static_cast(this->get_size()[1]), MPI_SUM); + } +} + + +template +ValueType& Vector::at_local(size_type row, size_type col) noexcept +{ + return local_.at(row, col); +} + +template +ValueType Vector::at_local(size_type row, size_type col) const + noexcept +{ + return local_.at(row, col); +} + +template +ValueType& Vector::at_local(size_type idx) noexcept +{ + return local_.at(idx); +} + +template +ValueType Vector::at_local(size_type idx) const noexcept +{ + return local_.at(idx); +} + + +template +void Vector::resize(dim<2> global_size, dim<2> local_size) +{ + if (this->get_size() != global_size) { + this->set_size(global_size); + } + local_.resize(local_size); +} + + +template +std::unique_ptr::real_type> +Vector::create_real_view() const +{ + const auto num_global_rows = this->get_size()[0]; + const auto num_cols = + is_complex() ? 
2 * this->get_size()[1] : this->get_size()[1]; + + return real_type::create(this->get_executor(), this->get_communicator(), + dim<2>{num_global_rows, num_cols}, + const_cast( + local_.create_real_view().get())); +} + + +#define GKO_DECLARE_DISTRIBUTED_VECTOR(ValueType) class Vector +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DISTRIBUTED_VECTOR); + + +#define GKO_DECLARE_DISTRIBUTED_VECTOR_READ_DISTRIBUTED( \ + ValueType, LocalIndexType, GlobalIndexType) \ + void Vector::read_distributed( \ + const device_matrix_data& data, \ + const Partition* partition); \ + template void \ + Vector::read_distributed( \ + const matrix_data& data, \ + const Partition* partition) + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_READ_DISTRIBUTED); + + +} // namespace distributed +} // namespace gko diff --git a/core/distributed/vector_kernels.hpp b/core/distributed/vector_kernels.hpp new file mode 100644 index 00000000000..8965265259d --- /dev/null +++ b/core/distributed/vector_kernels.hpp @@ -0,0 +1,81 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
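A hedged usage sketch of the distributed::Vector API implemented above; the executor, sizes, and function name are illustrative assumptions, and it presumes the create(...) overloads forward to the constructors defined above:

    #include <ginkgo/ginkgo.hpp>

    // illustrative only; assumes MPI was initialized (e.g. via gko::mpi::environment)
    void example(gko::mpi::communicator comm)
    {
        auto exec = gko::ReferenceExecutor::create();

        // every rank contributes a 2x1 local block; the global row count is
        // obtained by summing the local row counts over the communicator
        auto local = gko::matrix::Dense<double>::create(exec, gko::dim<2>{2, 1});
        local->fill(1.0);
        // the local vector's data is moved into the distributed vector
        auto vec = gko::distributed::Vector<double>::create(exec, comm, local.get());

        // global 2-norm: squared local norms are all-reduced, then sqrt is applied
        auto norm = gko::matrix::Dense<double>::create(exec, gko::dim<2>{1, 1});
        vec->compute_norm2(norm.get());
    }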
+*************************************************************/ + +#ifndef GKO_CORE_DISTRIBUTED_VECTOR_KERNELS_HPP_ +#define GKO_CORE_DISTRIBUTED_VECTOR_KERNELS_HPP_ + + +// can't include ginkgo/core/distributed/vector.hpp since that requires linking +// against MPI +#include +#include +#include +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL(ValueType, LocalIndexType, \ + GlobalIndexType) \ + void build_local( \ + std::shared_ptr exec, \ + const device_matrix_data& input, \ + const distributed::Partition* \ + partition, \ + comm_index_type local_part, matrix::Dense* local_mtx) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + using comm_index_type = distributed::comm_index_type; \ + template \ + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL(ValueType, LocalIndexType, \ + GlobalIndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(distributed_vector, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_DISTRIBUTED_VECTOR_KERNELS_HPP_ diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 4c1027421b7..2969fa4fec1 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -80,6 +80,8 @@ GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot_dispatch); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot_dispatch); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2_dispatch); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); +GKO_REGISTER_OPERATION(compute_squared_norm2, dense::compute_squared_norm2); +GKO_REGISTER_OPERATION(compute_sqrt, dense::compute_sqrt); GKO_REGISTER_OPERATION(compute_max_nnz_per_row, dense::compute_max_nnz_per_row); GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs, hybrid::compute_coo_row_ptrs); diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index f6041ece443..04ee70bea3f 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -151,6 +151,15 @@ namespace kernels { const device_matrix_data<_type, _prec>& data, \ matrix::Dense<_type>* output) +#define GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(_type) \ + void compute_squared_norm2(std::shared_ptr exec, \ + const matrix::Dense<_type>* x, \ + matrix::Dense>* result) + +#define GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(_type) \ + void compute_sqrt(std::shared_ptr exec, \ + matrix::Dense<_type>* data) + #define GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(_type, _prec) \ void convert_to_coo(std::shared_ptr exec, \ const matrix::Dense<_type>* source, \ @@ -341,6 +350,10 @@ namespace kernels { GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ template \ diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index 6f8c7291165..f200d0eb583 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -1,6 +1,7 @@ ginkgo_create_test(abstract_factory) ginkgo_create_test(allocator) ginkgo_create_test(array) +ginkgo_create_test(dense_cache) ginkgo_create_test(combination) ginkgo_create_test(composition) ginkgo_create_test(dim) diff --git a/core/test/base/dense_cache.cpp b/core/test/base/dense_cache.cpp new 
file mode 100644 index 00000000000..41bac8c01c6 --- /dev/null +++ b/core/test/base/dense_cache.cpp @@ -0,0 +1,229 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class DenseCache : public ::testing::Test { +protected: + using value_type = ValueType; + + DenseCache() {} + + void SetUp() { ref = gko::ReferenceExecutor::create(); } + + void TearDown() {} + + void gen_cache(gko::dim<2> size) { cache.init(ref, size); } + + std::shared_ptr ref; + gko::detail::DenseCache cache; +}; + + +TYPED_TEST_SUITE(DenseCache, gko::test::ValueTypes, TypenameNameGenerator); + + +TYPED_TEST(DenseCache, CanDefaultConstruct) +{ + using value_type = typename TestFixture::value_type; + gko::detail::DenseCache cache; + + ASSERT_EQ(cache.get(), nullptr); +} + + +TYPED_TEST(DenseCache, CanInitWithSize) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + + this->cache.init(this->ref, size); + + ASSERT_NE(this->cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), size); + ASSERT_EQ(this->cache->get_executor(), this->ref); +} + + +TYPED_TEST(DenseCache, SecondInitWithSameSizeIsNoOp) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + this->cache.init(this->ref, size); + auto first_ptr = this->cache.get(); + + this->cache.init(this->ref, size); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_EQ(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, SecondInitWithDifferentSizeInitializes) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + gko::dim<2> second_size{7, 4}; + this->cache.init(this->ref, size); + auto first_ptr = this->cache.get(); + + this->cache.init(this->ref, second_size); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_NE(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, 
CanInitFromDense) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{5, 2}; + auto dense = gko::matrix::Dense::create(this->ref, size); + + this->cache.init_from(dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), size); + ASSERT_EQ(this->cache->get_executor(), dense->get_executor()); +} + + +TYPED_TEST(DenseCache, SecondInitFromSameDenseIsNoOp) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + auto dense = gko::matrix::Dense::create(this->ref, size); + this->cache.init_from(dense.get()); + auto first_ptr = this->cache.get(); + + this->cache.init_from(dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_EQ(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, SecondInitFromDifferentDenseWithSameSizeIsNoOp) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + auto first_dense = gko::matrix::Dense::create(this->ref, size); + auto second_dense = gko::matrix::Dense::create(this->ref, size); + this->cache.init_from(first_dense.get()); + auto first_ptr = this->cache.get(); + + this->cache.init_from(second_dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_EQ(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, SecondInitFromDifferentDenseWithDifferentSizeInitializes) +{ + using value_type = typename TestFixture::value_type; + gko::dim<2> size{4, 7}; + gko::dim<2> second_size{7, 4}; + auto first_dense = gko::matrix::Dense::create(this->ref, size); + auto second_dense = + gko::matrix::Dense::create(this->ref, second_size); + this->cache.init_from(first_dense.get()); + auto first_ptr = this->cache.get(); + + this->cache.init_from(second_dense.get()); + + ASSERT_NE(this->cache.get(), nullptr); + ASSERT_NE(first_ptr, this->cache.get()); +} + + +TYPED_TEST(DenseCache, VectorIsNotCopied) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache(this->cache); + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +TYPED_TEST(DenseCache, VectorIsNotMoved) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache(std::move(this->cache)); + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +TYPED_TEST(DenseCache, VectorIsNotCopyAssigned) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache; + cache = this->cache; + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +TYPED_TEST(DenseCache, VectorIsNotMoveAssigned) +{ + using value_type = typename TestFixture::value_type; + this->gen_cache({1, 1}); + gko::detail::DenseCache cache; + cache = std::move(this->cache); + + ASSERT_EQ(cache.get(), nullptr); + GKO_ASSERT_EQUAL_DIMENSIONS(this->cache->get_size(), gko::dim<2>(1, 1)); +} + + +} // namespace diff --git a/core/test/mpi/CMakeLists.txt b/core/test/mpi/CMakeLists.txt index 8edc6781c4e..1ad6a5575b2 100644 --- a/core/test/mpi/CMakeLists.txt +++ b/core/test/mpi/CMakeLists.txt @@ -1,7 +1 @@ -add_library(gtest_mpi_main "") -target_sources(gtest_mpi_main - PRIVATE - gtest/mpi_listener.cpp) -find_package(MPI REQUIRED) -target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) add_subdirectory(base) diff --git a/core/test/utils.hpp 
b/core/test/utils.hpp index b37f6981e7c..3508ae9f2ee 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -184,6 +184,30 @@ using TwoValueIndexType = #endif +using ValueLocalGlobalIndexTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types, + std::tuple, + std::tuple, + std::tuple, gko::int32, int32>, + std::tuple, gko::int32, int64>, + std::tuple, gko::int64, int64>>; +#else + ::testing::Types, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, gko::int32, int32>, + std::tuple, gko::int32, int64>, + std::tuple, gko::int64, int64>, + std::tuple, gko::int32, int32>, + std::tuple, gko::int32, int64>, + std::tuple, gko::int64, int64>>; +#endif + + template struct reduction_factor { using nc_output = remove_complex; diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 031de0a3b2b..0c5ff299f8f 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -76,6 +76,7 @@ target_sources(ginkgo_cuda base/version.cpp components/prefix_sum_kernels.cu distributed/partition_kernels.cu + distributed/vector_kernels.cu factorization/cholesky_kernels.cu factorization/factorization_kernels.cu factorization/ic_kernels.cu diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu new file mode 100644 index 00000000000..46d834ee0ca --- /dev/null +++ b/cuda/distributed/vector_kernels.cu @@ -0,0 +1,60 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace cuda { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const device_matrix_data& input, + const distributed::Partition* partition, + comm_index_type local_part, + matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 8b9a5a50ca2..b1d92cd7c5e 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -21,6 +21,7 @@ target_sources(ginkgo_dpcpp base/version.dp.cpp components/prefix_sum_kernels.dp.cpp distributed/partition_kernels.dp.cpp + distributed/vector_kernels.dp.cpp factorization/cholesky_kernels.dp.cpp factorization/ic_kernels.dp.cpp factorization/ilu_kernels.dp.cpp diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp new file mode 100644 index 00000000000..a51c9e22669 --- /dev/null +++ b/dpcpp/distributed/vector_kernels.dp.cpp @@ -0,0 +1,60 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const device_matrix_data& input, + const distributed::Partition* partition, + comm_index_type local_part, + matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index dd4878222d0..d8ab165baa0 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -165,6 +165,7 @@ set(GINKGO_HIP_SOURCES base/version.hip.cpp components/prefix_sum_kernels.hip.cpp distributed/partition_kernels.hip.cpp + distributed/vector_kernels.hip.cpp factorization/cholesky_kernels.hip.cpp factorization/factorization_kernels.hip.cpp factorization/ic_kernels.hip.cpp diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp new file mode 100644 index 00000000000..1133317e4e4 --- /dev/null +++ b/hip/distributed/vector_kernels.hip.cpp @@ -0,0 +1,60 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace hip { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const device_matrix_data& input, + const distributed::Partition* partition, + comm_index_type local_part, + matrix::Dense* local_mtx) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index 198f465d4d0..f56605ecd05 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -87,6 +87,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #cmakedefine01 GINKGO_BUILD_MPI // clang-format on +/* Is the MPI implementation GPU aware? */ +// clang-format off +#cmakedefine01 GINKGO_HAVE_GPU_AWARE_MPI +// clang-format on + /* Is HWLOC available ? */ // clang-format off diff --git a/include/ginkgo/core/base/dense_cache.hpp b/include/ginkgo/core/base/dense_cache.hpp new file mode 100644 index 00000000000..ea2a29ddf3c --- /dev/null +++ b/include/ginkgo/core/base/dense_cache.hpp @@ -0,0 +1,126 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_DENSE_CACHE_HPP_ +#define GKO_PUBLIC_CORE_BASE_DENSE_CACHE_HPP_ + + +#include + + +#include + + +namespace gko { +namespace matrix { + + +template +class Dense; + + +} + + +namespace detail { + + +/** + * Manages a Dense vector that is buffered and reused internally to avoid + * repeated allocations. Copying an instance will only yield an empty object + * since copying the cached vector would not make sense. The stored object is + * always mutable, so the cache can be used in a const-context. 
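+ *
+ * A hedged usage sketch, mirroring how distributed::Vector uses its
+ * host_reduction_buffer_ member; `host_buffer_` is an illustrative member
+ * name, and `result`, `comm` and `n` are assumed to be a Dense pointer, an
+ * mpi::communicator and the value count, respectively:
+ *
+ *     host_buffer_.init(exec->get_master(), result->get_size());
+ *     host_buffer_->copy_from(result);        // stage the values on the host
+ *     comm.all_reduce(host_buffer_->get_values(), n, MPI_SUM);
+ *     result->copy_from(host_buffer_.get());  // copy the reduced values back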
+ * + * @internal The struct is present to wrap cache-like buffer storage that will + * not be copied when the outer object gets copied. + */ +template +struct DenseCache { + DenseCache() = default; + ~DenseCache() = default; + DenseCache(const DenseCache&) {} + DenseCache(DenseCache&&) noexcept {} + DenseCache& operator=(const DenseCache&) { return *this; } + DenseCache& operator=(DenseCache&&) noexcept { return *this; } + mutable std::unique_ptr> vec{}; + + + /** + * Initializes the buffered vector with the same configuration as the + * template vector, if + * - the current vector is null, + * - the sizes of the buffered and template vector differ, + * - the executor of the buffered and template vector differ. + * + * @note This does not copy any data from the template vector. + * + * @param template_vec Defines the configuration (executor, size, stride) + * of the buffered vector. + */ + void init_from(const matrix::Dense* template_vec) const; + + /** + * Initializes the buffered vector, if + * - the current vector is null, + * - the sizes differ, + * - the executor differs. + * + * @param exec Executor of the buffered vector. + * @param size Size of the buffered vector. + */ + void init(std::shared_ptr exec, dim<2> size) const; + + /** + * Reference access to the underlying vector. + * @return Reference to the stored vector. + */ + matrix::Dense& operator*() const { return *vec; } + + /** + * Pointer access to the underlying vector. + * @return Pointer to the stored vector. + */ + matrix::Dense* operator->() const { return vec.get(); } + + /** + * Pointer access to the underlying vector. + * @return Pointer to the stored vector. + */ + matrix::Dense* get() const { return vec.get(); } +}; + + +} // namespace detail +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_DENSE_CACHE_HPP_ diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 4d6df9544ab..10b8d826d63 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -58,6 +58,19 @@ namespace gko { namespace mpi { +/** + * Return if GPU aware functionality is available + */ +static constexpr bool is_gpu_aware() +{ +#if GINKGO_HAVE_GPU_AWARE_MPI + return true; +#else + return false; +#endif +} + + #define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \ template <> \ struct type_impl { \ diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 009c2e182cd..be421de3cf6 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -152,7 +152,7 @@ using uint64 = std::uint64_t; /** - * + * Unsigned integer type capable of holding a pointer to void */ using uintptr = std::uintptr_t; @@ -559,6 +559,73 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, #endif +/** + * Instantiates a template for each non-complex value, local and global index + * type compiled by Ginkgo. + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take three arguments, which are replaced by the + * value, the local and the global index types. 
+ */ +#if GINKGO_DPCPP_SINGLE_MODE +#define GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro) \ + template _macro(float, int32, int32); \ + template _macro(float, int32, int64); \ + template _macro(float, int64, int64); \ + template <> \ + _macro(double, int32, int32) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(double, int32, int64) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(double, int64, int64) GKO_NOT_IMPLEMENTED +#else +#define GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro) \ + template _macro(float, int32, int32); \ + template _macro(float, int32, int64); \ + template _macro(float, int64, int64); \ + template _macro(double, int32, int32); \ + template _macro(double, int32, int64); \ + template _macro(double, int64, int64) +#endif + + +/** + * Instantiates a template for each value and index type compiled by Ginkgo. + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take two arguments, which are replaced by the + * value and index types. + */ +#if GINKGO_DPCPP_SINGLE_MODE +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ + GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro); \ + template _macro(std::complex, int32, int32); \ + template _macro(std::complex, int32, int64); \ + template _macro(std::complex, int64, int64); \ + template <> \ + _macro(std::complex, int32, int32) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(std::complex, int32, int64) GKO_NOT_IMPLEMENTED; \ + template <> \ + _macro(std::complex, int64, int64) GKO_NOT_IMPLEMENTED +#else +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(_macro) \ + GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( \ + _macro); \ + template _macro(std::complex, int32, int32); \ + template _macro(std::complex, int32, int64); \ + template _macro(std::complex, int64, int64); \ + template _macro(std::complex, int32, int32); \ + template _macro(std::complex, int32, int64); \ + template _macro(std::complex, int64, int64) +#endif + + #if GINKGO_DPCPP_SINGLE_MODE #define GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(_macro) \ template <> \ @@ -580,8 +647,6 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, template <> \ _macro(std::complex, std::complex) GKO_NOT_IMPLEMENTED #else - - /** * Instantiates a template for each value type conversion pair compiled by * Ginkgo. diff --git a/include/ginkgo/core/distributed/base.hpp b/include/ginkgo/core/distributed/base.hpp new file mode 100644 index 00000000000..217b80d3fcc --- /dev/null +++ b/include/ginkgo/core/distributed/base.hpp @@ -0,0 +1,104 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
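The usage pattern for these macros mirrors the existing per-value/index-type instantiation macros: a kernel file defines the kernel template once and then instantiates it for every compiled type combination. A sketch with a made-up declaration macro and backend namespace follows (the patch itself uses GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL in this role):

```cpp
#include <ginkgo/core/base/types.hpp>

// Made-up declaration macro; the real code uses e.g.
// GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL in exactly this way.
#define GKO_DECLARE_EXAMPLE_KERNEL(ValueType, LocalIndexType, GlobalIndexType) \
    void example_kernel(const ValueType* values,                               \
                        const LocalIndexType* local_idxs,                      \
                        const GlobalIndexType* global_idxs)

namespace gko {
namespace kernels {
namespace example_backend {  // hypothetical backend namespace


template <typename ValueType, typename LocalIndexType,
          typename GlobalIndexType>
GKO_DECLARE_EXAMPLE_KERNEL(ValueType, LocalIndexType, GlobalIndexType)
{
    // kernel body ...
}

// Emits one explicit `template void example_kernel(...);` instantiation per
// compiled (value, local index, global index) type combination.
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(
    GKO_DECLARE_EXAMPLE_KERNEL);


}  // namespace example_backend
}  // namespace kernels
}  // namespace gko
```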
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ +#define GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ + + +#include + + +#if GINKGO_BUILD_MPI + + +#include + + +namespace gko { +namespace distributed { + + +/** + * A base class for distributed objects. + * + * This class stores and gives access to the used mpi::communicator object. + * + * @note The communicator is not changed on assignment. + * + * @ingroup distributed + */ +class DistributedBase { +public: + virtual ~DistributedBase() = default; + + DistributedBase(const DistributedBase& other) = default; + + DistributedBase(DistributedBase&& other) = default; + + /** + * Copy assignment that doesn't change the used mpi::communicator. + * @return unmodified *this + */ + DistributedBase& operator=(const DistributedBase&) { return *this; } + + /** + * Move assignment that doesn't change the used mpi::communicator. + * @return unmodified *this + */ + DistributedBase& operator=(DistributedBase&&) noexcept { return *this; } + + /** + * Access the used mpi::communicator. + * @return used mpi::communicator + */ + mpi::communicator get_communicator() const { return comm_; } + +protected: + /** + * Creates a new DistributedBase with the specified mpi::communicator. + * @param comm used mpi::communicator + */ + explicit DistributedBase(mpi::communicator comm) : comm_{std::move(comm)} {} + +private: + mpi::communicator comm_; +}; + + +} // namespace distributed +} // namespace gko + + +#endif // GINKGO_BUILD_MPI + + +#endif // GKO_PUBLIC_CORE_DISTRIBUTED_BASE_HPP_ diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index e78ee6329c8..aad54da254b 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -93,6 +93,8 @@ namespace distributed { * exceed this index type's maximum value. * @tparam GlobalIndexType The index type used for the global indices. Needs * to be at least as large a type as LocalIndexType. + * + * @ingroup distributed */ template class Partition diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp new file mode 100644 index 00000000000..6f9c9ea44ff --- /dev/null +++ b/include/ginkgo/core/distributed/vector.hpp @@ -0,0 +1,424 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. 
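As a usage illustration (the derived class below is hypothetical and not part of the patch), a distributed type forwards its communicator to the protected constructor and can later be queried for it through get_communicator():

```cpp
#include <utility>

#include <ginkgo/core/distributed/base.hpp>

// Hypothetical distributed type, for illustration only.
class ExampleDistributedObject : public gko::distributed::DistributedBase {
public:
    explicit ExampleDistributedObject(gko::mpi::communicator comm)
        : gko::distributed::DistributedBase(std::move(comm))
    {}

    int owning_rank() const
    {
        // Copy/move assignment of the outer object never replaces the
        // communicator stored in DistributedBase.
        return get_communicator().rank();
    }
};
```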
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ +#define GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ + + +#include + + +#if GINKGO_BUILD_MPI + + +#include +#include +#include +#include +#include + + +namespace gko { +namespace distributed { + + +/** + * Vector is a format which explicitly stores (multiple) distributed column + * vectors in a dense storage format. + * + * The (multi-)vector is distributed by row, which is described by a @see + * Partition. The local vectors are stored using the @see Dense format. The + * vector should be filled using the read_distributed method, e.g. + * ``` + * auto part = Partition<...>::build_from_mapping(...); + * auto vector = Vector<...>::create(exec, comm); + * vector->read_distributed(matrix_data, part); + * ``` + * Using this approach the size of the global vectors, as well as the size of + * the local vectors, will be automatically inferred. It is possible to create a + * vector with specified global and local sizes and fill the local vectors using + * the accessor get_local_vector. + * + * @note Operations between two vectors (axpy, dot product, etc.) are only valid + * if both vectors where created using the same partition. + * + * @tparam ValueType The precision of vector elements. + * + * @ingroup dist_vector + * @ingroup distributed + */ +template +class Vector + : public EnableLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>>, + public EnableAbsoluteComputation>>, + public DistributedBase { + friend class EnableCreateMethod>; + friend class EnablePolymorphicObject, LinOp>; + friend class Vector>; + friend class Vector>; + friend class Vector>; + +public: + using EnableLinOp::convert_to; + using EnableLinOp::move_to; + + using value_type = ValueType; + using absolute_type = remove_complex; + using real_type = absolute_type; + using complex_type = Vector>; + using local_vector_type = gko::matrix::Dense; + + /** + * Reads a vector from the device_matrix_data structure and a global row + * partition. 
+ * + * The number of rows of the matrix data is ignored, only its number of + * columns is relevant. Both the number of local and global rows are + * inferred from the row partition. + * + * @note The matrix data can contain entries for rows other than those owned + * by the process. Entries for those rows are discarded. + * + * @param data The device_matrix_data structure + * @param partition The global row partition + */ + template + void read_distributed( + const device_matrix_data& data, + const Partition* partition); + + /** + * Reads a vector from the matrix_data structure and a global row + * partition. + * + * See @read_distributed + * + * @note For efficiency it is advised to use the device_matrix_data + * overload. + */ + template + void read_distributed( + const matrix_data& data, + const Partition* partition); + + void convert_to(Vector>* result) const override; + + void move_to(Vector>* result) override; + + std::unique_ptr compute_absolute() const override; + + void compute_absolute_inplace() override; + + /** + * Creates a complex copy of the original vectors. If the original vectors + * were real, the imaginary part of the result will be zero. + */ + std::unique_ptr make_complex() const; + + /** + * Writes a complex copy of the original vectors to given complex vectors. + * If the original vectors were real, the imaginary part of the result will + * be zero. + */ + void make_complex(complex_type* result) const; + + /** + * Creates new real vectors and extracts the real part of the original + * vectors into that. + */ + std::unique_ptr get_real() const; + + /** + * Extracts the real part of the original vectors into given real vectors. + */ + void get_real(real_type* result) const; + + /** + * Creates new real vectors and extracts the imaginary part of the + * original vectors into that. + */ + std::unique_ptr get_imag() const; + + /** + * Extracts the imaginary part of the original vectors into given real + * vectors. + */ + void get_imag(real_type* result) const; + + /** + * Fill the distributed vectors with a given value. + * + * @param value the value to be filled + */ + void fill(ValueType value); + + /** + * Scales the vectors with a scalar (aka: BLAS scal). + * + * @param alpha If alpha is 1x1 Dense matrx, the all vectors are scaled + * by alpha. If it is a Dense row vector of values, + * then i-th column vector is scaled with the i-th + * element of alpha (the number of columns of alpha has to + * match the number of vectors). + */ + void scale(const LinOp* alpha); + + /** + * Scales the vectors with the inverse of a scalar. + * + * @param alpha If alpha is 1x1 Dense matrix, the all vectors are scaled + * by 1 / alpha. If it is a Dense row vector of values, + * then i-th column vector is scaled with the inverse + * of the i-th element of alpha (the number of columns of + * alpha has to match the number of vectors). + */ + void inv_scale(const LinOp* alpha); + + /** + * Adds `b` scaled by `alpha` to the vectors (aka: BLAS axpy). + * + * @param alpha If alpha is 1x1 Dense matrix, the all vectors of b are + * scaled by alpha. If it is a Dense row vector of values, then i-th column + * vector of b is scaled with the i-th element of alpha (the number of + * columns of alpha has to match the number of vectors). + * @param b a (multi-)vector of the same dimension as this + */ + void add_scaled(const LinOp* alpha, const LinOp* b); + + /** + * Subtracts `b` scaled by `alpha` from the vectors (aka: BLAS axpy). 
+ * + * @param alpha If alpha is 1x1 Dense matrix, the all vectors of b are + * scaled by alpha. If it is a Dense row vector of values, then i-th column + * vector of b is scaled with the i-th element of alpha (the number of c + * @param b a (multi-)vector of the same dimension as this + */ + void sub_scaled(const LinOp* alpha, const LinOp* b); + + /** + * Computes the column-wise dot product of this (multi-)vector and `b` using + * a global reduction. + * + * @param b a (multi-)vector of same dimension as this + * @param result a Dense row matrix, used to store the dot product + * (the number of column in result must match the number + * of columns of this) + */ + void compute_dot(const LinOp* b, LinOp* result) const; + + /** + * Computes the column-wise dot product of this (multi-)vector and `conj(b)` + * using a global reduction. + * + * @param b a (multi-)vector of same dimension as this + * @param result a Dense row matrix, used to store the dot product + * (the number of column in result must match the number + * of columns of this) + */ + void compute_conj_dot(const LinOp* b, LinOp* result) const; + + /** + * Computes the Euclidian (L^2) norm of this (multi-)vector using a global + * reduction. + * + * @param result a Dense row matrix, used to store the norm + * (the number of columns in result must match the number + * of columns of this) + */ + void compute_norm2(LinOp* result) const; + + /** + * Computes the column-wise (L^1) norm of this (multi-)vector. + * + * @param result a Dense row matrix, used to store the norm + * (the number of columns in result must match the number + * of columns of this) + */ + void compute_norm1(LinOp* result) const; + + /** + * Returns a single element of the multi-vector. + * + * @param row the local row of the requested element + * @param col the local column of the requested element + * + * @note the method has to be called on the same Executor the multi-vector + * is stored at (e.g. trying to call this method on a GPU multi-vector from + * the OMP results in a runtime error) + */ + value_type& at_local(size_type row, size_type col) noexcept; + + /** + * @copydoc Vector::at(size_type, size_type) + */ + value_type at_local(size_type row, size_type col) const noexcept; + + /** + * Returns a single element of the multi-vector. + * + * Useful for iterating across all elements of the multi-vector. + * However, it is less efficient than the two-parameter variant of this + * method. + * + * @param idx a linear index of the requested element + * (ignoring the stride) + * + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU matrix from + * the OMP results in a runtime error) + */ + ValueType& at_local(size_type idx) noexcept; + + /** + * @copydoc Vector::at(size_type) + */ + ValueType at_local(size_type idx) const noexcept; + + /** + * Returns a pointer to the array of local values of the multi-vector. + * + * @return the pointer to the array of local values + */ + value_type* get_local_values(); + + /** + * @copydoc get_local_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_local_values(); + + /** + * Direct (read) access to the underlying local local_vector_type vectors. 
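Taken together, these member functions mirror the BLAS-like interface of Dense. A small sketch of the typical calls, assuming `x` and `y` are existing distributed Vectors built over the same partition and `exec` is their executor (all names here are assumptions for illustration):

```cpp
// Sketch only; x, y and exec are assumed to exist and to share one partition.
auto alpha = gko::initialize<gko::matrix::Dense<double>>({2.0}, exec);

y->scale(alpha.get());                // y <- 2 * y
y->add_scaled(alpha.get(), x.get());  // y <- y + 2 * x

// Reductions yield the same, globally reduced result on every rank.
auto dot = gko::matrix::Dense<double>::create(
    exec, gko::dim<2>{1, y->get_size()[1]});
auto norm = gko::matrix::Dense<double>::create(
    exec, gko::dim<2>{1, y->get_size()[1]});
y->compute_dot(x.get(), dot.get());
y->compute_norm2(norm.get());
```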
+ * + * @return a constant pointer to the underlying local_vector_type vectors + */ + const local_vector_type* get_local_vector() const; + + /** + * Create a real view of the (potentially) complex original multi-vector. + * If the original vector is real, nothing changes. If the original vector + * is complex, the result is created by viewing the complex vector with as + * real with a reinterpret_cast with twice the number of columns and + * double the stride. + */ + std::unique_ptr create_real_view() const; + +protected: + /** + * Creates an empty distributed vector with a specified size + * + * @param exec Executor associated with vector + * @param comm Communicator associated with vector, the default is + * MPI_COMM_WORLD + * @param partition Partition of global rows + * @param global_size Global size of the vector + * @param local_size Processor-local size of the vector + * @param stride Stride of the local vector. + */ + Vector(std::shared_ptr exec, mpi::communicator comm, + dim<2> global_size, dim<2> local_size, size_type stride); + + /** + * Creates an empty distributed vector with a specified size + * + * @param exec Executor associated with vector + * @param comm Communicator associated with vector, the default is + * MPI_COMM_WORLD + * @param partition Partition of global rows + * @param global_size Global size of the vector + * @param local_size Processor-local size of the vector, uses local_size[1] + * as the stride + */ + explicit Vector(std::shared_ptr exec, + mpi::communicator comm = mpi::communicator(MPI_COMM_WORLD), + dim<2> global_size = {}, dim<2> local_size = {}); + + /** + * Creates a distributed vector from local vectors with a specified size. + * + * @note The data form the local_vector will be moved into the new + * distributed vector. This means, access to local_vector + * will be invalid after this call. + * + * @param exec Executor associated with this vector + * @param comm Communicator associated with this vector + * @param global_size The global size of the vector + * @param local_vector The underlying local vector, the data will be moved + * into this + */ + Vector(std::shared_ptr exec, mpi::communicator comm, + dim<2> global_size, local_vector_type* local_vector); + + /** + * Creates a distributed vector from local vectors. The global size will + * be deduced from the local sizes, which will incur a collective + * communication. + * + * @note The data form the local_vector will be moved into the new + * distributed vector. This means, access to local_vector + * will be invalid after this call. 
+ * + * @param exec Executor associated with this vector + * @param comm Communicator associated with this vector + * @param local_vector The underlying local vector, the data will be moved + * into this + */ + Vector(std::shared_ptr exec, mpi::communicator comm, + local_vector_type* local_vector); + + void resize(dim<2> global_size, dim<2> local_size); + + void apply_impl(const LinOp*, LinOp*) const override; + + void apply_impl(const LinOp*, const LinOp*, const LinOp*, + LinOp*) const override; + +private: + local_vector_type local_; + ::gko::detail::DenseCache host_reduction_buffer_; + ::gko::detail::DenseCache> host_norm_buffer_; +}; + + +} // namespace distributed +} // namespace gko + + +#endif // GINKGO_BUILD_MPI + + +#endif // GKO_PUBLIC_CORE_DISTRIBUTED_VECTOR_HPP_ diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index f29f00603f7..26d32b5f16c 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -47,6 +47,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { +namespace distributed { + + +template +class Vector; + + +} + + namespace matrix { @@ -137,6 +147,7 @@ class Dense friend class SparsityCsr; friend class SparsityCsr; friend class Dense>; + friend class distributed::Vector; public: using EnableLinOp::convert_to; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index b29f78c21f3..519eb05069e 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -69,7 +70,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include +#include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 8bee4ccfa6d..6165a2624e0 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -8,6 +8,7 @@ target_sources(ginkgo_omp base/version.cpp components/prefix_sum_kernels.cpp distributed/partition_kernels.cpp + distributed/vector_kernels.cpp factorization/cholesky_kernels.cpp factorization/factorization_kernels.cpp factorization/ic_kernels.cpp diff --git a/omp/distributed/vector_kernels.cpp b/omp/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..8aaba548912 --- /dev/null +++ b/omp/distributed/vector_kernels.cpp @@ -0,0 +1,98 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
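Putting the pieces together, the following sketch shows the construction path documented above for two MPI ranks; the partition, sizes and values are made up for illustration, and MPI as well as the executor `exec` are assumed to be set up already:

```cpp
// Assumptions: MPI already initialized, 2 ranks, `exec` is an existing executor.
auto comm = gko::mpi::communicator(MPI_COMM_WORLD);

// Contiguous partition: rows [0, 2) on rank 0, rows [2, 4) on rank 1.
using part_type = gko::distributed::Partition<gko::int32, gko::int64>;
auto part = part_type::build_from_contiguous(exec, {exec, {0, 2, 4}});

// Global 4 x 2 multi-vector; each rank only stores the rows it owns.
gko::matrix_data<double, gko::int64> md{
    {1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}, {7.0, 8.0}};
auto vec = gko::distributed::Vector<double>::create(exec, comm);
vec->read_distributed(md, part.get());

// Global reduction: every rank ends up with the same Euclidean norms.
auto norm = gko::matrix::Dense<double>::create(exec, gko::dim<2>{1, 2});
vec->compute_norm2(norm.get());
```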
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include "core/components/prefix_sum_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const device_matrix_data& input, + const distributed::Partition* partition, + comm_index_type local_part, matrix::Dense* local_mtx) +{ + auto row_idxs = input.get_const_row_idxs(); + auto col_idxs = input.get_const_col_idxs(); + auto values = input.get_const_values(); + auto range_bounds = partition->get_range_bounds(); + auto range_parts = partition->get_part_ids(); + auto range_starting_indices = partition->get_range_starting_indices(); + auto num_ranges = partition->get_num_ranges(); + + auto find_range = [range_bounds, num_ranges](GlobalIndexType idx, + size_type hint) { + if (range_bounds[hint] <= idx && idx < range_bounds[hint + 1]) { + return hint; + } else { + auto it = std::upper_bound(range_bounds + 1, + range_bounds + num_ranges + 1, idx); + return static_cast(std::distance(range_bounds + 1, it)); + } + }; + auto map_to_local = [range_bounds, range_starting_indices]( + GlobalIndexType idx, + size_type range_id) -> LocalIndexType { + return static_cast(idx - range_bounds[range_id]) + + range_starting_indices[range_id]; + }; + + size_type range_id_hint = 0; +#pragma omp parallel for firstprivate(range_id_hint) + for (size_type i = 0; i < input.get_num_elems(); ++i) { + auto range_id = find_range(row_idxs[i], range_id_hint); + range_id_hint = range_id; + auto part_id = range_parts[range_id]; + // skip non-local rows + if (part_id == local_part) { + local_mtx->at(map_to_local(row_idxs[i], range_id), + static_cast(col_idxs[i])) = values[i]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/omp/test/CMakeLists.txt b/omp/test/CMakeLists.txt index cf7723a11f1..2ddf2808922 100644 --- a/omp/test/CMakeLists.txt +++ b/omp/test/CMakeLists.txt @@ -2,6 +2,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) +add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(matrix) add_subdirectory(preconditioner) diff --git a/omp/test/distributed/CMakeLists.txt b/omp/test/distributed/CMakeLists.txt new file mode 100644 index 00000000000..61e5d60cb39 --- /dev/null +++ b/omp/test/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(vector_kernels) diff --git a/omp/test/distributed/vector_kernels.cpp b/omp/test/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..5ee65cfb24c --- /dev/null +++ b/omp/test/distributed/vector_kernels.cpp @@ -0,0 +1,225 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. 
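A short worked example may help when reading find_range and map_to_local above; the partition used here is made up:

```cpp
// Worked example for the mapping above, using a made-up partition of 6 global
// rows into 3 ranges over 2 parts:
//
//   range_bounds           = {0, 2, 4, 6}  // ranges [0,2), [2,4), [4,6)
//   range part ids         = {1, 0, 1}
//   range_starting_indices = {0, 0, 2}     // offset of each range in its part
//
// For an input entry in global row 5, processed with local_part == 1:
//   find_range(5, hint)  -> 2   since range_bounds[2] <= 5 < range_bounds[3]
//   range_parts[2]       -> 1   the row is locally owned, so it is kept
//   map_to_local(5, 2)   -> 5 - range_bounds[2] + range_starting_indices[2] = 3
//
// The value is therefore written to row 3 of the rank-local Dense matrix.
```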
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include + + +#include +#include + + +#include +#include + + +#include "core/distributed/vector_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Vector : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; + using global_entry = gko::matrix_data_entry; + using mtx = gko::matrix::Dense; + + Vector() + : ref(gko::ReferenceExecutor::create()), + exec(gko::OmpExecutor::create()), + engine(42) + {} + + void validate( + const gko::distributed::Partition* + partition, + const gko::distributed::Partition* + d_partition, + gko::device_matrix_data input) + { + gko::device_matrix_data d_input{exec, + input}; + for (comm_index_type part = 0; part < partition->get_num_parts(); + ++part) { + auto num_rows = + static_cast(partition->get_part_size(part)); + auto output = + mtx::create(ref, gko::dim<2>{num_rows, input.get_size()[1]}); + output->fill(gko::zero()); + auto d_output = gko::clone(exec, output); + + gko::kernels::reference::distributed_vector::build_local( + ref, input, partition, part, output.get()); + gko::kernels::omp::distributed_vector::build_local( + exec, d_input, d_partition, part, d_output.get()); + + GKO_ASSERT_MTX_NEAR(output, d_output, 0); + } + } + + std::shared_ptr ref; + std::shared_ptr exec; + std::default_random_engine engine; +}; +template + +gko::device_matrix_data generate_random_matrix_data_array( + gko::size_type num_rows, gko::size_type num_cols, + NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec) +{ + auto md = gko::test::generate_random_matrix_data( + num_rows, num_cols, std::forward(nonzero_dist), + 
std::forward(value_dist), + std::forward(engine)); + md.ensure_row_major_order(); + return gko::device_matrix_data::create_from_host(exec, + md); +} + +TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); + + +TYPED_TEST(Vector, BuildsLocalEmptyIsEquivalentToRef) +{ + using value_type = typename TestFixture::value_type; + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::distributed::comm_index_type num_parts = 10; + auto mapping = + gko::test::generate_random_array( + 100, + std::uniform_int_distribution( + 0, num_parts - 1), + this->engine, this->ref); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto d_partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); + + this->validate( + partition.get(), d_partition.get(), + gko::device_matrix_data{this->ref}); +} + + +TYPED_TEST(Vector, BuildsLocalSmallIsEquivalentToRef) +{ + using value_type = typename TestFixture::value_type; + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::distributed::comm_index_type num_parts = 3; + gko::size_type num_rows = 10; + gko::size_type num_cols = 2; + auto mapping = + gko::test::generate_random_array( + num_rows, + std::uniform_int_distribution( + 0, num_parts - 1), + this->engine, this->ref); + auto input = + generate_random_matrix_data_array( + num_rows, num_cols, + std::uniform_int_distribution(0, + static_cast(num_cols - 1)), + std::uniform_real_distribution>(0, + 1), + this->engine, this->ref); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto d_partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); + + this->validate(partition.get(), d_partition.get(), input); +} + + +TYPED_TEST(Vector, BuildsLocalIsEquivalentToRef) +{ + using value_type = typename TestFixture::value_type; + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::distributed::comm_index_type num_parts = 13; + gko::size_type num_rows = 40; + gko::size_type num_cols = 67; + auto mapping = + gko::test::generate_random_array( + num_rows, + std::uniform_int_distribution( + 0, num_parts - 1), + this->engine, this->ref); + auto input = + generate_random_matrix_data_array( + num_rows, num_cols, + std::uniform_int_distribution(0, + static_cast(num_cols - 1)), + std::uniform_real_distribution>(0, + 1), + this->engine, this->ref); + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + auto d_partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->exec, + mapping, + num_parts); + + this->validate(partition.get(), d_partition.get(), input); +} + + +} // namespace diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index c3bcd42cf43..7e698303f3c 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources(ginkgo_reference components/precision_conversion_kernels.cpp components/prefix_sum_kernels.cpp distributed/partition_kernels.cpp 
+ distributed/vector_kernels.cpp factorization/cholesky_kernels.cpp factorization/factorization_kernels.cpp factorization/ic_kernels.cpp diff --git a/reference/distributed/vector_kernels.cpp b/reference/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..a818b73be04 --- /dev/null +++ b/reference/distributed/vector_kernels.cpp @@ -0,0 +1,97 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/distributed/vector_kernels.hpp" + + +#include "core/components/prefix_sum_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +namespace distributed_vector { + + +template +void build_local( + std::shared_ptr exec, + const device_matrix_data& input, + const distributed::Partition* partition, + comm_index_type local_part, matrix::Dense* local_mtx) +{ + auto row_idxs = input.get_const_row_idxs(); + auto col_idxs = input.get_const_col_idxs(); + auto values = input.get_const_values(); + auto range_bounds = partition->get_range_bounds(); + auto range_parts = partition->get_part_ids(); + auto range_starting_indices = partition->get_range_starting_indices(); + auto num_ranges = partition->get_num_ranges(); + + auto find_range = [range_bounds, num_ranges](GlobalIndexType idx, + size_type hint) { + if (range_bounds[hint] <= idx && idx < range_bounds[hint + 1]) { + return hint; + } else { + auto it = std::upper_bound(range_bounds + 1, + range_bounds + num_ranges + 1, idx); + return static_cast(std::distance(range_bounds + 1, it)); + } + }; + auto map_to_local = [range_bounds, range_starting_indices]( + GlobalIndexType idx, + size_type range_id) -> LocalIndexType { + return static_cast(idx - range_bounds[range_id]) + + range_starting_indices[range_id]; + }; + + size_type range_id_hint = 0; + for (size_type i = 0; i < input.get_num_elems(); ++i) { + auto range_id = find_range(row_idxs[i], range_id_hint); + range_id_hint = range_id; + auto part_id = range_parts[range_id]; + // skip non-local rows + if (part_id == local_part) { + local_mtx->at(map_to_local(row_idxs[i], range_id), + static_cast(col_idxs[i])) = values[i]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_DISTRIBUTED_VECTOR_BUILD_LOCAL); + + +} // namespace distributed_vector +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 7aa52786e0e..2b0bb6aa020 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -412,6 +412,40 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); +template +void compute_squared_norm2(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) = zero>(); + } + for (size_type i = 0; i < x->get_size()[0]; ++i) { + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) += squared_norm(x->at(i, j)); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); + + +template +void compute_sqrt(std::shared_ptr exec, + matrix::Dense* data) +{ + for (size_type i = 0; i < data->get_size()[0]; ++i) { + for (size_type j = 0; j < data->get_size()[1]; ++j) { + data->at(i, j) = sqrt(data->at(i, j)); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); + + template void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, const int64*, diff --git a/reference/test/distributed/CMakeLists.txt b/reference/test/distributed/CMakeLists.txt index 78a626512af..9dfe653db9c 100644 --- a/reference/test/distributed/CMakeLists.txt +++ b/reference/test/distributed/CMakeLists.txt @@ -1 +1,2 @@ ginkgo_create_test(partition_kernels) +ginkgo_create_test(vector_kernels) diff --git 
a/reference/test/distributed/vector_kernels.cpp b/reference/test/distributed/vector_kernels.cpp new file mode 100644 index 00000000000..53f4d60b2e8 --- /dev/null +++ b/reference/test/distributed/vector_kernels.cpp @@ -0,0 +1,156 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include +#include + + +#include +#include + + +#include +#include + + +#include "core/distributed/vector_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using comm_index_type = gko::distributed::comm_index_type; + + +template +class Vector : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; + using mtx = gko::matrix::Dense; + + Vector() : ref(gko::ReferenceExecutor::create()) {} + + void validate( + const gko::dim<2> size, + const gko::distributed::Partition* + partition, + I input_rows, I input_cols, + I input_vals, I>> output_entries) + { + std::vector>> ref_outputs; + auto input = gko::device_matrix_data{ + ref, size, input_rows, input_cols, input_vals}; + for (auto entry : output_entries) { + ref_outputs.emplace_back(entry); + } + for (comm_index_type part = 0; part < partition->get_num_parts(); + ++part) { + auto num_rows = + static_cast(partition->get_part_size(part)); + auto output = mtx::create(ref, gko::dim<2>{num_rows, size[1]}); + output->fill(gko::zero()); + + gko::kernels::reference::distributed_vector::build_local( + ref, input, partition, part, output.get()); + + GKO_ASSERT_MTX_NEAR(output, ref_outputs[part], 0); + } + } + + std::shared_ptr ref; +}; + +TYPED_TEST_SUITE(Vector, gko::test::ValueLocalGlobalIndexTypes); + + +TYPED_TEST(Vector, BuildsLocalEmpty) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::Array mapping{this->ref, {1, 0, 2, 2, 0, 1, 1, 2}}; + comm_index_type num_parts = 3; + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + + this->validate(gko::dim<2>{0, 0}, partition.get(), {}, {}, {}, + {{{}, {}}, {{}, {}, {}}, {{}, {}, {}}}); +} + + +TYPED_TEST(Vector, BuildsLocalSmall) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::Array mapping{this->ref, {1, 0}}; + comm_index_type num_parts = 2; + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + + this->validate(gko::dim<2>{2, 2}, partition.get(), {0, 0, 1, 1}, + {0, 1, 0, 1}, {1, 2, 3, 4}, {{{3, 4}}, {{1, 2}}}); +} + + +TYPED_TEST(Vector, BuildsLocal) +{ + using local_index_type = typename TestFixture::local_index_type; + using global_index_type = typename TestFixture::global_index_type; + gko::Array mapping{this->ref, {1, 2, 0, 0, 2, 1}}; + comm_index_type num_parts = 3; + auto partition = gko::distributed::Partition< + local_index_type, global_index_type>::build_from_mapping(this->ref, + mapping, + num_parts); + + this->validate(gko::dim<2>{6, 8}, partition.get(), {0, 0, 1, 1, 2, 3, 4, 5}, + {0, 1, 2, 3, 4, 5, 6, 7}, {1, 2, 3, 4, 5, 6, 7, 8}, + {{{0, 0, 0, 0, 5, 0, 0, 0}, {0, 0, 0, 0, 0, 6, 0, 0}}, + {{1, 2, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 8}}, + {{0, 0, 3, 4, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 7, 0}}}); +} + + +} // namespace diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 
d699a79d85b..ecb3f229aa4 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -614,13 +614,14 @@ TYPED_TEST(Dense, ComputesNorm2) TYPED_TEST(Dense, ComputesNorm2Mixed) { + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; using MixedMtx = typename TestFixture::MixedMtx; using MixedT = typename MixedMtx::value_type; using MixedT_nc = gko::remove_complex; using MixedNormVector = gko::matrix::Dense; - auto mtx(gko::initialize( - {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, - this->exec)); + auto mtx(gko::initialize( + {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, this->exec)); auto result = MixedNormVector::create(this->exec, gko::dim<2>{1, 2}); mtx->compute_norm2(result.get()); @@ -630,6 +631,40 @@ TYPED_TEST(Dense, ComputesNorm2Mixed) } +TYPED_TEST(Dense, ComputesNorm2Squared) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using T_nc = gko::remove_complex; + using NormVector = gko::matrix::Dense; + auto mtx(gko::initialize( + {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, this->exec)); + auto result = NormVector::create(this->exec, gko::dim<2>{1, 2}); + + gko::kernels::reference::dense::compute_squared_norm2( + gko::as(this->exec), mtx.get(), result.get()); + + EXPECT_EQ(result->at(0, 0), T_nc{9.0}); + EXPECT_EQ(result->at(0, 1), T_nc{25.0}); +} + + +TYPED_TEST(Dense, ComputesSqrt) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using T_nc = gko::remove_complex; + using NormVector = gko::matrix::Dense; + auto mtx(gko::initialize(I>{{9.0, 25.0}}, this->exec)); + + gko::kernels::reference::dense::compute_sqrt( + gko::as(this->exec), mtx.get()); + + EXPECT_EQ(mtx->at(0, 0), T_nc{3.0}); + EXPECT_EQ(mtx->at(0, 1), T_nc{5.0}); +} + + TYPED_TEST(Dense, ComputesNorm1) { using Mtx = typename TestFixture::Mtx; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d10b51222ca..d2228362fae 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,5 +5,8 @@ add_subdirectory(components) add_subdirectory(distributed) add_subdirectory(factorization) add_subdirectory(matrix) +if(GINKGO_BUILD_MPI) + add_subdirectory(mpi) +endif() add_subdirectory(multigrid) add_subdirectory(solver) diff --git a/test/base/device_matrix_data_kernels.cpp b/test/base/device_matrix_data_kernels.cpp index 3d55ae021a6..fd106a84231 100644 --- a/test/base/device_matrix_data_kernels.cpp +++ b/test/base/device_matrix_data_kernels.cpp @@ -65,7 +65,7 @@ class DeviceMatrixData : public ::testing::Test { DeviceMatrixData() : rand{82754} {} - void SetUp() + void SetUp() override { init_executor(gko::ReferenceExecutor::create(), exec); host_data.size = {100, 200}; @@ -115,7 +115,7 @@ class DeviceMatrixData : public ::testing::Test { deduplicated_data.sum_duplicates(); } - void TearDown() + void TearDown() override { if (exec != nullptr) { ASSERT_NO_THROW(exec->synchronize()); diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index c4e86ea1dfc..0c07a25f532 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1104,7 +1104,7 @@ TEST_F(Dense, ComputeNorm1IsEquivalentToRef) auto norm_size = gko::dim<2>{1, x->get_size()[1]}; auto norm_expected = NormVector::create(ref, norm_size); - auto dnorm = NormVector::create(ref, norm_size); + auto dnorm = NormVector::create(exec, norm_size); // all parameters are on ref to check cross-executor calls x->compute_norm1(norm_expected.get()); @@ -1332,4 +1332,35 @@ TEST_F(Dense, 
AddScaledIdentityToNonSquareOnDifferentExecutor) } +TEST_F(Dense, ComputeNorm2SquaredIsEquivalentToRef) +{ + set_up_apply_data(); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(ref, norm_size); + auto dnorm = NormVector::create(exec, norm_size); + + gko::kernels::reference::dense::compute_squared_norm2(ref, x.get(), + norm_expected.get()); + gko::kernels::EXEC_NAMESPACE::dense::compute_squared_norm2(exec, dx.get(), + dnorm.get()); + + GKO_ASSERT_MTX_NEAR(dnorm, norm_expected, r::value); +} + + +TEST_F(Dense, ComputesSqrt) +{ + auto mtx = gko::test::generate_random_matrix( + 1, 7, std::uniform_int_distribution(7, 7), + std::uniform_real_distribution>(0, 10), + rand_engine, ref); + auto dmtx = gko::clone(exec, mtx); + + gko::kernels::reference::dense::compute_sqrt(ref, mtx.get()); + gko::kernels::EXEC_NAMESPACE::dense::compute_sqrt(exec, dmtx.get()); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); +} + + } // namespace diff --git a/test/mpi/CMakeLists.txt b/test/mpi/CMakeLists.txt new file mode 100644 index 00000000000..9066de66970 --- /dev/null +++ b/test/mpi/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(distributed) diff --git a/test/mpi/distributed/CMakeLists.txt b/test/mpi/distributed/CMakeLists.txt new file mode 100644 index 00000000000..c0d932cb776 --- /dev/null +++ b/test/mpi/distributed/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_common_and_reference_mpi_test(vector 3) diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/distributed/vector.cpp new file mode 100644 index 00000000000..3f8abcc8422 --- /dev/null +++ b/test/mpi/distributed/vector.cpp @@ -0,0 +1,915 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
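The two new Dense kernels are the building blocks one would expect for a distributed Euclidean norm. The following is only a hedged sketch of how they could be combined with an MPI reduction; the actual core implementation is not part of this excerpt, and the variable names, the use of raw MPI_Allreduce and the double value type are assumptions:

```cpp
// Sketch: `local` is the rank-local Dense<double>* block of a distributed
// vector, `norm` a 1 x num_cols Dense<double>* on the reference executor.
gko::kernels::reference::dense::compute_squared_norm2(exec, local, norm);

// Sum the squared partial norms over all ranks, then take the square root.
// MPI_Allreduce stands in for whatever wrapper gko::mpi provides.
MPI_Allreduce(MPI_IN_PLACE, norm->get_values(),
              static_cast<int>(norm->get_size()[1]), MPI_DOUBLE, MPI_SUM,
              MPI_COMM_WORLD);
gko::kernels::reference::dense::compute_sqrt(exec, norm);
```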
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + +namespace { + + +bool needs_transfers(std::shared_ptr exec) +{ + return exec->get_master() != exec && !gko::mpi::is_gpu_aware(); +} + + +class HostToDeviceLogger : public gko::log::Logger { +public: + void on_copy_started(const gko::Executor* exec_from, + const gko::Executor* exec_to, + const gko::uintptr& loc_from, + const gko::uintptr& loc_to, + const gko::size_type& num_bytes) const override + { + if (exec_from != exec_to) { + transfer_count_++; + } + } + + int get_transfer_count() const { return transfer_count_; } + + static std::unique_ptr create( + std::shared_ptr exec) + { + return std::unique_ptr( + new HostToDeviceLogger(std::move(exec))); + } + +protected: + explicit HostToDeviceLogger(std::shared_ptr exec) + : gko::log::Logger(exec, gko::log::Logger::copy_started_mask) + {} + +private: + mutable int transfer_count_ = 0; +}; + + +template +class VectorCreation : public ::testing::Test { +public: + using value_type = + typename std::tuple_element<0, decltype( + ValueLocalGlobalIndexType())>::type; + using local_index_type = + typename std::tuple_element<1, decltype( + ValueLocalGlobalIndexType())>::type; + using global_index_type = + typename std::tuple_element<2, decltype( + ValueLocalGlobalIndexType())>::type; + using part_type = + gko::distributed::Partition; + using md_type = gko::matrix_data; + using d_md_type = gko::device_matrix_data; + using dist_vec_type = gko::distributed::Vector; + using dense_type = gko::matrix::Dense; + + VectorCreation() + : ref(gko::ReferenceExecutor::create()), + comm(MPI_COMM_WORLD), + part(gko::share(part_type::build_from_contiguous( + this->ref, {ref, {0, 2, 4, 6}}))), + local_size{4, 11}, + size{local_size[1] * comm.size(), 11}, + md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, + md_localized{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}, {{8, 9}, {10, 11}}} + {} + + void SetUp() override + { + ASSERT_EQ(this->comm.size(), 3); + init_executor(gko::ReferenceExecutor::create(), exec); + } + + void TearDown() override + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } + + std::shared_ptr ref; + std::shared_ptr exec; + gko::mpi::communicator comm; + std::shared_ptr part; + + gko::dim<2> local_size; + gko::dim<2> size; + + md_type md; + md_type md_localized[3]; + + std::default_random_engine engine; +}; + +TYPED_TEST_SUITE(VectorCreation, gko::test::ValueLocalGlobalIndexTypes); + + +#ifdef GKO_COMPILING_REFERENCE + + +TYPED_TEST(VectorCreation, CanReadGlobalMatrixData) +{ + using value_type = typename TestFixture::value_type; + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(this->md, this->part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); +} + + +TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataSomeEmpty) +{ + using part_type = typename TestFixture::part_type; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 0, 6, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + + 
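The needs_transfers helper below encodes the rule that host staging is only required when the data lives on a device executor and the MPI library was not declared GPU aware at configure time. User code can branch on the same query; a minimal sketch:

```cpp
// Sketch: the value is fixed at configure time through the
// GINKGO_HAVE_GPU_AWARE_MPI macro, so this compiles to a constant branch.
if (gko::mpi::is_gpu_aware()) {
    // device buffers may be handed to MPI calls directly
} else {
    // stage the data on exec->get_master() before communicating
}
```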
vec->read_distributed(this->md, part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + if (rank == 1) { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR( + vec->get_local_vector(), + l({{0., 1.}, {2., 3.}, {4., 5.}, {6., 7.}, {8., 9.}, {10., 11.}}), + 0.0); + } else { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + gko::dim<2>(0, 2)); + } +} + + +TYPED_TEST(VectorCreation, CanReadGlobalDeviceMatrixData) +{ + using index_type = typename TestFixture::global_index_type; + using d_md_type = typename TestFixture::d_md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + d_md_type md{ + this->exec, gko::dim<2>{6, 2}, + gko::Array{ + this->exec, I{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}}, + gko::Array{ + this->exec, I{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}, + gko::Array{ + this->exec, I{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}}}; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 2, 4, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(md, part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); +} + + +TYPED_TEST(VectorCreation, CanReadGlobalMatrixDataScattered) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}; + auto part = gko::share(part_type::build_from_mapping( + this->exec, {this->exec, {0, 1, 2, 0, 2, 0}}, 3)); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + gko::dim<2> ref_size[3] = {{3, 2}, {1, 2}, {2, 2}}; + I> ref_data[3] = { + {{0, 1}, {6, 7}, {10, 11}}, + {{2, 3}}, + {{4, 5}, {8, 9}}, + }; + + vec->read_distributed(md, part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + ref_size[rank]); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); +} + + +TYPED_TEST(VectorCreation, CanReadLocalMatrixData) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md[3] = { + {gko::dim<2>{6, 2}, {{0, 0, 0}, {0, 1, 1}, {1, 0, 2}, {1, 1, 3}}}, + {gko::dim<2>{6, 2}, {{2, 0, 4}, {2, 1, 5}, {3, 0, 6}, {3, 1, 7}}}, + {gko::dim<2>{6, 2}, {{4, 0, 8}, {4, 1, 9}, {5, 0, 10}, {5, 1, 11}}}}; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 2, 4, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + I> ref_data[3] = { + {{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + }; + + vec->read_distributed(md[rank], part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + gko::dim<2>(2, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), ref_data[rank], 0.0); +} + + +TYPED_TEST(VectorCreation, 
CanReadLocalMatrixDataSomeEmpty) +{ + using md_type = typename TestFixture::md_type; + using part_type = typename TestFixture::part_type; + using value_type = typename TestFixture::value_type; + md_type md[3] = {{gko::dim<2>{6, 2}, {}}, + {gko::dim<2>{6, 2}, + // clang-format off + {{0, 0, 0}, {0, 1, 1}, + {1, 0, 2}, {1, 1, 3}, + {2, 0, 4}, {2, 1, 5}, + {3, 0, 6}, {3, 1, 7}, + {4, 0, 8}, {4, 1, 9}, + {5, 0, 10}, {5, 1, 11}}}, + // clang-format on + {gko::dim<2>{6, 2}, {}}}; + auto part = gko::share(part_type::build_from_contiguous( + this->exec, {this->exec, {0, 0, 6, 6}})); + auto vec = TestFixture::dist_vec_type::create(this->exec, this->comm); + auto rank = this->comm.rank(); + + vec->read_distributed(md[rank], part.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_size(), gko::dim<2>(6, 2)); + if (rank == 1) { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR( + vec->get_local_vector(), + I>( + {{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}), + 0.0); + } else { + GKO_ASSERT_EQUAL_DIMENSIONS(vec->get_local_vector()->get_size(), + gko::dim<2>(0, 2)); + } +} + + +#endif + + +TYPED_TEST(VectorCreation, CanCreateFromLocalVectorAndSize) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + using dense_type = typename TestFixture::dense_type; + auto local_vec = dense_type::create(this->exec); + local_vec->read(this->md_localized[this->comm.rank()]); + auto clone_local_vec = gko::clone(local_vec); + + auto vec = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{6, 2}, + local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), clone_local_vec, 0); +} + + +TYPED_TEST(VectorCreation, CanCreateFromLocalVectorWithoutSize) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + using dense_type = typename TestFixture::dense_type; + auto local_vec = dense_type::create(this->exec); + local_vec->read(this->md_localized[this->comm.rank()]); + auto clone_local_vec = gko::clone(local_vec); + + auto vec = dist_vec_type::create(this->exec, this->comm, local_vec.get()); + + GKO_ASSERT_EQUAL_DIMENSIONS(vec, gko::dim<2>(6, 2)); + GKO_ASSERT_MTX_NEAR(vec->get_local_vector(), clone_local_vec, 0); +} + + +template +class VectorReductions : public ::testing::Test { +public: + using value_type = ValueType; + using local_index_type = gko::int32; + using global_index_type = gko::int64; + using part_type = + gko::distributed::Partition; + using dist_vec_type = gko::distributed::Vector; + using dense_type = gko::matrix::Dense; + using real_dense_type = typename dense_type::real_type; + + VectorReductions() + : ref(gko::ReferenceExecutor::create()), + exec(), + comm(MPI_COMM_WORLD), + size{53, 11}, + engine(42) + { + init_executor(ref, exec, comm); + + logger = gko::share(HostToDeviceLogger::create(exec)); + exec->add_logger(logger); + + dense_x = dense_type::create(exec); + dense_y = dense_type::create(exec); + x = dist_vec_type::create(exec, comm); + y = dist_vec_type::create(exec, comm); + dense_res = dense_type ::create(exec); + res = dense_type ::create(exec); + dense_real_res = real_dense_type ::create(exec); + real_res = real_dense_type ::create(exec); + + auto num_parts = + static_cast(comm.size()); + auto mapping = + gko::test::generate_random_array( + size[0], + std::uniform_int_distribution< + gko::distributed::comm_index_type>(0, num_parts - 1), + engine, ref); + auto part = part_type::build_from_mapping(ref, mapping, num_parts); + + auto md_x = 
gko::test::generate_random_matrix_data( + size[0], size[1], + std::uniform_int_distribution(size[1], size[1]), + std::normal_distribution>(), + engine); + dense_x->read(md_x); + auto tmp_x = dist_vec_type::create(ref, comm); + tmp_x->read_distributed(md_x, part.get()); + x = gko::clone(exec, tmp_x); + + auto md_y = gko::test::generate_random_matrix_data( + size[0], size[1], + std::uniform_int_distribution(size[1], size[1]), + std::normal_distribution>(), + engine); + dense_y->read(md_y); + auto tmp_y = dist_vec_type::create(ref, comm); + tmp_y->read_distributed(md_y, part.get()); + y = gko::clone(exec, tmp_y); + } + + void SetUp() override + { + ASSERT_GT(comm.size(), 0); + init_executor(gko::ReferenceExecutor::create(), exec); + } + + void TearDown() override + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } + + void init_result() + { + res = dense_type::create(exec, gko::dim<2>{1, size[1]}); + dense_res = dense_type::create(exec, gko::dim<2>{1, size[1]}); + real_res = real_dense_type::create(exec, gko::dim<2>{1, size[1]}); + dense_real_res = real_dense_type::create(exec, gko::dim<2>{1, size[1]}); + res->fill(0.0); + dense_res->fill(0.0); + real_res->fill(0.0); + dense_real_res->fill(0.0); + } + + std::shared_ptr ref; + std::shared_ptr exec; + + gko::mpi::communicator comm; + + gko::dim<2> size; + + std::unique_ptr dense_x; + std::unique_ptr dense_y; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr dense_res; + std::unique_ptr res; + std::unique_ptr dense_real_res; + std::unique_ptr real_res; + + std::shared_ptr logger; + + std::default_random_engine engine; +}; + +TYPED_TEST_SUITE(VectorReductions, gko::test::ValueTypes); + + +TYPED_TEST(VectorReductions, ComputesDotProductIsSameAsDense) +{ + using value_type = typename TestFixture::value_type; + this->init_result(); + + this->x->compute_dot(this->y.get(), this->res.get()); + this->dense_x->compute_dot(this->dense_y.get(), this->dense_res.get()); + + GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); +} + + +TYPED_TEST(VectorReductions, ComputesConjDotProductIsSameAsDense) +{ + using value_type = typename TestFixture::value_type; + this->init_result(); + + this->x->compute_conj_dot(this->y.get(), this->res.get()); + this->dense_x->compute_conj_dot(this->dense_y.get(), this->dense_res.get()); + + GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); +} + + +TYPED_TEST(VectorReductions, ComputesNorm2IsSameAsDense) +{ + using value_type = typename TestFixture::value_type; + this->init_result(); + + this->x->compute_norm2(this->real_res.get()); + this->dense_x->compute_norm2(this->dense_real_res.get()); + + GKO_ASSERT_MTX_NEAR(this->real_res, this->dense_real_res, + r::value); +} + + +TYPED_TEST(VectorReductions, ComputesNorm1IsSameAsDense) +{ + using value_type = typename TestFixture::value_type; + this->init_result(); + + this->x->compute_norm1(this->real_res.get()); + this->dense_x->compute_norm1(this->dense_real_res.get()); + + GKO_ASSERT_MTX_NEAR(this->real_res, this->dense_real_res, + r::value); +} + + +TYPED_TEST(VectorReductions, ComputeDotCopiesToHostOnlyIfNecessary) +{ + this->init_result(); + auto transfer_count_before = this->logger->get_transfer_count(); + + this->x->compute_dot(this->y.get(), this->res.get()); + + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); +} + + +TYPED_TEST(VectorReductions, ComputeConjDotCopiesToHostOnlyIfNecessary) +{ + this->init_result(); + auto transfer_count_before = 
this->logger->get_transfer_count(); + + this->x->compute_conj_dot(this->y.get(), this->res.get()); + + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); +} + + +TYPED_TEST(VectorReductions, ComputeNorm2CopiesToHostOnlyIfNecessary) +{ + this->init_result(); + auto transfer_count_before = this->logger->get_transfer_count(); + + this->x->compute_norm2(this->real_res.get()); + + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); +} + + +TYPED_TEST(VectorReductions, ComputeNorm1CopiesToHostOnlyIfNecessary) +{ + this->init_result(); + auto transfer_count_before = this->logger->get_transfer_count(); + + this->x->compute_norm1(this->real_res.get()); + + ASSERT_EQ(this->logger->get_transfer_count() > transfer_count_before, + needs_transfers(this->exec)); +} + + +template +class VectorLocalOps : public ::testing::Test { +public: + using value_type = ValueType; + using local_index_type = gko::int32; + using global_index_type = gko::int64; + using dist_vec_type = gko::distributed::Vector; + using complex_dist_vec_type = typename dist_vec_type::complex_type; + using real_dist_vec_type = typename dist_vec_type ::real_type; + using dense_type = gko::matrix::Dense; + using complex_dense_type = typename dense_type::complex_type; + using real_dense_type = typename dense_type ::real_type; + + VectorLocalOps() + : ref(gko::ReferenceExecutor::create()), + exec(), + comm(MPI_COMM_WORLD), + local_size{4, 11}, + size{local_size[0] * comm.size(), 11}, + engine(42) + { + init_executor(ref, exec, comm); + + x = dist_vec_type::create(exec, comm); + y = dist_vec_type::create(exec, comm); + alpha = dense_type ::create(exec); + local_complex = complex_dense_type ::create(exec); + complex = complex_dist_vec_type::create(exec, comm); + } + + void SetUp() override + { + ASSERT_GT(comm.size(), 0); + init_executor(gko::ReferenceExecutor::create(), exec); + } + + void TearDown() override + { + if (exec != nullptr) { + ASSERT_NO_THROW(exec->synchronize()); + } + } + + template + void generate_vector_pair(std::unique_ptr& local, + std::unique_ptr& dist) + { + using vtype = typename LocalVectorType::value_type; + local = gko::test::generate_random_matrix( + local_size[0], local_size[1], + std::uniform_int_distribution(local_size[1], + local_size[1]), + std::normal_distribution>(), engine, + exec); + dist = + DistVectorType::create(exec, comm, size, gko::clone(local).get()); + } + + void init_vectors() + { + generate_vector_pair(local_x, x); + generate_vector_pair(local_y, y); + + alpha = gko::test::generate_random_matrix( + 1, size[1], + std::uniform_int_distribution(size[1], size[1]), + std::normal_distribution>(), engine, + exec); + } + + void init_complex_vectors() + { + generate_vector_pair(local_real, real); + generate_vector_pair(local_complex, complex); + } + + std::shared_ptr ref; + std::shared_ptr exec; + + gko::mpi::communicator comm; + + gko::dim<2> local_size; + gko::dim<2> size; + + std::unique_ptr local_x; + std::unique_ptr local_y; + std::unique_ptr local_complex; + std::unique_ptr local_real; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr complex; + std::unique_ptr real; + + std::default_random_engine engine; +}; + +TYPED_TEST_SUITE(VectorLocalOps, gko::test::ValueTypes); + + +TYPED_TEST(VectorLocalOps, ApplyNotSupported) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + auto a = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + 
gko::dim<2>{2, 2}); + auto b = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + auto c = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + + ASSERT_THROW(a->apply(b.get(), c.get()), gko::NotSupported); +} + + +TYPED_TEST(VectorLocalOps, AdvancedApplyNotSupported) +{ + using dist_vec_type = typename TestFixture::dist_vec_type; + auto a = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + auto b = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{1, 1}, + gko::dim<2>{1, 1}); + auto c = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + auto d = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{1, 1}, + gko::dim<2>{1, 1}); + auto e = dist_vec_type::create(this->exec, this->comm, gko::dim<2>{2, 2}, + gko::dim<2>{2, 2}); + + ASSERT_THROW(a->apply(b.get(), c.get(), d.get(), e.get()), + gko::NotSupported); +} + + +TYPED_TEST(VectorLocalOps, ConvertsToPrecision) +{ + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherVector = typename gko::distributed::Vector; + auto local_tmp = OtherVector::local_vector_type::create(this->exec); + auto tmp = OtherVector::create(this->exec, this->comm); + this->init_vectors(); + + this->local_x->convert_to(local_tmp.get()); + this->x->convert_to(tmp.get()); + + GKO_ASSERT_MTX_NEAR(tmp->get_local_vector(), local_tmp, 0.0); +} + + +TYPED_TEST(VectorLocalOps, MovesToPrecision) +{ + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherVector = typename gko::distributed::Vector; + auto local_tmp = OtherVector::local_vector_type::create(this->exec); + auto tmp = OtherVector::create(this->exec, this->comm); + this->init_vectors(); + + this->local_x->move_to(local_tmp.get()); + this->x->move_to(tmp.get()); + + GKO_ASSERT_MTX_NEAR(tmp->get_local_vector(), local_tmp, 0.0); +} + + +TYPED_TEST(VectorLocalOps, ComputeAbsoluteSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + this->init_vectors(); + + auto local_abs = this->local_x->compute_absolute(); + auto abs = this->x->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs->get_local_vector(), local_abs, + r::value); +} + + +TYPED_TEST(VectorLocalOps, ComputeAbsoluteInplaceSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + this->init_vectors(); + + this->local_x->compute_absolute_inplace(); + this->x->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, + r::value); +} + + +TYPED_TEST(VectorLocalOps, MakeComplexSameAsLocal) +{ + this->init_vectors(); + this->init_complex_vectors(); + + this->complex = this->x->make_complex(); + this->local_complex = this->local_x->make_complex(); + + GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, + 0.0); +} + + +TYPED_TEST(VectorLocalOps, MakeComplexInplaceSameAsLocal) +{ + this->init_vectors(); + this->init_complex_vectors(); + + this->x->make_complex(this->complex.get()); + this->local_x->make_complex(this->local_complex.get()); + + GKO_ASSERT_MTX_NEAR(this->complex->get_local_vector(), this->local_complex, + 0.0); +} + + +TYPED_TEST(VectorLocalOps, GetRealSameAsLocal) +{ + this->init_vectors(); + this->init_complex_vectors(); + + this->real = this->complex->get_real(); + this->local_real = this->local_complex->get_real(); + + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); +} + + 
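+// The VectorLocalOps checks that follow share one pattern: the operation under
+// test is applied both to the distributed vector and to an independently
+// generated local copy, and each rank compares only its own block. A sketch,
+// mirroring the scale test below and using the fixture members created by
+// init_vectors():
+//
+//     this->x->scale(this->alpha.get());        // distributed vector
+//     this->local_x->scale(this->alpha.get());  // plain local Dense
+//     GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x,
+//                         r<value_type>::value);
+//
+// These operations require no communication, so per-rank agreement of the
+// local blocks implies agreement of the distributed result.
+
+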
+TYPED_TEST(VectorLocalOps, GetRealInplaceSameAsLocal) +{ + this->init_vectors(); + this->init_complex_vectors(); + + this->complex->get_real(this->real.get()); + this->local_complex->get_real(this->local_real.get()); + + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); +} + + +TYPED_TEST(VectorLocalOps, GetImagSameAsLocal) +{ + this->init_complex_vectors(); + + this->real = this->complex->get_imag(); + this->local_real = this->local_complex->get_imag(); + + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); +} + + +TYPED_TEST(VectorLocalOps, GetImagInplaceSameAsLocal) +{ + this->init_complex_vectors(); + + this->complex->get_imag(this->real.get()); + this->local_complex->get_imag(this->local_real.get()); + + GKO_ASSERT_MTX_NEAR(this->real->get_local_vector(), this->local_real, 0.0); +} + + +TYPED_TEST(VectorLocalOps, FillSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + auto value = gko::test::detail::get_rand_value( + std::normal_distribution>(), + this->engine); + this->init_vectors(); + + this->x->fill(value); + this->local_x->fill(value); + + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, 0.0); +} + + +TYPED_TEST(VectorLocalOps, ScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + this->init_vectors(); + + this->x->scale(this->alpha.get()); + this->local_x->scale(this->alpha.get()); + + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, + r::value); +} + + +TYPED_TEST(VectorLocalOps, InvScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + this->init_vectors(); + + this->x->inv_scale(this->alpha.get()); + this->local_x->inv_scale(this->alpha.get()); + + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, + r::value); +} + + +TYPED_TEST(VectorLocalOps, AddScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + this->init_vectors(); + + this->x->add_scaled(this->alpha.get(), this->y.get()); + this->local_x->add_scaled(this->alpha.get(), this->local_y.get()); + + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, + r::value); +} + + +TYPED_TEST(VectorLocalOps, SubScaleSameAsLocal) +{ + using value_type = typename TestFixture::value_type; + this->init_vectors(); + + this->x->sub_scaled(this->alpha.get(), this->y.get()); + this->local_x->sub_scaled(this->alpha.get(), this->local_y.get()); + + GKO_ASSERT_MTX_NEAR(this->x->get_local_vector(), this->local_x, + r::value); +} + + +TYPED_TEST(VectorLocalOps, CreateRealViewSameAsLocal) +{ + this->init_vectors(); + + auto rv = this->x->create_real_view(); + auto local_rv = this->local_x->create_real_view(); + + GKO_ASSERT_EQUAL_ROWS(rv, this->x); + GKO_ASSERT_EQUAL_ROWS(rv->get_local_vector(), local_rv); + GKO_ASSERT_EQUAL_COLS(rv->get_local_vector(), local_rv); + EXPECT_EQ(rv->get_local_vector()->get_stride(), local_rv->get_stride()); + GKO_ASSERT_MTX_NEAR(rv->get_local_vector(), local_rv, 0.0); +} + + +} // namespace diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index a21ce962f4d..9e28237f625 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -33,9 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 #ifndef GKO_TEST_UTILS_EXECUTOR_HPP_
 #define GKO_TEST_UTILS_EXECUTOR_HPP_
+
 #include
+#if GINKGO_BUILD_MPI
+#include <ginkgo/core/base/mpi.hpp>
+#endif
+
+
 #include
@@ -85,4 +91,66 @@ void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
 }
 
 
+#if GINKGO_BUILD_MPI
+
+
+void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
+                   std::shared_ptr<gko::ReferenceExecutor>& exec,
+                   gko::mpi::communicator comm)
+{
+    exec = gko::ReferenceExecutor::create();
+}
+
+
+void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
+                   std::shared_ptr<gko::OmpExecutor>& exec,
+                   gko::mpi::communicator comm)
+{
+    exec = gko::OmpExecutor::create();
+}
+
+
+void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
+                   std::shared_ptr<gko::CudaExecutor>& exec,
+                   gko::mpi::communicator comm)
+{
+    ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0);
+    auto device_id =
+        comm.node_local_rank() % gko::CudaExecutor::get_num_devices();
+    exec = gko::CudaExecutor::create(device_id, ref);
+}
+
+
+void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
+                   std::shared_ptr<gko::HipExecutor>& exec,
+                   gko::mpi::communicator comm)
+{
+    ASSERT_GT(gko::HipExecutor::get_num_devices(), 0);
+    auto device_id =
+        comm.node_local_rank() % gko::HipExecutor::get_num_devices();
+    exec = gko::HipExecutor::create(device_id, ref);
+}
+
+
+void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
+                   std::shared_ptr<gko::DpcppExecutor>& exec,
+                   gko::mpi::communicator comm)
+{
+    if (gko::DpcppExecutor::get_num_devices("gpu") > 0) {
+        auto device_id =
+            comm.node_local_rank() % gko::DpcppExecutor::get_num_devices("gpu");
+        exec = gko::DpcppExecutor::create(device_id, ref);
+    } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) {
+        auto device_id =
+            comm.node_local_rank() % gko::DpcppExecutor::get_num_devices("cpu");
+        exec = gko::DpcppExecutor::create(device_id, ref);
+    } else {
+        FAIL() << "No suitable DPC++ devices";
+    }
+}
+
+
+#endif
+
+
 #endif  // GKO_TEST_UTILS_EXECUTOR_HPP_
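The MPI-aware init_executor overloads above are intended to be called from a test fixture's constructor or SetUp(), so that every rank binds to one device of its node (device_id = node_local_rank() % number of devices). A minimal sketch, assuming the CUDA backend and an MPI-enabled gtest main; the fixture name is illustrative and not part of the patch:

#include <memory>

#include <gtest/gtest.h>

#include <ginkgo/core/base/executor.hpp>
#include <ginkgo/core/base/mpi.hpp>

#include "test/utils/executor.hpp"


class DistributedFixtureSketch : public ::testing::Test {
protected:
    void SetUp() override
    {
        // Picks device node_local_rank() % get_num_devices() for this rank,
        // so ranks sharing a node spread over the node's GPUs.
        init_executor(gko::ReferenceExecutor::create(), exec, comm);
    }

    gko::mpi::communicator comm{MPI_COMM_WORLD};
    std::shared_ptr<gko::CudaExecutor> exec;
};

Binding by node-local rank keeps the rank-to-device mapping stable regardless of how ranks are distributed across nodes, and matches what the VectorCreation, VectorReductions, and VectorLocalOps fixtures above do when they call init_executor with a communicator.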