diff --git a/.github/workflows/windows-build.yml b/.github/workflows/windows-build.yml index ccd0d978eeb..3eae206f6a9 100644 --- a/.github/workflows/windows-build.yml +++ b/.github/workflows/windows-build.yml @@ -51,7 +51,7 @@ jobs: $env:PATH="$env:PATH;$pwd\build\windows_shared_library" mkdir build cd build - cmake -DCMAKE_CXX_FLAGS=/bigobj -DCMAKE_CXX_FLAGS_DEBUG="/MDd /Zi /Ob1 /Od /RTC1" -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_OMP=OFF .. + cmake -DCMAKE_CXX_FLAGS=/bigobj -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_CXX_FLAGS_DEBUG="/MDd /Zi /Ob1 /Od /RTC1" -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_OMP=OFF .. cmake --build . -j4 --config ${{ matrix.config.build_type }} ctest . -C ${{ matrix.config.build_type }} --output-on-failure - name: install diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0e49972117f..242de328838 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -288,6 +288,26 @@ build/cuda92/intel/cuda/release/static: - cuda - gpu +# Build CUDA NVIDIA without omp +build/cuda92/intel/cuda_wo_omp/release/shared: + <<: *default_build_with_test + image: localhost:5000/gko-cuda92-gnu7-llvm50-intel2017 + variables: + <<: *default_variables + C_COMPILER: "icc" + CXX_COMPILER: "icpc" + BUILD_CUDA: "ON" + BUILD_HWLOC: "OFF" + BUILD_TYPE: "Release" + CUDA_ARCH: 35 + only: + variables: + - $RUN_CI_TAG + tags: + - private_ci + - cuda + - gpu + # cuda 10.0 and friends # Make sure that our jobs run when using self-installed # third-party HWLOC. 
@@ -597,6 +617,24 @@ build/amd/clang/hip/release/static: - amd - gpu +# Build HIP AMD without omp +build/amd/clang/hip_wo_omp/release/shared: + <<: *default_build_with_test + image: localhost:5000/gko-amd-gnu8-llvm7 + variables: + <<: *default_variables + C_COMPILER: "clang" + CXX_COMPILER: "clang++" + BUILD_HIP: "ON" + BUILD_TYPE: "Release" + only: + variables: + - $RUN_CI_TAG + tags: + - private_ci + - amd + - gpu + # no cuda but latest gcc and clang build/nocuda/gcc/core/debug/static: <<: *default_build_with_test diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e2fbeadd10..bedfe94378f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,11 @@ set(GINKGO_HIP_AMDGPU "" CACHE STRING "The amdgpu_target(s) variable passed to hipcc. The default is none (auto).") option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF) option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON) -option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON) +if(MSVC OR WIN32 OR CYGWIN OR APPLE) + option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" OFF) +else() + option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON) +endif() option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON) option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." ON) option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." 
OFF) @@ -169,6 +173,11 @@ if(PAPI_sde_FOUND) set(GINKGO_HAVE_PAPI_SDE 1) endif() +# Switch off HWLOC for Windows and MacOS +if(GINKGO_BUILD_HWLOC AND (MSVC OR WIN32 OR CYGWIN OR APPLE)) + set(GINKGO_BUILD_HWLOC OFF CACHE BOOL "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" FORCE) + message(WARNING "Ginkgo does not support HWLOC on Windows/MacOS, switch GINKGO_BUILD_HWLOC to OFF") +endif() if(GINKGO_BUILD_HWLOC) # By default always use external HWLOC set(GINKGO_USE_EXTERNAL_HWLOC 1) @@ -178,10 +187,6 @@ else() set(GINKGO_HAVE_HWLOC 0) message(STATUS "HWLOC is being forcibly switched off") endif() -# Switch off HWLOC for Windows and MacOS -if(MSVC OR WIN32 OR CYGWIN OR APPLE) - set(GINKGO_HAVE_HWLOC 0) -endif() # We keep using NVCC/HCC for consistency with previous releases even if AMD # updated everything to use NVIDIA/AMD in ROCM 4.1 @@ -211,6 +216,21 @@ if(GINKGO_BUILD_HIP) endif() +if(MSVC) + # This is modified from + # https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace + include(cmake/windows_helpers.cmake) + if(BUILD_SHARED_LIBS) + ginkgo_switch_to_windows_dynamic("CXX") + ginkgo_switch_to_windows_dynamic("C") + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) + else() + ginkgo_switch_to_windows_static("CXX") + ginkgo_switch_to_windows_static("C") + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS FALSE) + endif() +endif() + # Try to find the third party packages before using our subdirectories include(cmake/package_helpers.cmake) ginkgo_find_package(GTest "GTest::GTest;GTest::Main" FALSE 1.8.1) @@ -222,26 +242,12 @@ if(GINKGO_HAVE_HWLOC) set(GINKGO_USE_EXTERNAL_HWLOC 0) endif() endif() +# third_party needs to be after flag modification. 
add_subdirectory(third_party) # Third-party tools and libraries # Load CMake helpers include(cmake/build_helpers.cmake) include(cmake/install_helpers.cmake) -include(cmake/windows_helpers.cmake) - -# This is modified from -# https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace -if(MSVC) - if(BUILD_SHARED_LIBS) - ginkgo_switch_to_windows_dynamic("CXX") - ginkgo_switch_to_windows_dynamic("C") - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) - else() - ginkgo_switch_to_windows_static("CXX") - ginkgo_switch_to_windows_static("C") - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS FALSE) - endif() -endif() configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in ${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp @ONLY) diff --git a/INSTALL.md b/INSTALL.md index a477b2f6b9a..d85bcc48329 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -44,7 +44,8 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_HIP_AMDGPU="gpuarch1;gpuarch2"` the amdgpu_target(s) variable passed to hipcc for the `hcc` HIP backend. The default is none (auto). * `-DGINKGO_BUILD_HWLOC={ON, OFF}` builds Ginkgo with HWLOC. If system HWLOC - is not found, Ginkgo will try to build it. Default is `ON`. + is not found, Ginkgo will try to build it. Default is `ON` on Linux. Ginkgo + does not support HWLOC on Windows/MacOS, so the default is `OFF` on Windows/MacOS. * `-DGINKGO_BUILD_DOC={ON, OFF}` creates an HTML version of Ginkgo's documentation from inline comments in the code. The default is `OFF`. * `-DGINKGO_DOC_GENERATE_EXAMPLES={ON, OFF}` generates the documentation of examples @@ -99,7 +100,7 @@ Ginkgo adds the following additional switches to control what is being built: list of architectures. 
Supported values are: * `Auto` - * `Kepler`, `Maxwell`, `Pascal`, `Volta`, `Ampere` + * `Kepler`, `Maxwell`, `Pascal`, `Volta`, `Turing`, `Ampere` * `CODE`, `CODE(COMPUTE)`, `(COMPUTE)` `Auto` will automatically detect the present CUDA-enabled GPU architectures @@ -137,7 +138,7 @@ Depending on the configuration settings, some manual work might be required: * Build Ginkgo as shared library: Add `PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` into the environment variable `PATH`. `GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` is `windows_shared_library` by default. More Details are available in the [Installation page](./INSTALL.md). - * cmd: `set PATH="<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>;%PATH%"` + * cmd: `set PATH=<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>;%PATH%` * powershell: `$env:PATH="<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>;$env:PATH"` CMake will give the following error message if the path is not correct. @@ -147,16 +148,16 @@ Depending on the configuration settings, some manual work might be required: where `<path>` is the needed `<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>`. * Build Ginkgo with Debug mode: Some Debug build specific issues can appear depending on the machine and environment. The known issues are the following: - 1. `bigobj` issue: encountering `too many sections` needs the compilation flags `\bigobj` or `-Wa,-mbig-obj` + 1. `bigobj` issue: encountering `too many sections` needs the compilation flags `/bigobj` or `-Wa,-mbig-obj` 2. `ld` issue: encountering `ld: error: export ordinal too large` needs the compilation flag `-O1` The following are the details for different environments: * _Microsoft Visual Studio_: 1. `bigobj` issue - * `cmake -DCMAKE_CXX_FLAGS=\bigobj ` which might overwrite the default settings. - * add `\bigobj` into the environment variable `CXXFLAGS` (only available in the first cmake configuration) - * cmd: `set CXXFLAGS=\bigobj` - * powershell: `$env:CXXFLAGS=\bigobj` + * `cmake -DCMAKE_CXX_FLAGS=/bigobj ` which might overwrite the default settings.
+ * add `/bigobj` into the environment variable `CXXFLAGS` (only available in the first cmake configuration) + * cmd: `set CXXFLAGS=/bigobj` + * powershell: `$env:CXXFLAGS="/bigobj"` 2. `ld` issue (_Microsoft Visual Studio_ does not have this issue) * _Cygwin_: 1. `bigobj` issue @@ -175,7 +176,10 @@ Depending on the configuration settings, some manual work might be required: 2. `ld` issue (If building Ginkgo as static library, this is not needed) * `cmake -DGINKGO_COMPILER_FLAGS="-Wpedantic -O1" ` (`GINKGO_COMPILER_FLAGS` is `-Wpedantic` by default) * add `-O1` in the environement variable `CXX_FLAGS` or `CMAKE_CXX_FLAGS` -* Build Ginkgo in _MinGW_: +* Possible issue when switching Ginkgo between static and shared builds with MSVC in the same build directory:\ + If an issue occurs from mixing the MD and MT runtime libraries when enabling `GINKGO_BUILD_BENCHMARKS`, it means the third-party flags were not updated correctly. + To update the third-party flags, turn off `GINKGO_SKIP_DEPENDENCY_UPDATE` (`-DGINKGO_SKIP_DEPENDENCY_UPDATE=OFF`). +* Build Ginkgo in _MinGW_:\ If encountering the issue `cc1plus.exe: out of memory allocating 65536 bytes`, please follow the workaround in [reference](https://www.intel.com/content/www/us/en/programmable/support/support-resources/knowledge-base/embedded/2016/cc1plus-exe--out-of-memory-allocating-65536-bytes.html), or trying to compile ginkgo again might work.
diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 7f41fe87f3d..0cbaa82b3b9 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -61,9 +61,9 @@ std::shared_ptr CudaExecutor::create( return std::shared_ptr( new CudaExecutor(device_id, std::move(master), device_reset), [device_id](CudaExecutor *exec) { + auto device_reset = exec->get_device_reset(); delete exec; - if (!CudaExecutor::get_num_execs(device_id) && - exec->get_device_reset()) { + if (!CudaExecutor::get_num_execs(device_id) && device_reset) { cuda::device_guard g(device_id); cudaDeviceReset(); } @@ -76,9 +76,9 @@ void CudaExecutor::populate_exec_info(const MachineTopology *mach_topo) if (this->get_device_id() < this->get_num_devices() && this->get_device_id() >= 0) { cuda::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetPCIBusId( - const_cast(this->get_exec_info().pci_bus_id.data()), 13, - this->get_device_id())); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaDeviceGetPCIBusId(&(this->get_exec_info().pci_bus_id.front()), + 13, this->get_device_id())); auto cuda_hwloc_obj = mach_topo->get_pci_device(this->get_exec_info().pci_bus_id); @@ -230,7 +230,7 @@ void CudaExecutor::set_gpu_property() GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( &max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, this->get_device_id())); - std::vector max_threads_per_block_dim{3, 0}; + std::vector max_threads_per_block_dim(3, 0); GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( &max_threads_per_block_dim[0], cudaDevAttrMaxBlockDimX, this->get_device_id())); diff --git a/cuda/test/base/array.cu b/cuda/test/base/array.cu index f35ac061972..4d12cff3988 100644 --- a/cuda/test/base/array.cu +++ b/cuda/test/base/array.cu @@ -39,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/test/utils.hpp" +#include "cuda/test/utils.hpp" template diff --git a/cuda/test/factorization/par_ic_kernels.cpp b/cuda/test/factorization/par_ic_kernels.cpp index ac454eb6b80..e957efbadf4 100644 --- a/cuda/test/factorization/par_ic_kernels.cpp +++ b/cuda/test/factorization/par_ic_kernels.cpp @@ -71,7 +71,7 @@ class ParIc : public ::testing::Test { : mtx_size(624, 624), rand_engine(43456), ref(gko::ReferenceExecutor::create()), - cuda(gko::CudaExecutor::create(0, gko::OmpExecutor::create())) + cuda(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create())) { mtx_l = gko::test::generate_random_lower_triangular_matrix( mtx_size[0], mtx_size[0], false, diff --git a/cuda/test/matrix/diagonal_kernels.cpp b/cuda/test/matrix/diagonal_kernels.cpp index d0016b0a079..d3c6f8c5973 100644 --- a/cuda/test/matrix/diagonal_kernels.cpp +++ b/cuda/test/matrix/diagonal_kernels.cpp @@ -125,10 +125,10 @@ class Diagonal : public ::testing::Test { diag = gen_diag(mtx_size[0]); ddiag = Diag::create(cuda); ddiag->copy_from(diag.get()); - dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); - denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); + dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); + denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); ddense1 = Dense::create(cuda); ddense1->copy_from(dense1.get()); ddense2 = Dense::create(cuda); diff --git a/cuda/test/preconditioner/isai_kernels.cpp b/cuda/test/preconditioner/isai_kernels.cpp index f099e3f503b..9385c0f109d 100644 --- a/cuda/test/preconditioner/isai_kernels.cpp +++ b/cuda/test/preconditioner/isai_kernels.cpp @@ -244,7 +244,7 @@ TEST_F(Isai, CudaIsaiGenerateSpdinverseShortIsEquivalentToRef) true); 
GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); - GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r::value); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 15 * r::value); GKO_ASSERT_ARRAY_EQ(a1, da1); GKO_ASSERT_ARRAY_EQ(a2, da2); ASSERT_EQ(a1.get_const_data()[num_rows], 0); diff --git a/cuda/test/reorder/rcm_kernels.cpp b/cuda/test/reorder/rcm_kernels.cpp index 0a066918b95..afb3336134b 100644 --- a/cuda/test/reorder/rcm_kernels.cpp +++ b/cuda/test/reorder/rcm_kernels.cpp @@ -52,7 +52,8 @@ class Rcm : public ::testing::Test { Rcm() - : exec(gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true)), + : exec(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), + true)), // clang-format off p_mtx(gko::initialize({{1.0, 2.0, 0.0, -1.3, 2.1}, {2.0, 5.0, 1.5, 0.0, 0.0}, diff --git a/cuda/test/solver/cb_gmres_kernels.cpp b/cuda/test/solver/cb_gmres_kernels.cpp index cb4b96f65c9..ce28556ef24 100644 --- a/cuda/test/solver/cb_gmres_kernels.cpp +++ b/cuda/test/solver/cb_gmres_kernels.cpp @@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/cb_gmres_accessor.hpp" #include "core/solver/cb_gmres_kernels.hpp" -#include "core/test/utils.hpp" +#include "cuda/test/utils.hpp" namespace { diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 4b6e0e0a667..7667c7beb9f 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -43,9 +43,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { +// Visual Studio does not define the constructor of std::mutex as constexpr, +// causing it to not be initialized when creating this executor (which uses +// the mutex) +#if !defined(_MSC_VER) // prevent device reset after each test auto no_reset_exec = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), true); +#endif } // namespace diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 706270aaa47..49013544d98 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -61,9 +61,9 @@ std::shared_ptr HipExecutor::create( return std::shared_ptr( new HipExecutor(device_id, std::move(master), device_reset), [device_id](HipExecutor *exec) { + auto device_reset = exec->get_device_reset(); delete exec; - if (!HipExecutor::get_num_execs(device_id) && - exec->get_device_reset()) { + if (!HipExecutor::get_num_execs(device_id) && device_reset) { hip::device_guard g(device_id); hipDeviceReset(); } @@ -76,9 +76,9 @@ void HipExecutor::populate_exec_info(const MachineTopology *mach_topo) if (this->get_device_id() < this->get_num_devices() && this->get_device_id() >= 0) { hip::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetPCIBusId( - const_cast(this->get_exec_info().pci_bus_id.data()), 13, - this->get_device_id())); + GKO_ASSERT_NO_HIP_ERRORS( + hipDeviceGetPCIBusId(&(this->get_exec_info().pci_bus_id.front()), + 13, this->get_device_id())); auto hip_hwloc_obj = mach_topo->get_pci_device(this->get_exec_info().pci_bus_id); @@ -232,7 +232,7 @@ void HipExecutor::set_gpu_property() this->get_device_id())); this->get_exec_info().max_workitem_sizes.push_back( max_threads_per_block); - std::vector max_threads_per_block_dim{3, 0}; + std::vector max_threads_per_block_dim(3, 0); GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( &max_threads_per_block_dim[0], hipDeviceAttributeMaxBlockDimX, this->get_device_id())); diff --git a/hip/test/factorization/par_ic_kernels.hip.cpp 
b/hip/test/factorization/par_ic_kernels.hip.cpp index 230ade961e5..e58b4da93e4 100644 --- a/hip/test/factorization/par_ic_kernels.hip.cpp +++ b/hip/test/factorization/par_ic_kernels.hip.cpp @@ -71,7 +71,7 @@ class ParIc : public ::testing::Test { : mtx_size(585, 585), rand_engine(10667), ref(gko::ReferenceExecutor::create()), - hip(gko::HipExecutor::create(0, gko::OmpExecutor::create())) + hip(gko::HipExecutor::create(0, gko::ReferenceExecutor::create())) { mtx_l = gko::test::generate_random_lower_triangular_matrix( mtx_size[0], mtx_size[0], false, diff --git a/hip/test/matrix/diagonal_kernels.hip.cpp b/hip/test/matrix/diagonal_kernels.hip.cpp index 2c97e7e4b73..606e39db7e6 100644 --- a/hip/test/matrix/diagonal_kernels.hip.cpp +++ b/hip/test/matrix/diagonal_kernels.hip.cpp @@ -125,10 +125,10 @@ class Diagonal : public ::testing::Test { diag = gen_diag(mtx_size[0]); ddiag = Diag::create(hip); ddiag->copy_from(diag.get()); - dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); - denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); + dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); + denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); ddense1 = Dense::create(hip); ddense1->copy_from(dense1.get()); ddense2 = Dense::create(hip); diff --git a/hip/test/preconditioner/isai_kernels.hip.cpp b/hip/test/preconditioner/isai_kernels.hip.cpp index 18470c3d77c..bfcb226bbc1 100644 --- a/hip/test/preconditioner/isai_kernels.hip.cpp +++ b/hip/test/preconditioner/isai_kernels.hip.cpp @@ -225,7 +225,7 @@ TEST_F(Isai, HipIsaiGenerateSpdinverseShortIsEquivalentToRef) true); GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); - GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r::value); + 
GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 15 * r::value); GKO_ASSERT_ARRAY_EQ(a1, da1); GKO_ASSERT_ARRAY_EQ(a2, da2); ASSERT_EQ(a1.get_const_data()[num_rows], 0); diff --git a/hip/test/solver/cb_gmres_kernels.cpp b/hip/test/solver/cb_gmres_kernels.cpp index 19572f73c26..b5114129935 100644 --- a/hip/test/solver/cb_gmres_kernels.cpp +++ b/hip/test/solver/cb_gmres_kernels.cpp @@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/cb_gmres_accessor.hpp" #include "core/solver/cb_gmres_kernels.hpp" -#include "core/test/utils.hpp" +#include "hip/test/utils.hip.hpp" namespace { diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index dda2c068524..03d4f2ba6c7 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -43,9 +43,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { +// Visual Studio does not define the constructor of std::mutex as constexpr, +// causing it to not be initialized when creating this executor (which uses +// the mutex) +#if !defined(_MSC_VER) // prevent device reset after each test auto no_reset_exec = gko::HipExecutor::create(0, gko::ReferenceExecutor::create(), true); +#endif } // namespace