From 906aee1923e31729cf903a90e63e730acf6873bc Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Thu, 19 Sep 2024 12:20:20 +0300 Subject: [PATCH 01/21] Fixes for HIP detection for recent ROCm and CMake --- CMakeLists.txt | 4 ++-- cmake/public/detect_features.cmake | 24 +++++++++++-------- cmake/public/gridtools_setup_targets.cmake | 20 ++++++++-------- .../getting_started/code/CMakeLists.txt | 2 +- include/gridtools/common/cuda_is_ptr.hpp | 5 ---- include/gridtools/common/cuda_runtime.hpp | 1 + include/gridtools/common/cuda_util.hpp | 4 +++- include/gridtools/common/timer/timer_cuda.hpp | 2 +- 8 files changed, 32 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a33f191b1..9889cedb42 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,8 +11,8 @@ # the environment from which GridTools was installed is irrelevant for the installation. # Therefore, we use the same macro to setup CMake targets in the main CMakeLists.txt (this file) and in the # GridToolsConfig.cmake of an installation (see cmake/public/gridtools_setup_target.cmake) -cmake_minimum_required(VERSION 3.18.1) -cmake_policy(VERSION 3.18.1) +cmake_minimum_required(VERSION 3.21.0) +cmake_policy(VERSION 3.21.0) file(STRINGS "version.txt" __GT_VERSION) project(GridTools VERSION ${__GT_VERSION} LANGUAGES CXX) diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index 86ce60de79..dcd94a4fc7 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -27,6 +27,16 @@ function(try_nvcc_cuda gt_result) set(${gt_result} NOTFOUND PARENT_SCOPE) endfunction() +function(try_hip gt_result) + include(CheckLanguage) + check_language(HIP) + if(CMAKE_HIP_COMPILER) + set(${gt_result} HIPCC-AMDGPU PARENT_SCOPE) + return() + endif() + set(${gt_result} NOTFOUND PARENT_SCOPE) +endfunction() + # detect_cuda_type() # Parameters: # - cuda_type: result variable is set to one of HIPCC-AMDGPU/NVCC-CUDA/Clang-CUDA/NOTFOUND @@ -35,16 +45,10 @@ endfunction() # - Clang-CUDA: Try Clang-CUDA or fail. # - NVCC-CUDA: Try NVCC-CUDA or fail. function(detect_cuda_type cuda_type clang_mode) - get_filename_component(cxx_name ${CMAKE_CXX_COMPILER} NAME) - if(cxx_name STREQUAL "hipcc") - include(try_compile_hip) - try_compile_hip(GT_HIP_WORKS) #TODO use cache variable to avoid compiling each cmake run - if(GT_HIP_WORKS) - set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE) - return() - else() - message(FATAL_ERROR "${cxx_name} wasn't able to compile a simple HIP program.") - endif() + try_hip(gt_result) + if(gt_result) + set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE) + return() endif() if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/cmake/public/gridtools_setup_targets.cmake b/cmake/public/gridtools_setup_targets.cmake index 553f86f7cb..f0c6dc0d7c 100644 --- a/cmake/public/gridtools_setup_targets.cmake +++ b/cmake/public/gridtools_setup_targets.cmake @@ -161,10 +161,15 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode) set(_gt_namespace ${GT_NAMESPACE}) set(_gt_imported "IMPORTED") else() - if((GT_CUDA_TYPE STREQUAL NVCC-CUDA) AND (CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)) + if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) # Do not enable the language if we are included from a super-project. - # It is up to the super-project to enable CUDA. - enable_language(CUDA) + # It is up to the super-project to enable CUDA/HIP. + if(GT_CUDA_TYPE STREQUAL NVCC-CUDA) + enable_language(CUDA) + endif() + if(GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU) + enable_language(HIP) + endif() endif() endif() @@ -212,15 +217,10 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode) set(_gt_setup_root_dir ${CUDAToolkit_BIN_DIR}/..) target_compile_options(_gridtools_cuda INTERFACE $<$:-xcuda --cuda-path=${_gt_setup_root_dir}>) target_link_libraries(_gridtools_cuda INTERFACE CUDA::cudart) - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.0.0) - # Workaround for problem seen with Clang 10.0.1 in CUDA mode (havogt): - # The default std in Clang 10 is c++14, however in CUDA mode the compiler falls back to pre-c++11. - # Hypothesis: CMake tries to be smart and only puts `-std=c++14` if needed, but isn't aware of the CUDA problem... - # TODO check if fixed in Clang 11 - target_compile_options(_gridtools_cuda INTERFACE $<$:-std=c++14>) - endif() elseif(type STREQUAL HIPCC-AMDGPU) + find_package(hip REQUIRED) target_compile_options(_gridtools_cuda INTERFACE $<$:-xhip>) + target_link_libraries(_gridtools_cuda INTERFACE hip::host) endif() endfunction() diff --git a/docs_src/manuals/getting_started/code/CMakeLists.txt b/docs_src/manuals/getting_started/code/CMakeLists.txt index 051ac03207..3fcfa85d13 100644 --- a/docs_src/manuals/getting_started/code/CMakeLists.txt +++ b/docs_src/manuals/getting_started/code/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.18.1) +cmake_minimum_required(VERSION 3.21.0) project(GridTools-laplacian LANGUAGES CXX) diff --git a/include/gridtools/common/cuda_is_ptr.hpp b/include/gridtools/common/cuda_is_ptr.hpp index 118b94a431..32e4aa870f 100644 --- a/include/gridtools/common/cuda_is_ptr.hpp +++ b/include/gridtools/common/cuda_is_ptr.hpp @@ -27,12 +27,7 @@ namespace gridtools { cudaPointerAttributes ptrAttributes; cudaError_t error = cudaPointerGetAttributes(&ptrAttributes, ptr); if (error == cudaSuccess) - -#if defined(CUDART_VERSION) && CUDART_VERSION < 10000 or defined(__HIPCC__) - return ptrAttributes.memoryType == cudaMemoryTypeDevice; // deprecated in CUDA 10 -#else return ptrAttributes.type == cudaMemoryTypeDevice || ptrAttributes.type == cudaMemoryTypeManaged; -#endif if (error != cudaErrorInvalidValue) GT_CUDA_CHECK(error); diff --git a/include/gridtools/common/cuda_runtime.hpp b/include/gridtools/common/cuda_runtime.hpp index 63888d0113..2f52a924e1 100644 --- a/include/gridtools/common/cuda_runtime.hpp +++ b/include/gridtools/common/cuda_runtime.hpp @@ -41,6 +41,7 @@ #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost #define cudaMemcpyHostToDevice hipMemcpyHostToDevice #define cudaMemoryTypeDevice hipMemoryTypeDevice +#define cudaMemoryTypeManaged hipMemoryTypeManaged #define cudaPointerAttributes hipPointerAttribute_t #define cudaPointerGetAttributes hipPointerGetAttributes #define cudaSetDevice hipSetDevice diff --git a/include/gridtools/common/cuda_util.hpp b/include/gridtools/common/cuda_util.hpp index 374492f041..f46884d9a8 100644 --- a/include/gridtools/common/cuda_util.hpp +++ b/include/gridtools/common/cuda_util.hpp @@ -37,7 +37,9 @@ namespace gridtools { struct cuda_free { template void operator()(T *ptr) const { - cudaFree(const_cast *>(ptr)); + cudaError_t err = cudaFree(const_cast *>(ptr)); + if (err != cudaSuccess) + on_error(err, "cudaFree", "", "", 0); } }; diff --git a/include/gridtools/common/timer/timer_cuda.hpp b/include/gridtools/common/timer/timer_cuda.hpp index aabecf619f..9f844f829a 100644 --- a/include/gridtools/common/timer/timer_cuda.hpp +++ b/include/gridtools/common/timer/timer_cuda.hpp @@ -21,7 +21,7 @@ namespace gridtools { class timer_cuda { struct destroy_event { using pointer = cudaEvent_t; - void operator()(cudaEvent_t event) const { cudaEventDestroy(event); } + void operator()(cudaEvent_t event) const { GT_CUDA_CHECK(cudaEventDestroy(event)); } }; using event_holder = std::unique_ptr; From b6d1ad4e941c7e85b5e69e99c275d40e3a806fbf Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Mon, 23 Sep 2024 14:05:17 +0200 Subject: [PATCH 02/21] backward compatible hipPointerAttribute_t --- include/gridtools/common/cuda_is_ptr.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/gridtools/common/cuda_is_ptr.hpp b/include/gridtools/common/cuda_is_ptr.hpp index 32e4aa870f..d1e99566ae 100644 --- a/include/gridtools/common/cuda_is_ptr.hpp +++ b/include/gridtools/common/cuda_is_ptr.hpp @@ -27,7 +27,11 @@ namespace gridtools { cudaPointerAttributes ptrAttributes; cudaError_t error = cudaPointerGetAttributes(&ptrAttributes, ptr); if (error == cudaSuccess) +#if defined(HIP_VERSION) and HIP_VERSION < 60000000 + return ptrAttributes.memoryType == cudaMemoryTypeDevice; +#else return ptrAttributes.type == cudaMemoryTypeDevice || ptrAttributes.type == cudaMemoryTypeManaged; +#endif if (error != cudaErrorInvalidValue) GT_CUDA_CHECK(error); From 1bdd36bc493665584b48eaef3b717d788480b94a Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 11:18:22 +0200 Subject: [PATCH 03/21] prefer CUDA over HIP if both work --- CMakeLists.txt | 9 +-- cmake/public/detect_features.cmake | 92 ++++++++++++++++-------------- 2 files changed, 54 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9889cedb42..23e8a27801 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,12 +29,13 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/public" "${CMAKE_ set(CMAKE_EXPORT_NO_PACKAGE_REGISTRY ON CACHE BOOL "") mark_as_advanced(CMAKE_EXPORT_NO_PACKAGE_REGISTRY) -# User setting GT_CLANG_CUDA_MODE: decide if Clang-CUDA or NVCC +# User setting GT_CLANG_CUDA_MODE: decide if Clang-CUDA, NVCC or HIP +# TODO(havogt): rename variable to GT_CLANG_MODE in backwards compatible way if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set(GT_CLANG_CUDA_MODE "AUTO" CACHE STRING - "AUTO, Clang-CUDA or NVCC-CUDA; \ - AUTO = Use NVCC if language CUDA is enabled, else prefer Clang-CUDA.") - set_property(CACHE GT_CLANG_CUDA_MODE PROPERTY STRINGS "AUTO;Clang-CUDA;NVCC-CUDA") + "AUTO, Clang-CUDA, NVCC-CUDA or HIP; \ + AUTO = Use NVCC if language CUDA is enabled, else prefer Clang-CUDA, else prefer HIP.") + set_property(CACHE GT_CLANG_CUDA_MODE PROPERTY STRINGS "AUTO;Clang-CUDA;NVCC-CUDA;HIP") endif() # User setting GT_INSTALL_EXAMPLES: diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index dcd94a4fc7..3fcc57fbe7 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -40,64 +40,70 @@ endfunction() # detect_cuda_type() # Parameters: # - cuda_type: result variable is set to one of HIPCC-AMDGPU/NVCC-CUDA/Clang-CUDA/NOTFOUND -# - clang_mode: AUTO, Clang-CUDA, NVCC-CUDA -# - AUTO: Prefer NVCC-CUDA if the CUDA language is enabled, else try Clang-CUDA +# - mode: AUTO, HIP, Clang-CUDA, NVCC-CUDA +# - AUTO: Prefer NVCC-CUDA if the CUDA language is enabled, prefer HIP if the HIP langauge is enabled, else try Clang-CUDA, else try HIP. +# - HIP: Try HIP or fail. # - Clang-CUDA: Try Clang-CUDA or fail. # - NVCC-CUDA: Try NVCC-CUDA or fail. -function(detect_cuda_type cuda_type clang_mode) - try_hip(gt_result) - if(gt_result) - set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE) - return() - endif() - - if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") +function(detect_cuda_type cuda_type mode) + string(TOLOWER "${mode}" _lower_case_mode) + if(_lower_case_mode STREQUAL "clang-cuda") + try_clang_cuda(gt_result) + if(gt_result) + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() + else() + message(FATAL_ERROR "Clang-CUDA mode was selected, but doesn't work.") + endif() + elseif(_lower_case_mode STREQUAL "nvcc-cuda") try_nvcc_cuda(gt_result) - set(${cuda_type} ${gt_result} PARENT_SCOPE) - return() - else() # Clang - string(TOLOWER "${clang_mode}" _lower_case_clang_cuda) - if(_lower_case_clang_cuda STREQUAL "clang-cuda") + if(gt_result) + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() + else() + message(FATAL_ERROR "NVCC-CUDA mode was selected, but doesn't work.") + endif() + elseif(_lower_case_mode STREQUAL "hip") + try_hip(gt_result) + if(gt_result) + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() + else() + message(FATAL_ERROR "HIP mode was selected, but doesn't work.") + endif() + elseif(_lower_case_mode STREQUAL "auto") # AUTO + get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) + if("CUDA" IN_LIST languages) # CUDA language is already enabled, prefer it + set(${cuda_type} NVCC-CUDA PARENT_SCOPE) + return() + elseif("HIP" IN_LIST languages) # HIP language is already enabled, prefer it + set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE) + return() + else() + # Prefer Clang-CUDA try_clang_cuda(gt_result) if(gt_result) set(${cuda_type} ${gt_result} PARENT_SCOPE) return() - else() - message(FATAL_ERROR "Clang-CUDA mode was selected, but doesn't work.") endif() - elseif(_lower_case_clang_cuda STREQUAL "nvcc-cuda") + + # Clang-CUDA doesn't work, try NVCC try_nvcc_cuda(gt_result) if(gt_result) set(${cuda_type} ${gt_result} PARENT_SCOPE) return() - else() - message(FATAL_ERROR "NVCC-CUDA mode was selected, but doesn't work.") endif() - elseif(_lower_case_clang_cuda STREQUAL "auto") # AUTO - get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) - if("CUDA" IN_LIST languages) # CUDA language is already enabled, prefer it - set(${cuda_type} NVCC-CUDA PARENT_SCOPE) - return() - else() - # Prefer Clang-CUDA - try_clang_cuda(gt_result) - if(gt_result) - set(${cuda_type} ${gt_result} PARENT_SCOPE) - return() - endif() - # Clang-CUDA doesn't work, try NVCC - try_nvcc_cuda(gt_result) - if(gt_result) - set(${cuda_type} ${gt_result} PARENT_SCOPE) - return() - endif() - - set(${cuda_type} NOTFOUND PARENT_SCOPE) + # No CUDA variant works, try HIP + try_hip(gt_result) + if(gt_result) + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() endif() - else() - message(FATAL_ERROR "Clang CUDA mode set to invalid value ${clang_mode}") + + set(${cuda_type} NOTFOUND PARENT_SCOPE) endif() + else() + message(FATAL_ERROR "CUDA/HIP mode set to invalid value ${mode}") endif() - set(${cuda_type} NOTFOUND PARENT_SCOPE) endfunction() From cfe0a890ffe597c531725e950b6d4a92dedb9aaf Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 11:26:36 +0200 Subject: [PATCH 04/21] test hip detection in gh --- .github/workflows/cmake-configure.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/cmake-configure.yml b/.github/workflows/cmake-configure.yml index 0544ec250e..9cdf937089 100644 --- a/.github/workflows/cmake-configure.yml +++ b/.github/workflows/cmake-configure.yml @@ -102,3 +102,21 @@ jobs: && cmake .. > out.log && cat out.log && grep "GPU mode: NVCC-CUDA" out.log > /dev/null + + hip: + runs-on: ubuntu-latest + container: ghcr.io/gridtools/gridtools-base:base-hip + strategy: + matrix: + cuda-mode: [AUTO, HIP] + steps: + - uses: actions/checkout@v2 + - name: CMake version + run: | + cmake --version + - name: CMake configure expect success + run: > + mkdir build && cd build + && cmake .. -DGT_CLANG_CUDA_MODE=${{ matrix.cuda-mode }} > out.log + && cat out.log + && grep "GPU mode: HIPCC-AMD" out.log > /dev/null From 0e789ab5a66086825cff664886267139ac3341da Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 11:30:59 +0200 Subject: [PATCH 05/21] default to AUTO --- cmake/public/detect_features.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index 3fcc57fbe7..7272abce88 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -46,6 +46,9 @@ endfunction() # - Clang-CUDA: Try Clang-CUDA or fail. # - NVCC-CUDA: Try NVCC-CUDA or fail. function(detect_cuda_type cuda_type mode) + if(NOT DEFINED mode) + set(mode AUTO) + endif() string(TOLOWER "${mode}" _lower_case_mode) if(_lower_case_mode STREQUAL "clang-cuda") try_clang_cuda(gt_result) From 2b262f02f2574d8801b869b48bdeda00f948d590 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 11:32:49 +0200 Subject: [PATCH 06/21] fix empty --- cmake/public/detect_features.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index 7272abce88..7ec95a0d68 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -46,7 +46,7 @@ endfunction() # - Clang-CUDA: Try Clang-CUDA or fail. # - NVCC-CUDA: Try NVCC-CUDA or fail. function(detect_cuda_type cuda_type mode) - if(NOT DEFINED mode) + if(NOT mode) set(mode AUTO) endif() string(TOLOWER "${mode}" _lower_case_mode) From 4d3da871d81a9ebc12426746e2274e8b5525039a Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 11:34:33 +0200 Subject: [PATCH 07/21] back to old pattern --- cmake/public/detect_features.cmake | 92 ++++++++++++++++-------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index 7ec95a0d68..4d7b057a1d 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -46,67 +46,71 @@ endfunction() # - Clang-CUDA: Try Clang-CUDA or fail. # - NVCC-CUDA: Try NVCC-CUDA or fail. function(detect_cuda_type cuda_type mode) - if(NOT mode) - set(mode AUTO) - endif() - string(TOLOWER "${mode}" _lower_case_mode) - if(_lower_case_mode STREQUAL "clang-cuda") - try_clang_cuda(gt_result) - if(gt_result) - set(${cuda_type} ${gt_result} PARENT_SCOPE) - return() - else() - message(FATAL_ERROR "Clang-CUDA mode was selected, but doesn't work.") - endif() - elseif(_lower_case_mode STREQUAL "nvcc-cuda") + if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") try_nvcc_cuda(gt_result) - if(gt_result) - set(${cuda_type} ${gt_result} PARENT_SCOPE) - return() - else() - message(FATAL_ERROR "NVCC-CUDA mode was selected, but doesn't work.") - endif() - elseif(_lower_case_mode STREQUAL "hip") - try_hip(gt_result) - if(gt_result) - set(${cuda_type} ${gt_result} PARENT_SCOPE) - return() - else() - message(FATAL_ERROR "HIP mode was selected, but doesn't work.") - endif() - elseif(_lower_case_mode STREQUAL "auto") # AUTO - get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) - if("CUDA" IN_LIST languages) # CUDA language is already enabled, prefer it - set(${cuda_type} NVCC-CUDA PARENT_SCOPE) - return() - elseif("HIP" IN_LIST languages) # HIP language is already enabled, prefer it - set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE) - return() - else() - # Prefer Clang-CUDA + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() + else() + string(TOLOWER "${mode}" _lower_case_mode) + if(_lower_case_mode STREQUAL "clang-cuda") try_clang_cuda(gt_result) if(gt_result) set(${cuda_type} ${gt_result} PARENT_SCOPE) return() + else() + message(FATAL_ERROR "Clang-CUDA mode was selected, but doesn't work.") endif() - - # Clang-CUDA doesn't work, try NVCC + elseif(_lower_case_mode STREQUAL "nvcc-cuda") try_nvcc_cuda(gt_result) if(gt_result) set(${cuda_type} ${gt_result} PARENT_SCOPE) return() + else() + message(FATAL_ERROR "NVCC-CUDA mode was selected, but doesn't work.") endif() - - # No CUDA variant works, try HIP + elseif(_lower_case_mode STREQUAL "hip") try_hip(gt_result) if(gt_result) set(${cuda_type} ${gt_result} PARENT_SCOPE) return() + else() + message(FATAL_ERROR "HIP mode was selected, but doesn't work.") endif() + elseif(_lower_case_mode STREQUAL "auto") # AUTO + get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) + if("CUDA" IN_LIST languages) # CUDA language is already enabled, prefer it + set(${cuda_type} NVCC-CUDA PARENT_SCOPE) + return() + elseif("HIP" IN_LIST languages) # HIP language is already enabled, prefer it + set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE) + return() + else() + # Prefer Clang-CUDA + try_clang_cuda(gt_result) + if(gt_result) + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() + endif() + + # Clang-CUDA doesn't work, try NVCC + try_nvcc_cuda(gt_result) + if(gt_result) + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() + endif() + + # No CUDA variant works, try HIP + try_hip(gt_result) + if(gt_result) + set(${cuda_type} ${gt_result} PARENT_SCOPE) + return() + endif() - set(${cuda_type} NOTFOUND PARENT_SCOPE) + set(${cuda_type} NOTFOUND PARENT_SCOPE) + endif() + else() + message(FATAL_ERROR "CUDA/HIP mode set to invalid value ${mode}") endif() - else() - message(FATAL_ERROR "CUDA/HIP mode set to invalid value ${mode}") + set(${cuda_type} NOTFOUND PARENT_SCOPE) endif() endfunction() From 0535f5b9d5d0292c79923ae804745b16029281a8 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 11:35:25 +0200 Subject: [PATCH 08/21] add comment --- cmake/public/detect_features.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index 4d7b057a1d..f870569d95 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -47,10 +47,11 @@ endfunction() # - NVCC-CUDA: Try NVCC-CUDA or fail. function(detect_cuda_type cuda_type mode) if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # not Clang, therefore the only option is NVCC try_nvcc_cuda(gt_result) set(${cuda_type} ${gt_result} PARENT_SCOPE) return() - else() + else() # Clang string(TOLOWER "${mode}" _lower_case_mode) if(_lower_case_mode STREQUAL "clang-cuda") try_clang_cuda(gt_result) From 8877508d4517cd8f5bf9c534cd08e87e2428254e Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 11:36:22 +0200 Subject: [PATCH 09/21] restore not found --- cmake/public/detect_features.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index f870569d95..06ce703616 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -112,6 +112,6 @@ function(detect_cuda_type cuda_type mode) else() message(FATAL_ERROR "CUDA/HIP mode set to invalid value ${mode}") endif() - set(${cuda_type} NOTFOUND PARENT_SCOPE) endif() + set(${cuda_type} NOTFOUND PARENT_SCOPE) endfunction() From 6134fcc0f5abf64c14e763781e3a592ad70ea976 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 13:50:12 +0200 Subject: [PATCH 10/21] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0f73ab8e86..348ccb1e74 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ CUDAHOSTCXX=`which g++` # full path to the C++ compiler to be used as CUDA host ##### Requirements - C++17 compiler (see also list of tested compilers) - Boost headers (1.73 or later) -- CMake (3.18.1 or later) +- CMake (3.21.0 or later) - CUDA Toolkit (11.0 or later, optional) - MPI (optional, CUDA-aware MPI for the GPU communication module `gcl_gpu`) @@ -56,8 +56,8 @@ The GridTools libraries are currently nightly tested with the following compiler ##### Known issues -- Some tests are failing with ROCm3.8.0 (Clang 11). - CUDA 11.0.x has a severe issue, see https://github.com/GridTools/gridtools/issues/1522. Under certain conditions, GridTools code will not compile for this version of CUDA. CUDA 11.1.x and later should not be affected by this issue. +- CUDA 12.1, 12.2, 12.3, 12.4 have various issues related to `constexpr`, see https://github.com/GridTools/gridtools/issues/1766. We recommend CUDA 12.5 or later. - Cray Clang version 11.0.0 has a problem with the `gridtools::tuple` conversion constructor, see https://github.com/GridTools/gridtools/issues/1615. ##### Partly supported (expected to work, but not tested regularly) @@ -66,6 +66,7 @@ The GridTools libraries are currently nightly tested with the following compiler | --- | --- | --- | --- | | Intel 19.1.1.217 | all backends | 2021-09-30 | with `cmake . -DCMAKE_CXX_FLAGS=-qnextgen` | | NVHPC 23.3 | all backends | 2023-04-20 | only compilation is tested regularly in CI | +| ROCm 6.0.3 | all backends | 2024-09-24 | tested on AMD MI250X (LUMI) | ### Contributing From 8d508acf5e1769e5bcc9f4a6d0393a4d58d5d06a Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 15:09:30 +0300 Subject: [PATCH 11/21] fix mpi test --- include/gridtools/common/cuda_runtime.hpp | 2 +- tests/src/CMakeLists.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/gridtools/common/cuda_runtime.hpp b/include/gridtools/common/cuda_runtime.hpp index 2f52a924e1..a43ae8f94b 100644 --- a/include/gridtools/common/cuda_runtime.hpp +++ b/include/gridtools/common/cuda_runtime.hpp @@ -9,7 +9,7 @@ */ #pragma once -#if defined(__HIP__) +#if defined(__HIP__) || defined(GT_HIP_RUNTIME) #include #ifdef NDEBUG #undef assert diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 3dcd9c82f9..89dbde5569 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -16,5 +16,8 @@ if (TARGET gcl_gpu) target_link_libraries(mpi_gtest_main_gpu PUBLIC gridtools gtest MPI::MPI_CXX) if(TARGET CUDA::cudart) # doesn't exist if HIP mode target_link_libraries(mpi_gtest_main_gpu PUBLIC CUDA::cudart) + else() # HIP + target_link_libraries(mpi_gtest_main_gpu PRIVATE hip::host) + target_compile_definitions(mpi_gtest_main_gpu PRIVATE GT_HIP_RUNTIME) endif() endif() From 3639524b7404a4b0ff0d3a2ca1a6ba6f9c47cdc9 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 15:09:42 +0300 Subject: [PATCH 12/21] fix clang 17 --- include/gridtools/common/hymap.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gridtools/common/hymap.hpp b/include/gridtools/common/hymap.hpp index bfa554eb32..6c93c6f46a 100644 --- a/include/gridtools/common/hymap.hpp +++ b/include/gridtools/common/hymap.hpp @@ -198,7 +198,7 @@ namespace gridtools { template struct values; -#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 16 +#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 17 template values(Vs const &...) -> values; #endif From bf5b4a9342208e0e6b612505358ba07d2b4a9352 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 15:12:29 +0300 Subject: [PATCH 13/21] fix clang 17 --- include/gridtools/sid/composite.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gridtools/sid/composite.hpp b/include/gridtools/sid/composite.hpp index 6c275befec..f7baed34c3 100644 --- a/include/gridtools/sid/composite.hpp +++ b/include/gridtools/sid/composite.hpp @@ -167,7 +167,7 @@ namespace gridtools { struct compressed; template struct values; -#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 16 +#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 17 template values(Sids const &...) -> values; #endif From f5cd0b81866f3cfb4118331377234170d5528147 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Tue, 24 Sep 2024 15:48:57 +0300 Subject: [PATCH 14/21] storage with hip runtime --- cmake/public/gridtools_setup_targets.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/public/gridtools_setup_targets.cmake b/cmake/public/gridtools_setup_targets.cmake index f0c6dc0d7c..effcfca05d 100644 --- a/cmake/public/gridtools_setup_targets.cmake +++ b/cmake/public/gridtools_setup_targets.cmake @@ -297,7 +297,11 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode) if(CUDAToolkit_FOUND OR GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU) _gt_add_library(${_config_mode} storage_gpu) target_link_libraries(${_gt_namespace}storage_gpu INTERFACE ${_gt_namespace}gridtools) - if(NOT GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU) + if(GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU) + find_package(hip REQUIRED) + target_link_libraries(${_gt_namespace}storage_gpu INTERFACE hip::host) + target_compile_definitions(${_gt_namespace}storage_gpu INTERFACE GT_HIP_RUNTIME) + else() target_link_libraries(${_gt_namespace}storage_gpu INTERFACE CUDA::cudart) endif() list(APPEND GT_STORAGES gpu) From 14234b439715f6197ad51cb6036c6ae32644e0af Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 25 Sep 2024 09:50:23 +0200 Subject: [PATCH 15/21] Apply suggestions from code review Co-authored-by: Felix Thaler --- CMakeLists.txt | 2 +- cmake/public/detect_features.cmake | 2 +- cmake/public/gridtools_setup_targets.cmake | 3 +-- include/gridtools/common/cuda_util.hpp | 4 +--- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 23e8a27801..f6cc839a9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ mark_as_advanced(CMAKE_EXPORT_NO_PACKAGE_REGISTRY) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set(GT_CLANG_CUDA_MODE "AUTO" CACHE STRING "AUTO, Clang-CUDA, NVCC-CUDA or HIP; \ - AUTO = Use NVCC if language CUDA is enabled, else prefer Clang-CUDA, else prefer HIP.") +AUTO = Use NVCC if language CUDA is enabled, else prefer Clang-CUDA, else prefer HIP.") set_property(CACHE GT_CLANG_CUDA_MODE PROPERTY STRINGS "AUTO;Clang-CUDA;NVCC-CUDA;HIP") endif() diff --git a/cmake/public/detect_features.cmake b/cmake/public/detect_features.cmake index 06ce703616..9f7a1cd613 100644 --- a/cmake/public/detect_features.cmake +++ b/cmake/public/detect_features.cmake @@ -41,7 +41,7 @@ endfunction() # Parameters: # - cuda_type: result variable is set to one of HIPCC-AMDGPU/NVCC-CUDA/Clang-CUDA/NOTFOUND # - mode: AUTO, HIP, Clang-CUDA, NVCC-CUDA -# - AUTO: Prefer NVCC-CUDA if the CUDA language is enabled, prefer HIP if the HIP langauge is enabled, else try Clang-CUDA, else try HIP. +# - AUTO: Prefer NVCC-CUDA if the CUDA language is enabled, prefer HIP if the HIP language is enabled, else try Clang-CUDA, else try HIP. # - HIP: Try HIP or fail. # - Clang-CUDA: Try Clang-CUDA or fail. # - NVCC-CUDA: Try NVCC-CUDA or fail. diff --git a/cmake/public/gridtools_setup_targets.cmake b/cmake/public/gridtools_setup_targets.cmake index effcfca05d..1a8510c0f0 100644 --- a/cmake/public/gridtools_setup_targets.cmake +++ b/cmake/public/gridtools_setup_targets.cmake @@ -166,8 +166,7 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode) # It is up to the super-project to enable CUDA/HIP. if(GT_CUDA_TYPE STREQUAL NVCC-CUDA) enable_language(CUDA) - endif() - if(GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU) + elseif(GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU) enable_language(HIP) endif() endif() diff --git a/include/gridtools/common/cuda_util.hpp b/include/gridtools/common/cuda_util.hpp index f46884d9a8..cbd5f4b0b0 100644 --- a/include/gridtools/common/cuda_util.hpp +++ b/include/gridtools/common/cuda_util.hpp @@ -37,9 +37,7 @@ namespace gridtools { struct cuda_free { template void operator()(T *ptr) const { - cudaError_t err = cudaFree(const_cast *>(ptr)); - if (err != cudaSuccess) - on_error(err, "cudaFree", "", "", 0); + GT_CUDA_CHECK(cudaFree(const_cast *>(ptr))); } }; From 3183581bf7ddc60cf5bf63d1c11f14a9743131bb Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 25 Sep 2024 10:28:44 +0200 Subject: [PATCH 16/21] Update tests.yml --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2c71e2688e..0d20651319 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: container: ghcr.io/gridtools/gridtools-base:${{ matrix.compiler }} strategy: matrix: - compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.3, gcc-12-cuda-12.4, base-hip, gcc-10-hpx, nvhpc-23.3, nvhpc-23.9] + compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.3, gcc-12-cuda-12.4, base-hip, base-rocm-6.2, gcc-10-hpx, nvhpc-23.3, nvhpc-23.9] build_type: [debug, release] exclude: - compiler: gcc-8 From aa34de7e7ac69fc4e6a122d6965948774f727e69 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 25 Sep 2024 10:30:32 +0200 Subject: [PATCH 17/21] Update cmake-configure.yml --- .github/workflows/cmake-configure.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake-configure.yml b/.github/workflows/cmake-configure.yml index 9cdf937089..2c7a301937 100644 --- a/.github/workflows/cmake-configure.yml +++ b/.github/workflows/cmake-configure.yml @@ -105,9 +105,10 @@ jobs: hip: runs-on: ubuntu-latest - container: ghcr.io/gridtools/gridtools-base:base-hip + container: ghcr.io/gridtools/gridtools-base:base-${{ matrix.rocm_version }} strategy: matrix: + rocm_version: [hip, rocm-6.2] # "hip" is rocm-5.x cuda-mode: [AUTO, HIP] steps: - uses: actions/checkout@v2 From 63a71c3df66c8876659c20547d1bcc92d8f6b770 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 25 Sep 2024 11:05:40 +0200 Subject: [PATCH 18/21] Update tests.yml --- .github/workflows/tests.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0d20651319..b1df8c83c8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,6 +31,12 @@ jobs: build_type: debug steps: - uses: actions/checkout@v2 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false - name: setup environment run: | echo "GTRUN_BUILD_COMMAND=make -j $(nproc)" >> $GITHUB_ENV From 4cc71a00eff9b53f6749804cd0b9ee5c3ae2aa4e Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 25 Sep 2024 11:26:26 +0200 Subject: [PATCH 19/21] cleanup too late... --- .github/workflows/tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b1df8c83c8..0d20651319 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,12 +31,6 @@ jobs: build_type: debug steps: - uses: actions/checkout@v2 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - name: setup environment run: | echo "GTRUN_BUILD_COMMAND=make -j $(nproc)" >> $GITHUB_ENV From 577fed44e56935a39753cfae95e5bc798b08d954 Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 25 Sep 2024 11:26:41 +0200 Subject: [PATCH 20/21] Update tests.yml --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0d20651319..2c71e2688e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: container: ghcr.io/gridtools/gridtools-base:${{ matrix.compiler }} strategy: matrix: - compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.3, gcc-12-cuda-12.4, base-hip, base-rocm-6.2, gcc-10-hpx, nvhpc-23.3, nvhpc-23.9] + compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.3, gcc-12-cuda-12.4, base-hip, gcc-10-hpx, nvhpc-23.3, nvhpc-23.9] build_type: [debug, release] exclude: - compiler: gcc-8 From ecef47a9420221b8be6b8dd2701713eae47fe41a Mon Sep 17 00:00:00 2001 From: Hannes Vogt Date: Wed, 25 Sep 2024 11:26:58 +0200 Subject: [PATCH 21/21] Update cmake-configure.yml --- .github/workflows/cmake-configure.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-configure.yml b/.github/workflows/cmake-configure.yml index 2c7a301937..8f322defa6 100644 --- a/.github/workflows/cmake-configure.yml +++ b/.github/workflows/cmake-configure.yml @@ -108,7 +108,7 @@ jobs: container: ghcr.io/gridtools/gridtools-base:base-${{ matrix.rocm_version }} strategy: matrix: - rocm_version: [hip, rocm-6.2] # "hip" is rocm-5.x + rocm_version: [hip] # "hip" is rocm-5.x cuda-mode: [AUTO, HIP] steps: - uses: actions/checkout@v2