Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for HIP detection for recent ROCm and CMake #1804

Merged
merged 23 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/workflows/cmake-configure.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,22 @@ jobs:
&& cmake .. > out.log
&& cat out.log
&& grep "GPU mode: NVCC-CUDA" out.log > /dev/null

# CI job: check that CMake configuration detects HIP inside a ROCm container.
# Runs once for every (rocm_version, cuda-mode) combination of the matrix below.
hip:
runs-on: ubuntu-latest
# The container image tag is derived from the matrix entry (base-<rocm_version>).
container: ghcr.io/gridtools/gridtools-base:base-${{ matrix.rocm_version }}
strategy:
matrix:
rocm_version: [hip] # "hip" is rocm-5.x
# Values passed to -DGT_CLANG_CUDA_MODE in the configure step:
# AUTO lets the build pick a GPU mode itself, HIP requests HIP explicitly.
cuda-mode: [AUTO, HIP]
steps:
- uses: actions/checkout@v2
# Print the CMake version so it is visible in the job log.
- name: CMake version
run: |
cmake --version
# Configure with the selected mode and capture the output in out.log.
# The trailing grep makes the step fail unless the configure output
# contains the text "GPU mode: HIPCC-AMD".
- name: CMake configure expect success
run: >
mkdir build && cd build
&& cmake .. -DGT_CLANG_CUDA_MODE=${{ matrix.cuda-mode }} > out.log
&& cat out.log
&& grep "GPU mode: HIPCC-AMD" out.log > /dev/null
13 changes: 7 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
# the environment from which GridTools was installed is irrelevant for the installation.
# Therefore, we use the same macro to setup CMake targets in the main CMakeLists.txt (this file) and in the
# GridToolsConfig.cmake of an installation (see cmake/public/gridtools_setup_target.cmake)
cmake_minimum_required(VERSION 3.18.1)
cmake_policy(VERSION 3.18.1)
cmake_minimum_required(VERSION 3.21.0)
cmake_policy(VERSION 3.21.0)

file(STRINGS "version.txt" __GT_VERSION)
project(GridTools VERSION ${__GT_VERSION} LANGUAGES CXX)
Expand All @@ -29,12 +29,13 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/public" "${CMAKE_
set(CMAKE_EXPORT_NO_PACKAGE_REGISTRY ON CACHE BOOL "")
mark_as_advanced(CMAKE_EXPORT_NO_PACKAGE_REGISTRY)

# User setting GT_CLANG_CUDA_MODE: decide if Clang-CUDA or NVCC
# User setting GT_CLANG_CUDA_MODE: decide if Clang-CUDA, NVCC or HIP
# TODO(havogt): rename variable to GT_CLANG_MODE in backwards compatible way
fthaler marked this conversation as resolved.
Show resolved Hide resolved
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(GT_CLANG_CUDA_MODE "AUTO" CACHE STRING
"AUTO, Clang-CUDA or NVCC-CUDA; \
AUTO = Use NVCC if language CUDA is enabled, else prefer Clang-CUDA.")
set_property(CACHE GT_CLANG_CUDA_MODE PROPERTY STRINGS "AUTO;Clang-CUDA;NVCC-CUDA")
"AUTO, Clang-CUDA, NVCC-CUDA or HIP; \
AUTO = Use NVCC if language CUDA is enabled, else prefer Clang-CUDA, else prefer HIP.")
set_property(CACHE GT_CLANG_CUDA_MODE PROPERTY STRINGS "AUTO;Clang-CUDA;NVCC-CUDA;HIP")
endif()

# User setting GT_INSTALL_EXAMPLES:
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ CUDAHOSTCXX=`which g++` # full path to the C++ compiler to be used as CUDA host
##### Requirements
- C++17 compiler (see also list of tested compilers)
- Boost headers (1.73 or later)
- CMake (3.18.1 or later)
- CMake (3.21.0 or later)
- CUDA Toolkit (11.0 or later, optional)
- MPI (optional, CUDA-aware MPI for the GPU communication module `gcl_gpu`)

Expand All @@ -56,8 +56,8 @@ The GridTools libraries are currently nightly tested with the following compiler

##### Known issues

- Some tests are failing with ROCm3.8.0 (Clang 11).
- CUDA 11.0.x has a severe issue, see https://github.com/GridTools/gridtools/issues/1522. Under certain conditions, GridTools code will not compile for this version of CUDA. CUDA 11.1.x and later should not be affected by this issue.
- CUDA 12.1, 12.2, 12.3, 12.4 have various issues related to `constexpr`, see https://github.com/GridTools/gridtools/issues/1766. We recommend CUDA 12.5 or later.
- Cray Clang version 11.0.0 has a problem with the `gridtools::tuple` conversion constructor, see https://github.com/GridTools/gridtools/issues/1615.

##### Partly supported (expected to work, but not tested regularly)
Expand All @@ -66,6 +66,7 @@ The GridTools libraries are currently nightly tested with the following compiler
| --- | --- | --- | --- |
| Intel 19.1.1.217 | all backends | 2021-09-30 | with `cmake . -DCMAKE_CXX_FLAGS=-qnextgen` |
| NVHPC 23.3 | all backends | 2023-04-20 | only compilation is tested regularly in CI |
| ROCm 6.0.3 | all backends | 2024-09-24 | tested on AMD MI250X (LUMI) |

### Contributing

Expand Down
58 changes: 38 additions & 20 deletions cmake/public/detect_features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,53 +27,64 @@ function(try_nvcc_cuda gt_result)
set(${gt_result} NOTFOUND PARENT_SCOPE)
endfunction()

# try_hip(<gt_result>)
# Checks whether the HIP language can be used, via CMake's CheckLanguage module.
# Parameters:
# - gt_result: name of the variable to set in the caller's scope. It is set to
#   HIPCC-AMDGPU if CMAKE_HIP_COMPILER evaluates to true after check_language(HIP),
#   and to NOTFOUND otherwise.
function(try_hip gt_result)
    include(CheckLanguage)
    check_language(HIP)
    if(NOT CMAKE_HIP_COMPILER)
        # No HIP compiler was found: report failure to the caller.
        set(${gt_result} NOTFOUND PARENT_SCOPE)
    else()
        set(${gt_result} HIPCC-AMDGPU PARENT_SCOPE)
    endif()
endfunction()

# detect_cuda_type()
# Parameters:
# - cuda_type: result variable is set to one of HIPCC-AMDGPU/NVCC-CUDA/Clang-CUDA/NOTFOUND
# - clang_mode: AUTO, Clang-CUDA, NVCC-CUDA
# - AUTO: Prefer NVCC-CUDA if the CUDA language is enabled, else try Clang-CUDA
# - mode: AUTO, HIP, Clang-CUDA, NVCC-CUDA
# - AUTO: Use NVCC-CUDA if the CUDA language is already enabled, else HIP if the HIP language is already enabled; otherwise try Clang-CUDA first and fall back to HIP if no CUDA variant works.
# - HIP: Try HIP or fail.
# - Clang-CUDA: Try Clang-CUDA or fail.
# - NVCC-CUDA: Try NVCC-CUDA or fail.
function(detect_cuda_type cuda_type clang_mode)
get_filename_component(cxx_name ${CMAKE_CXX_COMPILER} NAME)
if(cxx_name STREQUAL "hipcc")
include(try_compile_hip)
try_compile_hip(GT_HIP_WORKS) #TODO use cache variable to avoid compiling each cmake run
if(GT_HIP_WORKS)
set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE)
return()
else()
message(FATAL_ERROR "${cxx_name} wasn't able to compile a simple HIP program.")
endif()
endif()

function(detect_cuda_type cuda_type mode)
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# not Clang, therefore the only option is NVCC
try_nvcc_cuda(gt_result)
set(${cuda_type} ${gt_result} PARENT_SCOPE)
return()
else() # Clang
string(TOLOWER "${clang_mode}" _lower_case_clang_cuda)
if(_lower_case_clang_cuda STREQUAL "clang-cuda")
string(TOLOWER "${mode}" _lower_case_mode)
if(_lower_case_mode STREQUAL "clang-cuda")
try_clang_cuda(gt_result)
if(gt_result)
set(${cuda_type} ${gt_result} PARENT_SCOPE)
return()
else()
message(FATAL_ERROR "Clang-CUDA mode was selected, but doesn't work.")
endif()
elseif(_lower_case_clang_cuda STREQUAL "nvcc-cuda")
elseif(_lower_case_mode STREQUAL "nvcc-cuda")
try_nvcc_cuda(gt_result)
if(gt_result)
set(${cuda_type} ${gt_result} PARENT_SCOPE)
return()
else()
message(FATAL_ERROR "NVCC-CUDA mode was selected, but doesn't work.")
endif()
elseif(_lower_case_clang_cuda STREQUAL "auto") # AUTO
elseif(_lower_case_mode STREQUAL "hip")
try_hip(gt_result)
if(gt_result)
set(${cuda_type} ${gt_result} PARENT_SCOPE)
return()
else()
message(FATAL_ERROR "HIP mode was selected, but doesn't work.")
endif()
elseif(_lower_case_mode STREQUAL "auto") # AUTO
get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
if("CUDA" IN_LIST languages) # CUDA language is already enabled, prefer it
set(${cuda_type} NVCC-CUDA PARENT_SCOPE)
return()
elseif("HIP" IN_LIST languages) # HIP language is already enabled, prefer it
set(${cuda_type} HIPCC-AMDGPU PARENT_SCOPE)
return()
else()
# Prefer Clang-CUDA
try_clang_cuda(gt_result)
Expand All @@ -89,10 +100,17 @@ function(detect_cuda_type cuda_type clang_mode)
return()
endif()

# No CUDA variant works, try HIP
try_hip(gt_result)
if(gt_result)
set(${cuda_type} ${gt_result} PARENT_SCOPE)
return()
endif()

set(${cuda_type} NOTFOUND PARENT_SCOPE)
endif()
else()
message(FATAL_ERROR "Clang CUDA mode set to invalid value ${clang_mode}")
message(FATAL_ERROR "CUDA/HIP mode set to invalid value ${mode}")
endif()
endif()
set(${cuda_type} NOTFOUND PARENT_SCOPE)
Expand Down
25 changes: 14 additions & 11 deletions cmake/public/gridtools_setup_targets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,14 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode)
set(_gt_namespace ${GT_NAMESPACE})
set(_gt_imported "IMPORTED")
else()
if((GT_CUDA_TYPE STREQUAL NVCC-CUDA) AND (CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME))
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
# Do not enable the language if we are included from a super-project.
# It is up to the super-project to enable CUDA.
enable_language(CUDA)
# It is up to the super-project to enable CUDA/HIP.
if(GT_CUDA_TYPE STREQUAL NVCC-CUDA)
enable_language(CUDA)
elseif(GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU)
enable_language(HIP)
endif()
endif()
endif()

Expand Down Expand Up @@ -212,15 +216,10 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode)
set(_gt_setup_root_dir ${CUDAToolkit_BIN_DIR}/..)
target_compile_options(_gridtools_cuda INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-xcuda --cuda-path=${_gt_setup_root_dir}>)
target_link_libraries(_gridtools_cuda INTERFACE CUDA::cudart)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.0.0)
# Workaround for problem seen with Clang 10.0.1 in CUDA mode (havogt):
# The default std in Clang 10 is c++14, however in CUDA mode the compiler falls back to pre-c++11.
# Hypothesis: CMake tries to be smart and only puts `-std=c++14` if needed, but isn't aware of the CUDA problem...
# TODO check if fixed in Clang 11
target_compile_options(_gridtools_cuda INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-std=c++14>)
endif()
elseif(type STREQUAL HIPCC-AMDGPU)
find_package(hip REQUIRED)
target_compile_options(_gridtools_cuda INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-xhip>)
target_link_libraries(_gridtools_cuda INTERFACE hip::host)
endif()
endfunction()

Expand Down Expand Up @@ -297,7 +296,11 @@ macro(_gt_setup_targets _config_mode clang_cuda_mode)
if(CUDAToolkit_FOUND OR GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU)
_gt_add_library(${_config_mode} storage_gpu)
target_link_libraries(${_gt_namespace}storage_gpu INTERFACE ${_gt_namespace}gridtools)
if(NOT GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU)
if(GT_CUDA_TYPE STREQUAL HIPCC-AMDGPU)
find_package(hip REQUIRED)
target_link_libraries(${_gt_namespace}storage_gpu INTERFACE hip::host)
target_compile_definitions(${_gt_namespace}storage_gpu INTERFACE GT_HIP_RUNTIME)
else()
target_link_libraries(${_gt_namespace}storage_gpu INTERFACE CUDA::cudart)
endif()
list(APPEND GT_STORAGES gpu)
Expand Down
2 changes: 1 addition & 1 deletion docs_src/manuals/getting_started/code/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.18.1)
cmake_minimum_required(VERSION 3.21.0)

project(GridTools-laplacian LANGUAGES CXX)

Expand Down
5 changes: 2 additions & 3 deletions include/gridtools/common/cuda_is_ptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ namespace gridtools {
cudaPointerAttributes ptrAttributes;
cudaError_t error = cudaPointerGetAttributes(&ptrAttributes, ptr);
if (error == cudaSuccess)

#if defined(CUDART_VERSION) && CUDART_VERSION < 10000 or defined(__HIPCC__)
return ptrAttributes.memoryType == cudaMemoryTypeDevice; // deprecated in CUDA 10
#if defined(HIP_VERSION) and HIP_VERSION < 60000000
return ptrAttributes.memoryType == cudaMemoryTypeDevice;
#else
return ptrAttributes.type == cudaMemoryTypeDevice || ptrAttributes.type == cudaMemoryTypeManaged;
#endif
Expand Down
3 changes: 2 additions & 1 deletion include/gridtools/common/cuda_runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*/
#pragma once

#if defined(__HIP__)
#if defined(__HIP__) || defined(GT_HIP_RUNTIME)
#include <hip/hip_runtime.h>
#ifdef NDEBUG
#undef assert
Expand Down Expand Up @@ -41,6 +41,7 @@
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemoryTypeDevice hipMemoryTypeDevice
#define cudaMemoryTypeManaged hipMemoryTypeManaged
#define cudaPointerAttributes hipPointerAttribute_t
#define cudaPointerGetAttributes hipPointerGetAttributes
#define cudaSetDevice hipSetDevice
Expand Down
2 changes: 1 addition & 1 deletion include/gridtools/common/cuda_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace gridtools {
struct cuda_free {
template <class T>
void operator()(T *ptr) const {
cudaFree(const_cast<std::remove_cv_t<T> *>(ptr));
GT_CUDA_CHECK(cudaFree(const_cast<std::remove_cv_t<T> *>(ptr)));
}
};

Expand Down
2 changes: 1 addition & 1 deletion include/gridtools/common/hymap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ namespace gridtools {
template <class...>
struct values;

#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 16
#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 17
template <class... Vs>
values(Vs const &...) -> values<Vs...>;
#endif
Expand Down
2 changes: 1 addition & 1 deletion include/gridtools/common/timer/timer_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace gridtools {
class timer_cuda {
struct destroy_event {
using pointer = cudaEvent_t;
void operator()(cudaEvent_t event) const { cudaEventDestroy(event); }
void operator()(cudaEvent_t event) const { GT_CUDA_CHECK(cudaEventDestroy(event)); }
};

using event_holder = std::unique_ptr<void, destroy_event>;
Expand Down
2 changes: 1 addition & 1 deletion include/gridtools/sid/composite.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ namespace gridtools {
struct compressed;
template <class...>
struct values;
#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 16
#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 17
template <class... Sids>
values(Sids const &...) -> values<Sids...>;
#endif
Expand Down
3 changes: 3 additions & 0 deletions tests/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,8 @@ if (TARGET gcl_gpu)
target_link_libraries(mpi_gtest_main_gpu PUBLIC gridtools gtest MPI::MPI_CXX)
if(TARGET CUDA::cudart) # doesn't exist if HIP mode
target_link_libraries(mpi_gtest_main_gpu PUBLIC CUDA::cudart)
else() # HIP
target_link_libraries(mpi_gtest_main_gpu PRIVATE hip::host)
target_compile_definitions(mpi_gtest_main_gpu PRIVATE GT_HIP_RUNTIME)
endif()
endif()
Loading