Skip to content

Commit

Permalink
Rocm36 (AMReX-Codes#1263)
Browse files Browse the repository at this point in the history
## Summary

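Update the GNU Make build system for hip-clang: `HIP_PLATFORM=clang` is now the supported platform, and `HIP_PLATFORM=hcc` is rejected with an error. Remove HIP workarounds that are no longer needed: the `AMREX_NOEXCEPT` macro (replaced by plain `noexcept`), the local empty `HIPRT_CB` definitions, and the `AMREX_CUDA_SAFE_CALL` macro that was only defined for `AMREX_HIP_PLATFORM_NVCC`.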

## Additional background

## Checklist

The proposed changes:
- [ ] fix a bug or incorrect behavior in AMReX
- [ ] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX users
- [ ] are described in the proposed changes to the AMReX documentation, if appropriate
  • Loading branch information
WeiqunZhang authored and dwillcox committed Oct 3, 2020
1 parent b37d37d commit 5131b11
Show file tree
Hide file tree
Showing 10 changed files with 28 additions and 87 deletions.
10 changes: 5 additions & 5 deletions Src/Base/AMReX_BaseFab.H
Original file line number Diff line number Diff line change
Expand Up @@ -2456,7 +2456,7 @@ BaseFab<T>::maxabs (const Box& subbox, int comp) const noexcept
#endif
{
T r = 0;
amrex::Loop(subbox, [=,&r] (int i, int j, int k) AMREX_NOEXCEPT
amrex::Loop(subbox, [=,&r] (int i, int j, int k) noexcept
{
r = amrex::max(r, amrex::Math::abs(a(i,j,k)));
});
Expand Down Expand Up @@ -3929,7 +3929,7 @@ BaseFab<T>::sum (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
r += a(i,j,k,n+dcomp.i);
});
Expand Down Expand Up @@ -3969,7 +3969,7 @@ BaseFab<T>::dot (const BaseFab<T>& src, const Box& bx, SrcComp scomp, DestComp d
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
r += d(i,j,k,n+dcomp.i) * s(i,j,k,n+scomp.i);
});
Expand Down Expand Up @@ -4015,7 +4015,7 @@ BaseFab<T>::dot (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
r += a(i,j,k,n+dcomp.i)*a(i,j,k,n+dcomp.i);
});
Expand Down Expand Up @@ -4059,7 +4059,7 @@ BaseFab<T>::dotmask (const BaseFab<T>& src, const Box& bx, const BaseFab<int>& m
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
int mi = static_cast<int>(static_cast<bool>(m(i,j,k)));
r += d(i,j,k,n+dcomp.i)*s(i,j,k,n+scomp.i)*mi;
Expand Down
4 changes: 2 additions & 2 deletions Src/Base/AMReX_BaseFabUtility.H
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ template <class Tto, class Tfrom>
AMREX_GPU_HOST_DEVICE
void
cast (BaseFab<Tto>& tofab, BaseFab<Tfrom> const& fromfab,
Box const& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) AMREX_NOEXCEPT
Box const& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
{
auto const& tdata = tofab.array();
auto const& fdata = fromfab.const_array();
amrex::LoopConcurrent(bx, ncomp.n, [=] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopConcurrent(bx, ncomp.n, [=] (int i, int j, int k, int n) noexcept
{
tdata(i,j,k,n+dcomp.i) = static_cast<Tto>(fdata(i,j,k,n+scomp.i));
});
Expand Down
9 changes: 0 additions & 9 deletions Src/Base/AMReX_Extension.H
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,6 @@

#if !defined(BL_LANG_FORT)

// HIP FIX HERE - noexcept

#ifdef AMREX_HIP_PLATFORM_HCC
#define AMREX_NOEXCEPT
#else
#define AMREX_NOEXCEPT noexcept
#endif


// restrict

#ifdef __cplusplus
Expand Down
7 changes: 1 addition & 6 deletions Src/Base/AMReX_GpuAsyncArray.H
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,10 @@
#include <AMReX_TypeTraits.H>
#include <AMReX_GpuDevice.H>

// HIP FIX HERE - MOVE TO ONE SPOT
#ifdef __HIP_PLATFORM_HCC__
#define HIPRT_CB
#endif

#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
extern "C" {
#if defined(AMREX_USE_HIP)
void HIPRT_CB amrex_asyncarray_delete ( hipStream_t stream, hipError_t error, void* p);
void amrex_asyncarray_delete ( hipStream_t stream, hipError_t error, void* p);
#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
void CUDART_CB amrex_asyncarray_delete (void* p);
#elif defined(AMREX_USE_CUDA)
Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_GpuAsyncArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#if !defined(AMREX_USE_DPCPP)
extern "C" {
#if defined(AMREX_USE_HIP)
void HIPRT_CB amrex_asyncarray_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
void amrex_asyncarray_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
void CUDART_CB amrex_asyncarray_delete (void* p)
#elif defined(AMREX_USE_CUDA)
Expand Down
6 changes: 1 addition & 5 deletions Src/Base/AMReX_GpuElixir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
#include <memory>
#include <AMReX_GpuDevice.H>

#ifdef __HIP_PLATFORM_HCC__
#define HIPRT_CB
#endif

namespace amrex {
namespace Gpu {

Expand All @@ -19,7 +15,7 @@ namespace {

extern "C" {
#if defined(AMREX_USE_HIP)
void HIPRT_CB amrex_elixir_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
void amrex_elixir_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
void CUDART_CB amrex_elixir_delete (void* p)
#elif defined(AMREX_USE_CUDA)
Expand Down
11 changes: 0 additions & 11 deletions Src/Base/AMReX_GpuError.H
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,6 @@ namespace Gpu {
+ " " + hipGetErrorString(amrex_i_err)); \
amrex::Abort(errStr); \
}}

#ifdef AMREX_HIP_PLATFORM_NVCC
#define AMREX_CUDA_SAFE_CALL(call) { \
cudaError_t amrex_i_err = call; \
if (cudaSuccess != amrex_i_err) { \
std::string errStr(std::string("CUDA error in file ") + __FILE__ \
+ " line " + std::to_string(__LINE__) \
+ " " + cudaGetErrorString(amrex_i_err)); \
amrex::Abort(errStr); \
}}
#endif
#endif

#define AMREX_GPU_ERROR_CHECK() amrex::Gpu::ErrorCheck(__FILE__, __LINE__)
Expand Down
8 changes: 4 additions & 4 deletions Src/Base/AMReX_GpuLaunch.H
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,12 @@ namespace Gpu {
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
block \
); \
} \
else { \
amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
block \
); \
}
Expand Down Expand Up @@ -288,12 +288,12 @@ namespace Gpu {
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
block \
); \
} \
else { \
amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
block \
); \
}
Expand Down
20 changes: 5 additions & 15 deletions Tools/GNUMake/Make.defs
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,12 @@ ifeq ($(USE_HIP),TRUE)
COMP := hip
HIP_PLATFORM = $(shell hipconfig --compiler)
ifeq ($(HIP_PLATFORM),nvcc)
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms)
# If nvcc is supported, put this back.
# CUDA_ARCH ?= 60
# CUDA_MAX_THREADS ?= 256
# CUDA_MAXREGCOUNT ?= 255
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms)
else ifeq ($(HIP_PLATFORM),hcc)
# do nothing special for now, but also don't abort
$(error HIP_PLATFORM=hcc is no longer supported)
else ifeq ($(HIP_PLATFORM),clang)
else
$(error only HIP_PLATFORM=hcc is supported)
$(error Unknown HIP_PLATFORM=$(HIP_PLATFORM))
endif
endif

Expand Down Expand Up @@ -672,14 +669,7 @@ ifeq ($(USE_HIP),TRUE)

endif

DEFINES += -DAMREX_USE_HIP

ifeq ($(HIP_PLATFORM),nvcc)
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms.)
# DEFINES += -DAMREX_USE_CUDA -DAMREX_HIP_PLATFORM_NVCC
else ifeq ($(HIP_PLATFORM),hcc)
DEFINES += -DAMREX_HIP_PLATFORM_HCC
endif
DEFINES += -DAMREX_USE_HIP -DAMREX_HIP_PLATFORM=$(HIP_PLATFORM)

GPUSuffix := .HIP

Expand Down
38 changes: 9 additions & 29 deletions Tools/GNUMake/comps/hip.mak
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,22 @@ endif # BL_NO_FORT

# =============================================================================================

# This is designed only for dogora for now.
ifeq ($(HIP_PLATFORM),hcc)
ifeq ($(HIP_PLATFORM),clang)

ifeq ($(DEBUG),TRUE)
# From llvm
CXXFLAGS += -g
CFLAGS += -g
CXXFLAGS += -g -O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -ftrapv
CFLAGS += -g -O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -ftrapv
FFLAGS += -g -O0 -ggdb -fbounds-check -fbacktrace -Wuninitialized -Wunused -ffpe-trap=invalid,zero -finit-real=snan -finit-integer=2147483647 -ftrapv
F90FLAGS += -g -O0 -ggdb -fbounds-check -fbacktrace -Wuninitialized -Wunused -ffpe-trap=invalid,zero -finit-real=snan -finit-integer=2147483647 -ftrapv

else # DEBUG=FALSE flags
CXXFLAGS += -g -O3
CFLAGS += -g -O3
FFLAGS += -g -O3
F90FLAGS += -g -O3
endif

# Generic HIP info
ROC_PATH=/opt/rocm
ROC_PATH=$(realpath $(dir $(HIP_PATH)))
INCLUDE_LOCATIONS += $(HIP_PATH)/include

# rocRand
Expand All @@ -76,33 +77,12 @@ ifeq ($(HIP_PLATFORM),hcc)
INCLUDE_LOCATIONS += $(ROC_PATH)/rocprim/include

# rocThrust - Header only
INCLUDE_LOCATIONS += $(ROC_PATH)/rocthrust/include
# INCLUDE_LOCATIONS += $(ROC_PATH)/rocthrust/include

# =============================================================================================

# This is Summit. Likely broken.
else ifeq ($(HIP_PLATFORM),nvcc)
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms.)
#
# CXXFLAGS_FROM_HOST := -ccbin=$(CXX) --std=c++14
# CFLAGS_FROM_HOST := -ccbin=$(CXX)
# HIPCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT)
#
# ifeq ($(DEBUG),TRUE)
# HIPCC_FLAGS += -g -G
# else
# HIPCC_FLAGS += -lineinfo --ptxas-options=-O3,-v
# endif
#
# ifneq ($(USE_CUDA_FAST_MATH),FALSE)
# HIPCC_FLAGS += --use_fast_math
# endif
#
# CXXFLAGS = $(CXXFLAGS_FROM_HOST) $(HIPCC_FLAGS) -c -dc
# CFLAGS = $(CFLAGS_FROM_HOST) $(HIPCC_FLAGS) -dc
#
# CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda
#
endif

# =============================================================================================

0 comments on commit 5131b11

Please sign in to comment.