Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Rocm36 #1263

Merged
merged 2 commits into from
Aug 14, 2020
Merged

Rocm36 #1263

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions Src/Base/AMReX_BaseFab.H
Original file line number Diff line number Diff line change
Expand Up @@ -2456,7 +2456,7 @@ BaseFab<T>::maxabs (const Box& subbox, int comp) const noexcept
#endif
{
T r = 0;
amrex::Loop(subbox, [=,&r] (int i, int j, int k) AMREX_NOEXCEPT
amrex::Loop(subbox, [=,&r] (int i, int j, int k) noexcept
{
r = amrex::max(r, amrex::Math::abs(a(i,j,k)));
});
Expand Down Expand Up @@ -3929,7 +3929,7 @@ BaseFab<T>::sum (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
r += a(i,j,k,n+dcomp.i);
});
Expand Down Expand Up @@ -3969,7 +3969,7 @@ BaseFab<T>::dot (const BaseFab<T>& src, const Box& bx, SrcComp scomp, DestComp d
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
r += d(i,j,k,n+dcomp.i) * s(i,j,k,n+scomp.i);
});
Expand Down Expand Up @@ -4015,7 +4015,7 @@ BaseFab<T>::dot (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
r += a(i,j,k,n+dcomp.i)*a(i,j,k,n+dcomp.i);
});
Expand Down Expand Up @@ -4059,7 +4059,7 @@ BaseFab<T>::dotmask (const BaseFab<T>& src, const Box& bx, const BaseFab<int>& m
} else
#endif
{
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
{
int mi = static_cast<int>(static_cast<bool>(m(i,j,k)));
r += d(i,j,k,n+dcomp.i)*s(i,j,k,n+scomp.i)*mi;
Expand Down
4 changes: 2 additions & 2 deletions Src/Base/AMReX_BaseFabUtility.H
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ template <class Tto, class Tfrom>
AMREX_GPU_HOST_DEVICE
void
cast (BaseFab<Tto>& tofab, BaseFab<Tfrom> const& fromfab,
Box const& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) AMREX_NOEXCEPT
Box const& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
{
auto const& tdata = tofab.array();
auto const& fdata = fromfab.const_array();
amrex::LoopConcurrent(bx, ncomp.n, [=] (int i, int j, int k, int n) AMREX_NOEXCEPT
amrex::LoopConcurrent(bx, ncomp.n, [=] (int i, int j, int k, int n) noexcept
{
tdata(i,j,k,n+dcomp.i) = static_cast<Tto>(fdata(i,j,k,n+scomp.i));
});
Expand Down
9 changes: 0 additions & 9 deletions Src/Base/AMReX_Extension.H
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,6 @@

#if !defined(BL_LANG_FORT)

// HIP FIX HERE - noexcept

#ifdef AMREX_HIP_PLATFORM_HCC
#define AMREX_NOEXCEPT
#else
#define AMREX_NOEXCEPT noexcept
#endif


// restrict

#ifdef __cplusplus
Expand Down
7 changes: 1 addition & 6 deletions Src/Base/AMReX_GpuAsyncArray.H
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,10 @@
#include <AMReX_TypeTraits.H>
#include <AMReX_GpuDevice.H>

// HIP FIX HERE - MOVE TO ONE SPOT
#ifdef __HIP_PLATFORM_HCC__
#define HIPRT_CB
#endif

#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
extern "C" {
#if defined(AMREX_USE_HIP)
void HIPRT_CB amrex_asyncarray_delete ( hipStream_t stream, hipError_t error, void* p);
void amrex_asyncarray_delete ( hipStream_t stream, hipError_t error, void* p);
#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
void CUDART_CB amrex_asyncarray_delete (void* p);
#elif defined(AMREX_USE_CUDA)
Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_GpuAsyncArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#if !defined(AMREX_USE_DPCPP)
extern "C" {
#if defined(AMREX_USE_HIP)
void HIPRT_CB amrex_asyncarray_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
void amrex_asyncarray_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
void CUDART_CB amrex_asyncarray_delete (void* p)
#elif defined(AMREX_USE_CUDA)
Expand Down
6 changes: 1 addition & 5 deletions Src/Base/AMReX_GpuElixir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
#include <memory>
#include <AMReX_GpuDevice.H>

#ifdef __HIP_PLATFORM_HCC__
#define HIPRT_CB
#endif

namespace amrex {
namespace Gpu {

Expand All @@ -19,7 +15,7 @@ namespace {

extern "C" {
#if defined(AMREX_USE_HIP)
void HIPRT_CB amrex_elixir_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
void amrex_elixir_delete ( hipStream_t /*stream*/, hipError_t /*error*/, void* p)
#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
void CUDART_CB amrex_elixir_delete (void* p)
#elif defined(AMREX_USE_CUDA)
Expand Down
11 changes: 0 additions & 11 deletions Src/Base/AMReX_GpuError.H
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,6 @@ namespace Gpu {
+ " " + hipGetErrorString(amrex_i_err)); \
amrex::Abort(errStr); \
}}

#ifdef AMREX_HIP_PLATFORM_NVCC
#define AMREX_CUDA_SAFE_CALL(call) { \
cudaError_t amrex_i_err = call; \
if (cudaSuccess != amrex_i_err) { \
std::string errStr(std::string("CUDA error in file ") + __FILE__ \
+ " line " + std::to_string(__LINE__) \
+ " " + cudaGetErrorString(amrex_i_err)); \
amrex::Abort(errStr); \
}}
#endif
#endif

#define AMREX_GPU_ERROR_CHECK() amrex::Gpu::ErrorCheck(__FILE__, __LINE__)
Expand Down
8 changes: 4 additions & 4 deletions Src/Base/AMReX_GpuLaunch.H
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,12 @@ namespace Gpu {
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
block \
); \
} \
else { \
amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::LoopConcurrentOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
block \
); \
}
Expand Down Expand Up @@ -288,12 +288,12 @@ namespace Gpu {
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
block \
); \
} \
else { \
amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) AMREX_NOEXCEPT \
amrex::LoopOnCpu(box, nc, [=] (int i, int j, int k, int n) noexcept \
block \
); \
}
Expand Down
20 changes: 5 additions & 15 deletions Tools/GNUMake/Make.defs
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,12 @@ ifeq ($(USE_HIP),TRUE)
COMP := hip
HIP_PLATFORM = $(shell hipconfig --compiler)
ifeq ($(HIP_PLATFORM),nvcc)
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms)
# If nvcc is supported, put this back.
# CUDA_ARCH ?= 60
# CUDA_MAX_THREADS ?= 256
# CUDA_MAXREGCOUNT ?= 255
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms)
else ifeq ($(HIP_PLATFORM),hcc)
# do nothing special for now, but also don't abort
$(error HIP_PLATFORM=hcc is no longer supported)
else ifeq ($(HIP_PLATFORM),clang)
else
$(error only HIP_PLATFORM=hcc is supported)
$(error Unknown HIP_PLATFORM=$(HIP_PLATFORM))
endif
endif

Expand Down Expand Up @@ -672,14 +669,7 @@ ifeq ($(USE_HIP),TRUE)

endif

DEFINES += -DAMREX_USE_HIP

ifeq ($(HIP_PLATFORM),nvcc)
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms.)
# DEFINES += -DAMREX_USE_CUDA -DAMREX_HIP_PLATFORM_NVCC
else ifeq ($(HIP_PLATFORM),hcc)
DEFINES += -DAMREX_HIP_PLATFORM_HCC
endif
DEFINES += -DAMREX_USE_HIP -DAMREX_HIP_PLATFORM=$(HIP_PLATFORM)

GPUSuffix := .HIP

Expand Down
38 changes: 9 additions & 29 deletions Tools/GNUMake/comps/hip.mak
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,22 @@ endif # BL_NO_FORT

# =============================================================================================

# This is designed only for dogora for now.
ifeq ($(HIP_PLATFORM),hcc)
ifeq ($(HIP_PLATFORM),clang)

ifeq ($(DEBUG),TRUE)
# From llvm
CXXFLAGS += -g
CFLAGS += -g
CXXFLAGS += -g -O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -ftrapv
CFLAGS += -g -O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -ftrapv
FFLAGS += -g -O0 -ggdb -fbounds-check -fbacktrace -Wuninitialized -Wunused -ffpe-trap=invalid,zero -finit-real=snan -finit-integer=2147483647 -ftrapv
F90FLAGS += -g -O0 -ggdb -fbounds-check -fbacktrace -Wuninitialized -Wunused -ffpe-trap=invalid,zero -finit-real=snan -finit-integer=2147483647 -ftrapv

else # DEBUG=FALSE flags
CXXFLAGS += -g -O3
CFLAGS += -g -O3
FFLAGS += -g -O3
F90FLAGS += -g -O3
endif

# Generic HIP info
ROC_PATH=/opt/rocm
ROC_PATH=$(realpath $(dir $(HIP_PATH)))
INCLUDE_LOCATIONS += $(HIP_PATH)/include

# rocRand
Expand All @@ -76,33 +77,12 @@ ifeq ($(HIP_PLATFORM),hcc)
INCLUDE_LOCATIONS += $(ROC_PATH)/rocprim/include

# rocThrust - Header only
INCLUDE_LOCATIONS += $(ROC_PATH)/rocthrust/include
# INCLUDE_LOCATIONS += $(ROC_PATH)/rocthrust/include

# =============================================================================================

# This is Summit. Likely broken.
else ifeq ($(HIP_PLATFORM),nvcc)
$(error HIP_PLATFORM nvcc is not supported at this time. Use USE_CUDA to compile for NVIDIA platforms.)
#
# CXXFLAGS_FROM_HOST := -ccbin=$(CXX) --std=c++14
# CFLAGS_FROM_HOST := -ccbin=$(CXX)
# HIPCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT)
#
# ifeq ($(DEBUG),TRUE)
# HIPCC_FLAGS += -g -G
# else
# HIPCC_FLAGS += -lineinfo --ptxas-options=-O3,-v
# endif
#
# ifneq ($(USE_CUDA_FAST_MATH),FALSE)
# HIPCC_FLAGS += --use_fast_math
# endif
#
# CXXFLAGS = $(CXXFLAGS_FROM_HOST) $(HIPCC_FLAGS) -c -dc
# CFLAGS = $(CFLAGS_FROM_HOST) $(HIPCC_FLAGS) -dc
#
# CXXFLAGS += --expt-relaxed-constexpr --expt-extended-lambda
#
endif

# =============================================================================================