AMReX-Codes · WeiqunZhang · Jun 1, 2020 · Jun 1, 2020
diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp
@@ -78,7 +78,7 @@ Arena::allocate_system (std::size_t nbytes)
             AMREX_HIP_OR_CUDA_OR_DPCPP
                 (AMREX_HIP_SAFE_CALL(hipMalloc(&p, nbytes));,
                  AMREX_CUDA_SAFE_CALL(cudaMallocManaged(&p, nbytes));,
-                 p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(), Gpu::Device::syclContext()););
+                 p = sycl::malloc_shared(nbytes, Gpu::Device::syclDevice(), Gpu::Device::syclContext()));
             if (arena_info.device_set_readonly)
             {
                 Gpu::Device::mem_advise_set_readonly(p, nbytes);
@@ -94,7 +94,7 @@ Arena::allocate_system (std::size_t nbytes)
             AMREX_HIP_OR_CUDA_OR_DPCPP
                 (AMREX_HIP_SAFE_CALL ( hipMalloc(&p, nbytes));,
                  AMREX_CUDA_SAFE_CALL(cudaMalloc(&p, nbytes));,
-                 p = sycl::malloc_device(nbytes, Gpu::Device::syclDevice(), Gpu::Device::syclContext()););
+                 p = sycl::malloc_device(nbytes, Gpu::Device::syclDevice(), Gpu::Device::syclContext()));
         }
     }
 #else
@@ -126,7 +126,7 @@ Arena::deallocate_system (void* p, std::size_t nbytes)
         AMREX_HIP_OR_CUDA_OR_DPCPP
             (AMREX_HIP_SAFE_CALL ( hipFree(p));,
              AMREX_CUDA_SAFE_CALL(cudaFree(p));,
-             sycl::free(p,Gpu::Device::syclContext()););
+             sycl::free(p,Gpu::Device::syclContext()));
     }
 #else
     if (p && arena_info.device_use_hostalloc) munlock(p, nbytes);

diff --git a/Src/Base/AMReX_GpuAsyncArray.H b/Src/Base/AMReX_GpuAsyncArray.H
@@ -14,14 +14,14 @@
 #define HIPRT_CB 
 #endif
 
-#if defined(AMREX_USE_GPU) && !defined(AMREX_USE_DPCPP)
+#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
 extern "C" {
-AMREX_HIP_OR_CUDA(
-         void HIPRT_CB  amrex_asyncarray_delete ( hipStream_t stream,  hipError_t error, void* p);,
-#if ( defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10) )
-         void CUDART_CB amrex_asyncarray_delete (void* p);)
-#else
-         void CUDART_CB amrex_asyncarray_delete (cudaStream_t stream, cudaError_t error, void* p);)
+#if defined(AMREX_USE_HIP)
+    void HIPRT_CB  amrex_asyncarray_delete ( hipStream_t stream,  hipError_t error, void* p);
+#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
+    void CUDART_CB amrex_asyncarray_delete (void* p);
+#elif defined(AMREX_USE_CUDA)
+    void CUDART_CB amrex_asyncarray_delete (cudaStream_t stream, cudaError_t error, void* p);
 #endif
 }
 #endif
@@ -78,15 +78,15 @@ public:
                 T** p = static_cast<T**>(std::malloc(2*sizeof(T*)));
                 p[0] = d_data;
                 p[1] = h_data;
-                AMREX_HIP_OR_CUDA(
-                    AMREX_HIP_SAFE_CALL ( hipStreamAddCallback(Gpu::gpuStream(),
-                                                               amrex_asyncarray_delete, p, 0));,
-#if ( defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10) )
-                    AMREX_CUDA_SAFE_CALL(cudaLaunchHostFunc(Gpu::gpuStream(),
-                                                            amrex_asyncarray_delete, p)););
-#else                    
-                    AMREX_CUDA_SAFE_CALL(cudaStreamAddCallback(Gpu::gpuStream(),
-                                                               amrex_asyncarray_delete, p, 0)););
+#if defined(AMREX_USE_HIP)
+                AMREX_HIP_SAFE_CALL ( hipStreamAddCallback(Gpu::gpuStream(),
+                                                           amrex_asyncarray_delete, p, 0));
+#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
+                AMREX_CUDA_SAFE_CALL(cudaLaunchHostFunc(Gpu::gpuStream(),
+                                                        amrex_asyncarray_delete, p));
+#elif defined(AMREX_USE_CUDA)
+                AMREX_CUDA_SAFE_CALL(cudaStreamAddCallback(Gpu::gpuStream(),
+                                                           amrex_asyncarray_delete, p, 0));
 #endif
                 Gpu::callbackAdded();
 #else

diff --git a/Src/Base/AMReX_GpuAsyncArray.cpp b/Src/Base/AMReX_GpuAsyncArray.cpp
@@ -1,20 +1,15 @@
 #include <AMReX_GpuAsyncArray.H>
-#include <mutex>
 
 #ifdef AMREX_USE_GPU
 
-#ifdef __HIP_PLATFORM_HCC__
-#define HIPRT_CB 
-#endif
-
 #if !defined(AMREX_USE_DPCPP)
 extern "C" {
-AMREX_HIP_OR_CUDA(
-         void HIPRT_CB  amrex_asyncarray_delete ( hipStream_t stream,  hipError_t error, void* p),
-#if ( defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10) )
-         void CUDART_CB amrex_asyncarray_delete (void* p))
-#else
-         void CUDART_CB amrex_asyncarray_delete (cudaStream_t stream, cudaError_t error, void* p))
+#if defined(AMREX_USE_HIP)
+    void HIPRT_CB  amrex_asyncarray_delete ( hipStream_t stream,  hipError_t error, void* p)
+#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
+    void CUDART_CB amrex_asyncarray_delete (void* p)
+#elif defined(AMREX_USE_CUDA)
+    void CUDART_CB amrex_asyncarray_delete (cudaStream_t stream, cudaError_t error, void* p)
 #endif
     {
         void** pp = (void**)p;

diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp
@@ -124,7 +124,7 @@ Device::Initialize ()
         AMREX_HIP_OR_CUDA_OR_DPCPP
             ( amrex::Print() << "Initializing HIP...\n";,
               amrex::Print() << "Initializing CUDA...\n";,
-              amrex::Print() << "Initializing oneAPI...\n"; );
+              amrex::Print() << "Initializing oneAPI...\n"; )
     }
 
     // XL CUDA Fortran support needs to be initialized

diff --git a/Src/Base/AMReX_GpuElixir.cpp b/Src/Base/AMReX_GpuElixir.cpp
@@ -18,12 +18,12 @@ namespace {
 #if defined(AMREX_USE_GPU) && !defined(AMREX_USE_DPCPP)
 
 extern "C" {
-AMREX_HIP_OR_CUDA(
-         void HIPRT_CB  amrex_elixir_delete ( hipStream_t stream,  hipError_t error, void* p),
-#if ( defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10) )         
-         void CUDART_CB amrex_elixir_delete (void* p))
-#else
-         void CUDART_CB amrex_elixir_delete (cudaStream_t stream, cudaError_t error, void* p))
+#if defined(AMREX_USE_HIP)
+    void HIPRT_CB  amrex_elixir_delete ( hipStream_t stream,  hipError_t error, void* p)
+#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
+    void CUDART_CB amrex_elixir_delete (void* p)
+#elif defined(AMREX_USE_CUDA)
+    void CUDART_CB amrex_elixir_delete (cudaStream_t stream, cudaError_t error, void* p)
 #endif
     {
         void** pp = (void**)p;
@@ -49,15 +49,15 @@ Elixir::clear () noexcept
             void** p = static_cast<void**>(std::malloc(2*sizeof(void*)));
             p[0] = m_p;
             p[1] = (void*)m_arena;
-            AMREX_HIP_OR_CUDA(
-                AMREX_HIP_SAFE_CALL ( hipStreamAddCallback(Gpu::gpuStream(),
-                                                           amrex_elixir_delete, p, 0));,
-#if ( defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10) )
-                AMREX_CUDA_SAFE_CALL(cudaLaunchHostFunc(Gpu::gpuStream(),
-                                                        amrex_elixir_delete, p)););
-#else
-                AMREX_CUDA_SAFE_CALL(cudaStreamAddCallback(Gpu::gpuStream(),
-                                                           amrex_elixir_delete, p, 0)););
+#if defined(AMREX_USE_HIP)
+            AMREX_HIP_SAFE_CALL ( hipStreamAddCallback(Gpu::gpuStream(),
+                                                       amrex_elixir_delete, p, 0));
+#elif defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 10)
+            AMREX_CUDA_SAFE_CALL(cudaLaunchHostFunc(Gpu::gpuStream(),
+                                                    amrex_elixir_delete, p));
+#elif defined(AMREX_USE_CUDA)
+            AMREX_CUDA_SAFE_CALL(cudaStreamAddCallback(Gpu::gpuStream(),
+                                                       amrex_elixir_delete, p, 0));
 #endif
             Gpu::callbackAdded();
 #elif defined(AMREX_USE_DPCPP)

diff --git a/Src/Base/AMReX_GpuReduce.H b/Src/Base/AMReX_GpuReduce.H
@@ -110,7 +110,7 @@ struct warpReduce
 //#ifdef AMREX_USE_CUDA
         for (int offset = warpSize/2; offset > 0; offset /= 2) {
             AMREX_HIP_OR_CUDA(T y = __shfl_down(x, offset);,
-                              T y = __shfl_down_sync(0xffffffff, x, offset); );
+                              T y = __shfl_down_sync(0xffffffff, x, offset); )
             x = F()(x,y);
         }
 //#endif

diff --git a/Src/Base/AMReX_Random.cpp b/Src/Base/AMReX_Random.cpp
@@ -127,10 +127,10 @@ amrex::RandomNormal (amrex::Real mean, amrex::Real stddev)
     int i = get_state(tid);
 #ifdef BL_USE_FLOAT
     AMREX_HIP_OR_CUDA( rand = stddev * hiprand_normal(&d_states_d_ptr[i]) + mean;,
-                       rand = stddev *  curand_normal(&d_states_d_ptr[i]) + mean; );
+                       rand = stddev *  curand_normal(&d_states_d_ptr[i]) + mean; )
 #else
     AMREX_HIP_OR_CUDA( rand = stddev * hiprand_normal_double(&d_states_d_ptr[i]) + mean;,
-                       rand = stddev *  curand_normal_double(&d_states_d_ptr[i]) + mean; );
+                       rand = stddev *  curand_normal_double(&d_states_d_ptr[i]) + mean; )
 #endif
     __threadfence();
     free_state(tid);
@@ -172,10 +172,10 @@ amrex::Random ()
     // std::uniform_real_distribution in [0.0, 1.0)
 #ifdef BL_USE_FLOAT
     AMREX_HIP_OR_CUDA( rand = 1.0f - hiprand_uniform(&d_states_d_ptr[i]);,
-                       rand = 1.0f - curand_uniform(&d_states_d_ptr[i]); );
+                       rand = 1.0f - curand_uniform(&d_states_d_ptr[i]); )
 #else
     AMREX_HIP_OR_CUDA( rand = 1.0 - hiprand_uniform_double(&d_states_d_ptr[i]);,
-                       rand = 1.0 - curand_uniform_double(&d_states_d_ptr[i]); );
+                       rand = 1.0 - curand_uniform_double(&d_states_d_ptr[i]); )
 #endif
 
     __threadfence();
@@ -217,7 +217,7 @@ amrex::RandomPoisson (amrex::Real lambda)
     const auto i = get_state(tid);
 
     AMREX_HIP_OR_CUDA( rand = hiprand_poisson(&d_states_d_ptr[i], lambda);,
-                       rand = curand_poisson(&d_states_d_ptr[i], lambda););
+                       rand = curand_poisson(&d_states_d_ptr[i], lambda);)
 
     __threadfence();
     free_state(tid);
@@ -259,7 +259,7 @@ amrex::Random_int (unsigned int n)
     int i = get_state(tid);
     do {
         AMREX_HIP_OR_CUDA( rand = hiprand(&d_states_d_ptr[i]);,
-                           rand =  curand(&d_states_d_ptr[i]); );
+                           rand =  curand(&d_states_d_ptr[i]); )
     } while (rand > (RAND_M - RAND_M % n));
     __threadfence();
     free_state(tid);
@@ -415,7 +415,7 @@ amrex::ResizeRandomSeed (int N)
         int loc = idx + PrevSize;
 
         AMREX_HIP_OR_CUDA( hiprand_init(seed, seqstart, 0, &d_states_d_ptr[loc]);,
-                            curand_init(seed, seqstart, 0, &d_states_d_ptr[loc]); );
+                            curand_init(seed, seqstart, 0, &d_states_d_ptr[loc]); )
     });
 
 #endif

diff --git a/Src/Base/AMReX_Scan.H b/Src/Base/AMReX_Scan.H
@@ -479,7 +479,7 @@ T PrefixSum (int n, FIN && fin, FOUT && fout, Type type)
             // Scan within a warp
             for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) {
                 AMREX_HIP_OR_CUDA( T s = __shfl_up(x,i);,
-                                   T s = __shfl_up_sync(0xffffffff, x, i); );
+                                   T s = __shfl_up_sync(0xffffffff, x, i); )
                 if (lane >= i) x += s;
             }
 
@@ -585,7 +585,7 @@ T PrefixSum (int n, FIN && fin, FOUT && fout, Type type)
 
                         for (int i = Gpu::Device::warp_size/2; i > 0; i /= 2) {
                             AMREX_HIP_OR_CUDA( x += __shfl_down(x,i);,
-                                               x += __shfl_down_sync(0xffffffff, x, i); );
+                                               x += __shfl_down_sync(0xffffffff, x, i); )
                         }
                     }
 

diff --git a/Tests/GPU/Locking/main.cpp b/Tests/GPU/Locking/main.cpp
@@ -153,7 +153,7 @@ void blockCountingTest ()
 #endif
 
         AMREX_HIP_OR_CUDA( hipPeekAtLastError();,
-                           cudaPeekAtLastError(); );
+                           cudaPeekAtLastError(); )
 
         Gpu::Device::synchronize();