Skip to content

Commit

Permalink
Merge pull request ComputationalRadiationPhysics#3356 from psychocoderHPC/topic-hipRequirements
Browse files Browse the repository at this point in the history

Increase HIP compatibility
  • Loading branch information
sbastrakov authored Sep 23, 2020
2 parents 3ee68a0 + a27b90e commit f3e2846
Show file tree
Hide file tree
Showing 19 changed files with 121 additions and 69 deletions.
2 changes: 1 addition & 1 deletion include/picongpu/_defaultParam.loader
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#pragma once

#include "picongpu/param/dimension.param"
#if( PMACC_CUDA_ENABLED == 1 )
#if(BOOST_LANG_CUDA || BOOST_COMP_HIP)
# include "picongpu/param/mallocMC.param"
#endif
#include "picongpu/param/memory.param"
Expand Down
14 changes: 14 additions & 0 deletions include/picongpu/fields/currentDeposition/Strategy.def
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,20 @@ namespace traits
alpaka::acc::AccGpuCudaRt< T_Args... >
>
{
// GPU Utilization is higher compared to `StridedCachedSupercells`
using type = strategy::CachedSupercells;
};
#endif

#if( ALPAKA_ACC_GPU_HIP_ENABLED == 1 )
template<
typename ... T_Args
>
struct GetDefaultStrategy<
alpaka::acc::AccGpuHipRt< T_Args... >
>
{
// GPU Utilization is higher compared to `StridedCachedSupercells`
using type = strategy::CachedSupercells;
};
#endif
Expand Down
2 changes: 1 addition & 1 deletion include/picongpu/particles/Particles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ namespace picongpu
{
using namespace pmacc;

#if( PMACC_CUDA_ENABLED != 1 )
#if(!BOOST_LANG_CUDA && !BOOST_COMP_HIP)
/* dummy because we are not using mallocMC with cupla
* DeviceHeap is defined in `mallocMC.param`
*/
Expand Down
2 changes: 1 addition & 1 deletion include/picongpu/particles/ParticlesFunctors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct LogMemoryStatisticsForSpecies
const std::shared_ptr<T_DeviceHeap>& deviceHeap
) const
{
#if( PMACC_CUDA_ENABLED == 1 )
#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
log<picLog::MEMORY >("mallocMC: free slots for species %3%: %1% a %2%") %
deviceHeap->getAvailableSlots(
cupla::manager::Device< cupla::AccDev >::get().current(),
Expand Down
12 changes: 8 additions & 4 deletions include/picongpu/simulation/control/MySimulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,9 @@ class MySimulation : public SimulationHelper<simDim>

this->bremsstrahlungPhotonAngle.init();
}
#endif

#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
auto nativeCudaStream = cupla::manager::Stream<
cupla::AccDev,
cupla::AccStream
Expand Down Expand Up @@ -425,7 +427,7 @@ class MySimulation : public SimulationHelper<simDim>
throw std::runtime_error(msg.str());
}

#if( PMACC_CUDA_ENABLED == 1 )
#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
size_t heapSize = freeGpuMem - reservedGpuMemorySize;

if( Environment<>::get().MemoryInfo().isSharedMemoryPool() )
Expand All @@ -443,10 +445,12 @@ class MySimulation : public SimulationHelper<simDim>
heapSize
);
cuplaStreamSynchronize( 0 );

auto mallocMCBuffer = std::make_unique< MallocMCBuffer<DeviceHeap> >( deviceHeap );
# if( PMACC_CUDA_ENABLED == 1 )
auto mallocMCBuffer = std::make_unique< MallocMCBuffer< DeviceHeap > >( deviceHeap );
dc.consume( std::move( mallocMCBuffer ) );
# endif
#endif

meta::ForEach< VectorAllSpecies, particles::LogMemoryStatisticsForSpecies<bmpl::_1> > logMemoryStatisticsForSpecies;
logMemoryStatisticsForSpecies( deviceHeap );

Expand All @@ -455,7 +459,7 @@ class MySimulation : public SimulationHelper<simDim>

IdProvider<simDim>::init();

#if( PMACC_CUDA_ENABLED == 1 )
#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
/* add CUDA streams to the StreamController for concurrent execution */
Environment<>::get().StreamController().addStreams(6);
#endif
Expand Down
25 changes: 15 additions & 10 deletions include/pmacc/Environment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ namespace detail
{
int num_gpus = 0; //number of gpus
cuplaGetDeviceCount(&num_gpus);
#if (PMACC_CUDA_ENABLED == 1)
#if(BOOST_LANG_CUDA|| BOOST_COMP_HIP)
//##ERROR handling
if (num_gpus < 1) //check if cupla device is found
{
Expand All @@ -490,17 +490,23 @@ namespace detail
const int tryDeviceId = (deviceOffset + deviceNumber) % num_gpus;

log<ggLog::CUDA_RT>("Trying to allocate device %1%.") % tryDeviceId;
#if (PMACC_CUDA_ENABLED == 1)

#if(BOOST_LANG_CUDA || BOOST_LANG_HIP)
# if(BOOST_LANG_CUDA)
cudaDeviceProp devProp;
CUDA_CHECK((cuplaError_t)cudaGetDeviceProperties(&devProp, tryDeviceId));
# elif(BOOST_LANG_HIP)
hipDeviceProp_t devProp;
# endif

CUDA_CHECK((cuplaError_t)ALPAKA_API_PREFIX(GetDeviceProperties)(&devProp, tryDeviceId));

/* If the cuda gpu compute mode is 'default'
* (https://docs.nvidia.com/cuda/cuda-c-programming-guide/#compute-modes)
* then we try to get a device only once.
* The index used to select a device is based on the local MPI rank so
* that each rank tries a different device.
*/
if (devProp.computeMode == cudaComputeModeDefault)
if (devProp.computeMode == ALPAKA_API_PREFIX(ComputeModeDefault))
{
maxTries = 1;
log<ggLog::CUDA_RT>("Device %1% is running in default mode.") % tryDeviceId;
Expand All @@ -526,18 +532,17 @@ namespace detail

if (rc == cuplaSuccess)
{
#if (PMACC_CUDA_ENABLED == 1)
cudaDeviceProp dprop;
CUDA_CHECK((cuplaError_t)cudaGetDeviceProperties(&dprop, tryDeviceId));
log<ggLog::CUDA_RT> ("Set device to %1%: %2%") % tryDeviceId % dprop.name;
if(cudaErrorSetOnActiveProcess == cudaSetDeviceFlags(cudaDeviceScheduleSpin))
#if(BOOST_LANG_CUDA || BOOST_LANG_HIP)
CUDA_CHECK((cuplaError_t)ALPAKA_API_PREFIX(GetDeviceProperties)(&devProp, tryDeviceId));
log<ggLog::CUDA_RT> ("Set device to %1%: %2%") % tryDeviceId % devProp.name;
if(ALPAKA_API_PREFIX(ErrorSetOnActiveProcess) == ALPAKA_API_PREFIX(SetDeviceFlags)(ALPAKA_API_PREFIX(DeviceScheduleSpin)))
{
cuplaGetLastError(); //reset all errors
/* - because of cuplaStreamCreate was called cuplaSetDeviceFlags crashed
* - to set the flags reset the device and set flags again
*/
CUDA_CHECK(cuplaDeviceReset());
CUDA_CHECK((cuplaError_t)cudaSetDeviceFlags(cudaDeviceScheduleSpin));
CUDA_CHECK((cuplaError_t)ALPAKA_API_PREFIX(SetDeviceFlags)(ALPAKA_API_PREFIX(DeviceScheduleSpin)));
}
#endif
CUDA_CHECK(cuplaGetLastError());
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/PMaccConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ endif()
# Find mallocMC
################################################################################

if(ALPAKA_ACC_GPU_CUDA_ENABLE)
if(ALPAKA_ACC_GPU_CUDA_ENABLE OR ALPAKA_ACC_GPU_HIP_ENABLE)
set(mallocMC_ALPAKA_PROVIDER "extern" CACHE STRING "Select which alpaka is used for mallocMC")
find_package(mallocMC 2.5.0 QUIET)

Expand Down
16 changes: 8 additions & 8 deletions include/pmacc/algorithms/math/doubleMath/bessel.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i0( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -57,7 +57,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i1( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -75,7 +75,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j0( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -93,7 +93,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j1( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -117,7 +117,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::jn(
n,
x
Expand All @@ -138,7 +138,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y0( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -156,7 +156,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y1( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -180,7 +180,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::yn(
n,
x
Expand Down
6 changes: 3 additions & 3 deletions include/pmacc/algorithms/math/doubleMath/floatingPoint.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct Float2int_ru<double>

HDINLINE result operator( )(double value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__double2int_ru( value );
#else
return static_cast<int>(ceil(value));
Expand All @@ -54,7 +54,7 @@ struct Float2int_rd<double>

HDINLINE result operator( )(double value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__double2int_rd( value );
#else
return static_cast<int>(floor(value));
Expand All @@ -69,7 +69,7 @@ struct Float2int_rn<double>

HDINLINE result operator( )(double value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__double2int_rn( value );
#else
if(value < 0.0)
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/algorithms/math/doubleMath/modf.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct Modf<double>

HDINLINE double operator()(double value, double* intpart)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::modf(value, intpart);
#else
return std::modf(value, intpart);
Expand Down
16 changes: 8 additions & 8 deletions include/pmacc/algorithms/math/floatMath/bessel.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i0f( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -57,7 +57,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i1f( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -75,7 +75,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j0f( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -93,7 +93,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j1f( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -117,7 +117,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::jnf(
n,
x
Expand All @@ -138,7 +138,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y0f( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -156,7 +156,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y1f( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -180,7 +180,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::ynf(
n,
x
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/algorithms/math/floatMath/exp.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace math

HDINLINE float operator( )(const float& value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::log10f( value );
#else
return ::log10( value );
Expand Down
6 changes: 3 additions & 3 deletions include/pmacc/algorithms/math/floatMath/floatingPoint.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct Float2int_ru<float>

HDINLINE result operator( )(float value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__float2int_ru( value );
#else
return static_cast<int>(ceil(value));
Expand All @@ -54,7 +54,7 @@ struct Float2int_rd<float>

HDINLINE result operator( )(float value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__float2int_rd( value );
#else
return static_cast<int>(floor(value));
Expand All @@ -69,7 +69,7 @@ struct Float2int_rn<float>

HDINLINE result operator( )(float value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__float2int_rn( value );
#else
if(value < 0.0f)
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/algorithms/math/floatMath/modf.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct Modf<float>

HDINLINE float operator()(float value, float* intpart)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::modff(value, intpart);
#else
return std::modf(value, intpart);
Expand Down
Loading

0 comments on commit f3e2846

Please sign in to comment.