Skip to content

Commit

Permalink
Merge pull request ComputationalRadiationPhysics#3356 from psychocoderHPC/topic-hipRequirements
Browse files Browse the repository at this point in the history

Increase HIP compatibility
  • Loading branch information
sbastrakov authored Sep 23, 2020
2 parents 3ee68a0 + a27b90e commit f3e2846
Show file tree
Hide file tree
Showing 19 changed files with 121 additions and 69 deletions.
2 changes: 1 addition & 1 deletion include/picongpu/_defaultParam.loader
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#pragma once

#include "picongpu/param/dimension.param"
#if( PMACC_CUDA_ENABLED == 1 )
#if(BOOST_LANG_CUDA || BOOST_COMP_HIP)
# include "picongpu/param/mallocMC.param"
#endif
#include "picongpu/param/memory.param"
Expand Down
14 changes: 14 additions & 0 deletions include/picongpu/fields/currentDeposition/Strategy.def
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,20 @@ namespace traits
alpaka::acc::AccGpuCudaRt< T_Args... >
>
{
// GPU Utilization is higher compared to `StridedCachedSupercells`
using type = strategy::CachedSupercells;
};
#endif

#if( ALPAKA_ACC_GPU_HIP_ENABLED == 1 )
template<
typename ... T_Args
>
struct GetDefaultStrategy<
alpaka::acc::AccGpuHipRt< T_Args... >
>
{
// GPU Utilization is higher compared to `StridedCachedSupercells`
using type = strategy::CachedSupercells;
};
#endif
Expand Down
2 changes: 1 addition & 1 deletion include/picongpu/particles/Particles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ namespace picongpu
{
using namespace pmacc;

#if( PMACC_CUDA_ENABLED != 1 )
#if(!BOOST_LANG_CUDA && !BOOST_COMP_HIP)
/* dummy because we are not using mallocMC with cupla
* DeviceHeap is defined in `mallocMC.param`
*/
Expand Down
2 changes: 1 addition & 1 deletion include/picongpu/particles/ParticlesFunctors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct LogMemoryStatisticsForSpecies
const std::shared_ptr<T_DeviceHeap>& deviceHeap
) const
{
#if( PMACC_CUDA_ENABLED == 1 )
#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
log<picLog::MEMORY >("mallocMC: free slots for species %3%: %1% a %2%") %
deviceHeap->getAvailableSlots(
cupla::manager::Device< cupla::AccDev >::get().current(),
Expand Down
12 changes: 8 additions & 4 deletions include/picongpu/simulation/control/MySimulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,9 @@ class MySimulation : public SimulationHelper<simDim>

this->bremsstrahlungPhotonAngle.init();
}
#endif

#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
auto nativeCudaStream = cupla::manager::Stream<
cupla::AccDev,
cupla::AccStream
Expand Down Expand Up @@ -425,7 +427,7 @@ class MySimulation : public SimulationHelper<simDim>
throw std::runtime_error(msg.str());
}

#if( PMACC_CUDA_ENABLED == 1 )
#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
size_t heapSize = freeGpuMem - reservedGpuMemorySize;

if( Environment<>::get().MemoryInfo().isSharedMemoryPool() )
Expand All @@ -443,10 +445,12 @@ class MySimulation : public SimulationHelper<simDim>
heapSize
);
cuplaStreamSynchronize( 0 );

auto mallocMCBuffer = std::make_unique< MallocMCBuffer<DeviceHeap> >( deviceHeap );
# if( PMACC_CUDA_ENABLED == 1 )
auto mallocMCBuffer = std::make_unique< MallocMCBuffer< DeviceHeap > >( deviceHeap );
dc.consume( std::move( mallocMCBuffer ) );
# endif
#endif

meta::ForEach< VectorAllSpecies, particles::LogMemoryStatisticsForSpecies<bmpl::_1> > logMemoryStatisticsForSpecies;
logMemoryStatisticsForSpecies( deviceHeap );

Expand All @@ -455,7 +459,7 @@ class MySimulation : public SimulationHelper<simDim>

IdProvider<simDim>::init();

#if( PMACC_CUDA_ENABLED == 1 )
#if( BOOST_LANG_CUDA || BOOST_COMP_HIP)
/* add CUDA streams to the StreamController for concurrent execution */
Environment<>::get().StreamController().addStreams(6);
#endif
Expand Down
25 changes: 15 additions & 10 deletions include/pmacc/Environment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ namespace detail
{
int num_gpus = 0; //number of gpus
cuplaGetDeviceCount(&num_gpus);
#if (PMACC_CUDA_ENABLED == 1)
#if(BOOST_LANG_CUDA|| BOOST_COMP_HIP)
//##ERROR handling
if (num_gpus < 1) //check if cupla device is found
{
Expand All @@ -490,17 +490,23 @@ namespace detail
const int tryDeviceId = (deviceOffset + deviceNumber) % num_gpus;

log<ggLog::CUDA_RT>("Trying to allocate device %1%.") % tryDeviceId;
#if (PMACC_CUDA_ENABLED == 1)

#if(BOOST_LANG_CUDA || BOOST_LANG_HIP)
# if(BOOST_LANG_CUDA)
cudaDeviceProp devProp;
CUDA_CHECK((cuplaError_t)cudaGetDeviceProperties(&devProp, tryDeviceId));
# elif(BOOST_LANG_HIP)
hipDeviceProp_t devProp;
# endif

CUDA_CHECK((cuplaError_t)ALPAKA_API_PREFIX(GetDeviceProperties)(&devProp, tryDeviceId));

/* If the cuda gpu compute mode is 'default'
* (https://docs.nvidia.com/cuda/cuda-c-programming-guide/#compute-modes)
* then we try to get a device only once.
* The index used to select a device is based on the local MPI rank so
* that each rank tries a different device.
*/
if (devProp.computeMode == cudaComputeModeDefault)
if (devProp.computeMode == ALPAKA_API_PREFIX(ComputeModeDefault))
{
maxTries = 1;
log<ggLog::CUDA_RT>("Device %1% is running in default mode.") % tryDeviceId;
Expand All @@ -526,18 +532,17 @@ namespace detail

if (rc == cuplaSuccess)
{
#if (PMACC_CUDA_ENABLED == 1)
cudaDeviceProp dprop;
CUDA_CHECK((cuplaError_t)cudaGetDeviceProperties(&dprop, tryDeviceId));
log<ggLog::CUDA_RT> ("Set device to %1%: %2%") % tryDeviceId % dprop.name;
if(cudaErrorSetOnActiveProcess == cudaSetDeviceFlags(cudaDeviceScheduleSpin))
#if(BOOST_LANG_CUDA || BOOST_LANG_HIP)
CUDA_CHECK((cuplaError_t)ALPAKA_API_PREFIX(GetDeviceProperties)(&devProp, tryDeviceId));
log<ggLog::CUDA_RT> ("Set device to %1%: %2%") % tryDeviceId % devProp.name;
if(ALPAKA_API_PREFIX(ErrorSetOnActiveProcess) == ALPAKA_API_PREFIX(SetDeviceFlags)(ALPAKA_API_PREFIX(DeviceScheduleSpin)))
{
cuplaGetLastError(); //reset all errors
/* - because of cuplaStreamCreate was called cuplaSetDeviceFlags crashed
* - to set the flags reset the device and set flags again
*/
CUDA_CHECK(cuplaDeviceReset());
CUDA_CHECK((cuplaError_t)cudaSetDeviceFlags(cudaDeviceScheduleSpin));
CUDA_CHECK((cuplaError_t)ALPAKA_API_PREFIX(SetDeviceFlags)(ALPAKA_API_PREFIX(DeviceScheduleSpin)));
}
#endif
CUDA_CHECK(cuplaGetLastError());
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/PMaccConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ endif()
# Find mallocMC
################################################################################

if(ALPAKA_ACC_GPU_CUDA_ENABLE)
if(ALPAKA_ACC_GPU_CUDA_ENABLE OR ALPAKA_ACC_GPU_HIP_ENABLE)
set(mallocMC_ALPAKA_PROVIDER "extern" CACHE STRING "Select which alpaka is used for mallocMC")
find_package(mallocMC 2.5.0 QUIET)

Expand Down
16 changes: 8 additions & 8 deletions include/pmacc/algorithms/math/doubleMath/bessel.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i0( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -57,7 +57,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i1( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -75,7 +75,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j0( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -93,7 +93,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j1( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -117,7 +117,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::jn(
n,
x
Expand All @@ -138,7 +138,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y0( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -156,7 +156,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y1( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -180,7 +180,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::yn(
n,
x
Expand Down
6 changes: 3 additions & 3 deletions include/pmacc/algorithms/math/doubleMath/floatingPoint.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct Float2int_ru<double>

HDINLINE result operator( )(double value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__double2int_ru( value );
#else
return static_cast<int>(ceil(value));
Expand All @@ -54,7 +54,7 @@ struct Float2int_rd<double>

HDINLINE result operator( )(double value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__double2int_rd( value );
#else
return static_cast<int>(floor(value));
Expand All @@ -69,7 +69,7 @@ struct Float2int_rn<double>

HDINLINE result operator( )(double value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__double2int_rn( value );
#else
if(value < 0.0)
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/algorithms/math/doubleMath/modf.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct Modf<double>

HDINLINE double operator()(double value, double* intpart)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::modf(value, intpart);
#else
return std::modf(value, intpart);
Expand Down
16 changes: 8 additions & 8 deletions include/pmacc/algorithms/math/floatMath/bessel.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i0f( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -57,7 +57,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::cyl_bessel_i1f( x );
#else
return boost::math::cyl_bessel_i(
Expand All @@ -75,7 +75,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j0f( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -93,7 +93,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::j1f( x );
#else
return boost::math::cyl_bessel_j(
Expand All @@ -117,7 +117,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::jnf(
n,
x
Expand All @@ -138,7 +138,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y0f( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -156,7 +156,7 @@ namespace bessel

HDINLINE result operator( )( result const & x )
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::y1f( x );
#else
return boost::math::cyl_neumann(
Expand All @@ -180,7 +180,7 @@ namespace bessel
result const & x
)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::ynf(
n,
x
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/algorithms/math/floatMath/exp.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace math

HDINLINE float operator( )(const float& value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::log10f( value );
#else
return ::log10( value );
Expand Down
6 changes: 3 additions & 3 deletions include/pmacc/algorithms/math/floatMath/floatingPoint.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct Float2int_ru<float>

HDINLINE result operator( )(float value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__float2int_ru( value );
#else
return static_cast<int>(ceil(value));
Expand All @@ -54,7 +54,7 @@ struct Float2int_rd<float>

HDINLINE result operator( )(float value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__float2int_rd( value );
#else
return static_cast<int>(floor(value));
Expand All @@ -69,7 +69,7 @@ struct Float2int_rn<float>

HDINLINE result operator( )(float value)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::__float2int_rn( value );
#else
if(value < 0.0f)
Expand Down
2 changes: 1 addition & 1 deletion include/pmacc/algorithms/math/floatMath/modf.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct Modf<float>

HDINLINE float operator()(float value, float* intpart)
{
#if __CUDA_ARCH__
#if( CUPLA_DEVICE_COMPILE == 1) //we are on gpu
return ::modff(value, intpart);
#else
return std::modf(value, intpart);
Expand Down
Loading

0 comments on commit f3e2846

Please sign in to comment.