Skip to content

Commit

Permalink
Squashed 'thirdParty/mallocMC/' changes from 4b779a34c..e2533d141
Browse files Browse the repository at this point in the history
e2533d141 Merge pull request ComputationalRadiationPhysics#153 from psychocoderHPC/topic-versionIncrease2.3.1
2723bc13d Merge pull request ComputationalRadiationPhysics#154 from ax3l/topic-cmake312rootHints
60c467ece version increase to 2.3.1
5f57e6d1f CMake: Honor _ROOT Env Hints
e0bbb5fdd Merge pull request ComputationalRadiationPhysics#151 from ax3l/merge-v230master
16cd2b9a5 Merge remote-tracking branch 'mainline/master' into merge-v230master
8dbb2dd6e Merge pull request ComputationalRadiationPhysics#150 from psychocoderHPC/fix-warpsPerSM
cab1dd5fc fix style, fix wrong used qualifier
5a71062db fix illegal memory access

git-subtree-dir: thirdParty/mallocMC
git-subtree-split: e2533d14101c9fa7af3d11b9d02277591e06d8e4
  • Loading branch information
Third Party authored and ax3l committed Feb 14, 2019
1 parent e278852 commit 044f2f1
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 7 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
Change Log / Release Log for mallocMC
================================================================

2.3.1crp
--------
**Date:** 2019-02-14

A critical bug was fixed which can result in an illegal memory access.

### Changes to mallocMC 2.3.0crp

**Bug fixes**
- fix illegal memory access in `XMallocSIMD` #150

**Misc:**
- CMake: Honor `<packageName>_ROOT` Env Hints #154


2.3.0crp
--------
**Date:** 2018-06-11
Expand Down
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ set(CMAKE_PREFIX_PATH "/usr/lib/x86_64-linux-gnu/"
"$ENV{CUDA_ROOT}" "$ENV{BOOST_ROOT}")


################################################################################
# CMake policies
#
# Search in <PackageName>_ROOT:
# https://cmake.org/cmake/help/v3.12/policy/CMP0074.html
################################################################################

if(POLICY CMP0074)
cmake_policy(SET CMP0074 NEW)
endif()


###############################################################################
# CUDA
###############################################################################
Expand Down
6 changes: 3 additions & 3 deletions src/include/mallocMC/creationPolicies/Scatter_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -933,15 +933,15 @@ namespace ScatterKernelDetail{
*/
__device__ unsigned getAvailableSlotsAccelerator(size_t slotSize){
int linearId;
int wId = threadIdx.x >> 5; //do not use warpid-function, since this value is not guaranteed to be stable across warp lifetime
int wId = warpid_withinblock(); //do not use warpid-function, since this value is not guaranteed to be stable across warp lifetime

#if(__CUDACC_VER_MAJOR__ >= 9)
uint32 activeThreads = __popc(__activemask());
#else
uint32 activeThreads = __popc(__ballot(true));
#endif
__shared__ uint32 activePerWarp[32]; //32 is the maximum number of warps in a block
__shared__ unsigned warpResults[32];
__shared__ uint32 activePerWarp[MaxThreadsPerBlock::value / WarpSize::value]; //maximum number of warps in a block
__shared__ unsigned warpResults[MaxThreadsPerBlock::value / WarpSize::value];
warpResults[wId] = 0;
activePerWarp[wId] = 0;

Expand Down
10 changes: 7 additions & 3 deletions src/include/mallocMC/distributionPolicies/XMallocSIMD_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ namespace DistributionPolicies{
public:
typedef T_Config Properties;

MAMC_ACCELERATOR
XMallocSIMD() : can_use_coalescing(false), warpid(warpid_withinblock()),
myoffset(0), threadcount(0), req_size(0)
{}

private:
/** Allow for a hierarchical validation of parameters:
*
Expand Down Expand Up @@ -89,12 +94,11 @@ namespace DistributionPolicies{
uint32 collect(uint32 bytes){

can_use_coalescing = false;
warpid = mallocMC::warpid();
myoffset = 0;
threadcount = 0;

//init with initial counter
__shared__ uint32 warp_sizecounter[32];
__shared__ uint32 warp_sizecounter[MaxThreadsPerBlock::value / WarpSize::value];
warp_sizecounter[warpid] = 16;

//second half: make sure that all coalesced allocations can fit within one page
Expand All @@ -121,7 +125,7 @@ namespace DistributionPolicies{

MAMC_ACCELERATOR
void* distribute(void* allocatedMem){
__shared__ char* warp_res[32];
__shared__ char* warp_res[MaxThreadsPerBlock::value / WarpSize::value];

char* myalloc = (char*) allocatedMem;
if (req_size && can_use_coalescing)
Expand Down
47 changes: 47 additions & 0 deletions src/include/mallocMC/mallocMC_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,24 @@ namespace mallocMC
return mylaneid;
}

/** warp index within a multiprocessor
*
* Index of the warp within the multiprocessor at the moment of the query.
* The result is volatile and can be different with each query.
*
* @return current index of the warp
*/
MAMC_ACCELERATOR inline boost::uint32_t warpid()
{
boost::uint32_t mywarpid;
asm("mov.u32 %0, %%warpid;" : "=r" (mywarpid));
return mywarpid;
}

/** maximum number of warps on a multiprocessor
*
* @return maximum number of warps on a multiprocessor
*/
MAMC_ACCELERATOR inline boost::uint32_t nwarpid()
{
boost::uint32_t mynwarpid;
Expand Down Expand Up @@ -186,4 +198,39 @@ namespace mallocMC
template<class T>
MAMC_HOST MAMC_ACCELERATOR inline T divup(T a, T b) { return (a + b - 1)/b; }

/** the maximal number threads per block
*
* https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
*/
struct MaxThreadsPerBlock
{
// valid for sm_2.X - sm_7.5
BOOST_STATIC_CONSTEXPR uint32_t value = 1024;
};

/** number of threads within a warp
*
* https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
*/
struct WarpSize
{
// valid for sm_2.X - sm_7.5
BOOST_STATIC_CONSTEXPR uint32_t value = 32;
};

/** warp id within a cuda block
*
* The id is constant over the lifetime of the thread.
* The id is not equal to warpid().
*
* @return warp id within the block
*/
MAMC_ACCELERATOR inline boost::uint32_t warpid_withinblock()
{
return (
threadIdx.z * blockDim.y * blockDim.x +
threadIdx.y * blockDim.x +
threadIdx.x
) / WarpSize::value;
}
}
2 changes: 1 addition & 1 deletion src/include/mallocMC/version.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
/** the mallocMC version: major API changes should be reflected here */
#define MALLOCMC_VERSION_MAJOR 2
#define MALLOCMC_VERSION_MINOR 3
#define MALLOCMC_VERSION_PATCH 0
#define MALLOCMC_VERSION_PATCH 1

/** the mallocMC flavor is used to differentiate the releases of the
* Computational Radiation Physics group (crp) from other releases
Expand Down

0 comments on commit 044f2f1

Please sign in to comment.