Skip to content

Commit

Permalink
Merge pull request ComputationalRadiationPhysics#73 from slizzered/issue72-remove_superfluous_overwrites
Browse files Browse the repository at this point in the history

Removed overwriting of CUDA malloc/new
  • Loading branch information
psychocoderHPC committed Feb 9, 2015
2 parents 7b02d29 + 328b05b commit eabb314
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 119 deletions.
2 changes: 1 addition & 1 deletion examples/mallocMC_example01.cu
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void run()
size_t block = 32;
size_t grid = 32;
int length = 100;
assert(length<= block*grid); //necessary for used algorithm
assert((unsigned)length<= block*grid); //necessary for used algorithm

//init the heap
std::cerr << "initHeap...";
Expand Down
27 changes: 11 additions & 16 deletions examples/mallocMC_example02.cu
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,6 @@ typedef mallocMC::Allocator<
// use "ScatterAllocator" as mallocMC
MALLOCMC_SET_ALLOCATOR_TYPE(ScatterAllocator)

// replace all standard malloc()-calls on the device by mallocMC calls
// This will not work with the CreationPolicy "OldMalloc"!
MALLOCMC_OVERWRITE_MALLOC()

///////////////////////////////////////////////////////////////////////////////
// End of mallocMC configuration
Expand All @@ -108,7 +105,7 @@ int main()
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, 0);

if( deviceProp.major < 2 ) {
if( deviceProp.major < int(2) ) {
std::cerr << "Error: Compute Capability >= 2.0 required. (is ";
std::cerr << deviceProp.major << "."<< deviceProp.minor << ")" << std::endl;
return 1;
Expand All @@ -128,20 +125,18 @@ __device__ int** c;


// Kernel: assigns a, b and c a freshly allocated buffer of
// sizeof(int*) * x*y bytes each, i.e. room for x*y int* entries.
// NOTE(review): every pointer is assigned twice below - first from malloc(),
// then from mallocMC::malloc(). This looks like the removed and the added
// lines of a diff hunk shown together rather than one function body; compiled
// as written, the first three allocations would be overwritten without being
// released. Confirm against the repository before relying on this text.
__global__ void createArrays(int x, int y){
a = (int**) malloc(sizeof(int*) * x*y);
b = (int**) malloc(sizeof(int*) * x*y);
c = (int**) malloc(sizeof(int*) * x*y);
a = (int**) mallocMC::malloc(sizeof(int*) * x*y);
b = (int**) mallocMC::malloc(sizeof(int*) * x*y);
c = (int**) mallocMC::malloc(sizeof(int*) * x*y);
}


__global__ void fillArrays(int length, int* d){
int id = threadIdx.x + blockIdx.x*blockDim.x;

// using the MALLOCMC_OVERWRITE_MALLOC() macro
// allows also the use of "new"
a[id] = new int[length];
b[id] = new int[length];
c[id] = new int[length];
a[id] = (int*) mallocMC::malloc(sizeof(int)*length);
b[id] = (int*) mallocMC::malloc(sizeof(int)*length);
c[id] = (int*) mallocMC::malloc(sizeof(int)*length);

for(int i=0 ; i<length; ++i){
a[id][i] = id*length+i;
Expand All @@ -163,9 +158,9 @@ __global__ void addArrays(int length, int* d){

// Kernel: each thread releases the a/b/c entries at its own global thread
// index (threadIdx.x + blockIdx.x*blockDim.x).
// NOTE(review): every entry is released twice below - first with delete, then
// with mallocMC::free(). This looks like the removed and the added lines of a
// diff hunk shown together; compiled as written, each pointer would be freed
// twice. Confirm against the repository before relying on this text.
__global__ void freeArrays(){
int id = threadIdx.x + blockIdx.x*blockDim.x;
delete(a[id]);
delete(b[id]);
delete(c[id]);
mallocMC::free(a[id]);
mallocMC::free(b[id]);
mallocMC::free(c[id]);
}


Expand All @@ -174,7 +169,7 @@ void run()
size_t block = 32;
size_t grid = 32;
int length = 100;
assert(length<= block*grid); //necessary for used algorithm
assert((unsigned)length <= block*grid); //necessary for used algorithm

//init the heap
std::cerr << "initHeap...";
Expand Down
100 changes: 0 additions & 100 deletions src/include/mallocMC/mallocMC_overwrites.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,28 +82,6 @@ bool providesAvailableSlots(){ \
} /* end namespace mallocMC */



/** Create the functions mallocMC() and mcfree() inside the mallocMC namespace
*
* Both are thin wrappers: mallocMC() forwards to mallocMC::malloc() and
* mcfree() forwards to mallocMC::free(). Their distinct names make it possible
* to use them without name clashes when the namespace is included in the
* global scope.
*
* NOTE(review): MAMC_ACCELERATOR and __THROW are defined outside this view;
* presumably the device qualifier and an exception specification - confirm.
*/
#define MALLOCMC_MALLOCMC() \
namespace mallocMC{ \
MAMC_ACCELERATOR \
void* mallocMC(size_t t) __THROW \
{ \
return mallocMC::malloc(t); \
} \
MAMC_ACCELERATOR \
void mcfree(void* p) __THROW \
{ \
mallocMC::free(p); \
} \
} /* end namespace mallocMC */


/** Create the functions malloc() and free() inside a namespace
*
* This allows for a peaceful coexistence between different functions called
Expand All @@ -126,87 +104,13 @@ void free(void* p) __THROW \



/** Define placement new/delete overloads that take the allocator object (CUDA)
*
* The generated operator new / new[] overloads return h.alloc(t) and the
* matching operator delete / delete[] overloads call h.dealloc(p). They are
* meant for device-side new and delete and need a reference to the
* memory-allocator object on the device (this will be mostly useful when using
* more advanced techniques and managing your own global object instead of
* using the provided macros).
*
* The macro itself takes no arguments; `h` is the extra parameter of every
* generated operator. The macro is only defined when compiling with
* __CUDACC__ and __CUDA_ARCH__ >= 200.
*
* @param h the allocator as returned by initHeap()
*/
#ifdef __CUDACC__
#if __CUDA_ARCH__ >= 200
#define MALLOCMC_OVERWRITE_NEW() \
MAMC_ACCELERATOR \
void* operator new(size_t t, mallocMC::mallocMCType &h) \
{ \
return h.alloc(t); \
} \
MAMC_ACCELERATOR \
void* operator new[](size_t t, mallocMC::mallocMCType &h) \
{ \
return h.alloc(t); \
} \
MAMC_ACCELERATOR \
void operator delete(void* p, mallocMC::mallocMCType &h) \
{ \
h.dealloc(p); \
} \
MAMC_ACCELERATOR \
void operator delete[](void* p, mallocMC::mallocMCType &h) \
{ \
h.dealloc(p); \
}
#endif /* __CUDA_ARCH__ >= 200 */
#endif /* __CUDACC__ */



/** Override/replace the global implementation of malloc/free on CUDA devices
*
* Defines global malloc() and free() functions that forward to
* mallocMC::malloc() and mallocMC::free().
*
* Attention: This will also replace "new", "new[]", "delete" and "delete[]",
* since CUDA uses the same malloc/free functions for that. Needs at least
* ComputeCapability 2.0: the macro is only defined when compiling with
* __CUDACC__ and __CUDA_ARCH__ >= 200.
*/
#ifdef __CUDACC__
#if __CUDA_ARCH__ >= 200
#define MALLOCMC_OVERWRITE_MALLOC() \
MAMC_ACCELERATOR \
void* malloc(size_t t) __THROW \
{ \
return mallocMC::malloc(t); \
} \
MAMC_ACCELERATOR \
void free(void* p) __THROW \
{ \
mallocMC::free(p); \
}
#endif /* __CUDA_ARCH__ >= 200 */
#endif /* __CUDACC__ */



/* If any of the macros was not defined above (wrong CUDA version etc.),
* define it as an empty macro so that the name at least exists.
*/
#ifndef MALLOCMC_MALLOCMC
#define MALLOCMC_MALLOCMC()
#endif

#ifndef MALLOCMC_MALLOC
#define MALLOCMC_MALLOC()
#endif

#ifndef MALLOCMC_OVERWRITE_NEW
#define MALLOCMC_OVERWRITE_NEW()
#endif

#ifndef MALLOCMC_OVERWRITE_MALLOC
#define MALLOCMC_OVERWRITE_MALLOC()
#endif



/** Set up the global variables and functions
*
Expand All @@ -217,8 +121,4 @@ void free(void* p) __THROW \
#define MALLOCMC_SET_ALLOCATOR_TYPE(MALLOCMC_USER_DEFINED_TYPE) \
MALLOCMC_GLOBAL_FUNCTIONS(MALLOCMC_USER_DEFINED_TYPE) \
MALLOCMC_MALLOC() \
MALLOCMC_MALLOCMC() \
MALLOCMC_AVAILABLESLOTS()

//MALLOCMC_OVERWRITE_NEW()

3 changes: 1 addition & 2 deletions tests/verify_heap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,8 @@
#include "src/include/mallocMC/mallocMC_utils.hpp"
#include "verify_heap_config.hpp"

//use ScatterAllocator to replace malloc/free
//use ScatterAllocator
MALLOCMC_SET_ALLOCATOR_TYPE(ScatterAllocator)
MALLOCMC_OVERWRITE_MALLOC()

// global variable for verbosity, might change due to user input '--verbose'
bool verbose = false;
Expand Down

0 comments on commit eabb314

Please sign in to comment.