Skip to content

Commit

Permalink
Close #14 Added AlignmentPolicy Shrink
Browse files Browse the repository at this point in the history
 -
 - might still be done too often
 - there might be a bug in the original code, where memory is not
   resized/shrinked correctly although the alignment was changed
 - pagesize in XMallocSIMD is now dependent on Scatter_impl
  • Loading branch information
Carlchristian Eckert committed Apr 17, 2014
1 parent 563c404 commit 9d42761
Show file tree
Hide file tree
Showing 13 changed files with 179 additions and 77 deletions.
4 changes: 4 additions & 0 deletions src/include/scatteralloc/AlignmentPolicies.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#pragma once

#include "alignmentPolicies/Shrink.hpp"
#include "alignmentPolicies/Shrink_impl.hpp"
12 changes: 12 additions & 0 deletions src/include/scatteralloc/alignmentPolicies/Shrink.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

namespace PolicyMalloc{
namespace AlignmentPolicies{

template<typename T_Dummy>
class Shrink2;

typedef Shrink2<void> Shrink;

} //namespace AlignmentPolicies
} //namespace PolicyMalloc
67 changes: 67 additions & 0 deletions src/include/scatteralloc/alignmentPolicies/Shrink_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#pragma once

#include <boost/cstdint.hpp>
#include <boost/static_assert.hpp>
#include <stdio.h>

#include "Shrink.hpp"

namespace PolicyMalloc{
namespace AlignmentPolicies{

namespace Shrink2NS{

typedef boost::uint32_t uint32;
template<int PSIZE> struct __PointerEquivalent{ typedef unsigned int type;};
template<>
struct __PointerEquivalent<8>{ typedef unsigned long long int type; };

typedef __PointerEquivalent<sizeof(char*)>::type PointerEquivalent;

__global__ void alignPoolKernel(void* memory, uint32 dataAlignment){
PointerEquivalent alignmentstatus = ((PointerEquivalent)memory) & (dataAlignment -1);
if(alignmentstatus != 0)
{
memory =(void*)(((PointerEquivalent)memory) + dataAlignment - alignmentstatus);
printf("Heap Warning: memory to use not 16 byte aligned...\n");
}
}
}// namespace ShrinkNS

template<typename T_Dummy>
class Shrink2{
typedef boost::uint32_t uint32;
typedef Shrink2<T_Dummy> MyType;
typedef typename GetProperties<MyType>::dataAlignment DataAlignment;

static const uint32 dataAlignment = DataAlignment::value;

#ifndef BOOST_NOINLINE
#define BOOST_NOINLINE='__attribute__ ((noinline)'
#define BOOST_NOINLINE_WAS_JUSTDEFINED
#endif
BOOST_STATIC_ASSERT(!std::numeric_limits<typename DataAlignment::type>::is_signed);
BOOST_STATIC_ASSERT(dataAlignment > 0);
//dataAlignment must also be a power of 2!
BOOST_STATIC_ASSERT(dataAlignment && !(dataAlignment & (dataAlignment-1)) );
#ifdef BOOST_NOINLINE_WAS_JUSTDEFINED
#undef BOOST_NOINLINE_WAS_JUSTDEFINED
#undef BOOST_NOINLINE
#endif

public:

static void* alignPool(void* memory){
Shrink2NS::alignPoolKernel<<<1,1>>>(memory,dataAlignment);
//TODO:maybe also take care of the memory-size bug
return memory;
}

__device__ static uint32 alignAccess(uint32 bytes){
return (bytes + dataAlignment - 1) & ~(dataAlignment-1);
}

};

} //namespace AlignmentPolicies
} //namespace PolicyMalloc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace CreationPolicies{
}

template < typename T>
static void destroyHeap(const T& obj){
static void finalizeHeap(const T& obj){
}

};
Expand Down
37 changes: 23 additions & 14 deletions src/include/scatteralloc/creationPolicies/Scatter_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
namespace PolicyMalloc{
namespace CreationPolicies{

namespace Scatter2NS{
template < typename T_Allocator >
__global__ void initKernel(T_Allocator* heap, void* heapmem, size_t memsize){
heap->initDeviceFunction(heapmem, memsize);
}
}

template<class T_Dummy>
class Scatter2
Expand Down Expand Up @@ -47,7 +49,7 @@ namespace CreationPolicies{
#endif
//static const uint32 minChunkSize0 = pagesize/(32*32); // TODO remove? it is used nowhere in the code
static const uint32 minChunkSize1 = 0x10;
static const uint32 dataAlignment = Properties::dataAlignment::value;
//static const uint32 dataAlignment = Properties::dataAlignment::value;

static const uint32 HierarchyThreshold = (pagesize - 2*sizeof(uint32))/33;

Expand All @@ -74,10 +76,10 @@ namespace CreationPolicies{
BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::wastefactor::type>::is_signed);
BOOST_STATIC_ASSERT(wastefactor > 0);

BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::dataAlignment::type>::is_signed);
BOOST_STATIC_ASSERT(dataAlignment > 0);
//dataAlignment must also be a power of 2!
BOOST_STATIC_ASSERT(dataAlignment && !(dataAlignment & (dataAlignment-1)) );
// BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::dataAlignment::type>::is_signed);
// BOOST_STATIC_ASSERT(dataAlignment > 0);
// //dataAlignment must also be a power of 2!
// BOOST_STATIC_ASSERT(dataAlignment && !(dataAlignment & (dataAlignment-1)) );


BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::hashingK::type>::is_signed);
Expand Down Expand Up @@ -553,7 +555,7 @@ namespace CreationPolicies{
if(bytes == 0)
return 0;
//take care of padding
bytes = (bytes + dataAlignment - 1) & ~(dataAlignment-1);
//bytes = (bytes + dataAlignment - 1) & ~(dataAlignment-1); // in alignment-policy
if(bytes < pagesize)
//chunck based
return allocChunked(bytes);
Expand Down Expand Up @@ -611,17 +613,22 @@ namespace CreationPolicies{

uint32 numregions = ((unsigned long long)memsize)/( ((unsigned long long)regionsize)*(sizeof(PTE)+pagesize)+sizeof(uint32));
uint32 numpages = numregions*regionsize;
//pointer is copied (copy is called page)
PAGE* page = (PAGE*)(memory);
//sec check for alignment
PointerEquivalent alignmentstatus = ((PointerEquivalent)page) & (dataAlignment -1);
if(alignmentstatus != 0)
{
page =(PAGE*)(((PointerEquivalent)page) + dataAlignment - alignmentstatus);
if(linid == 0) printf("Heap Warning: memory to use not 16 byte aligned...\n");
}
//copy is checked
//PointerEquivalent alignmentstatus = ((PointerEquivalent)page) & (dataAlignment -1);
//if(alignmentstatus != 0)
//{
// //copy is adjusted, potentially pointer to higher address now.
// page =(PAGE*)(((PointerEquivalent)page) + dataAlignment - alignmentstatus);
// if(linid == 0) printf("Heap Warning: memory to use not 16 byte aligned...\n");
//}
PTE* ptes = (PTE*)(page + numpages);
uint32* regions = (uint32*)(ptes + numpages);
//sec check for mem size
//this check refers to the original memory-pointer, which was not adjusted!
//TODO fix the bug, potentially by always using "page" from now on
if( (void*)(regions + numregions) > (((char*)memory) + memsize) )
{
--numregions;
Expand Down Expand Up @@ -659,6 +666,7 @@ namespace CreationPolicies{


__device__ bool isOOM(void* p){
// all threads in a warp return get NULL
return 32 == __popc(__ballot(p == NULL));
}

Expand All @@ -667,14 +675,15 @@ namespace CreationPolicies{
static void* initHeap(const T& obj, void* pool, size_t memsize){
T* heap;
SCATTERALLOC_CUDA_CHECKED_CALL(cudaGetSymbolAddress((void**)&heap,obj));
initKernel<<<1,256>>>(heap, pool, memsize);
Scatter2NS::initKernel<<<1,256>>>(heap, pool, memsize);
return heap;
}


template < typename T>
static void destroyHeap(const T& obj){
static void finalizeHeap(const T& obj){
//TODO: Think about the necessity of a teardown... (inside the pool)
//reset PAGE, memsize, numpages, regions, firstfreedblock, firstfreepagebased,numregions,ptes
}

};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace DistributionPolicies{

public:

__device__ uint32 gather(uint32 bytes){
__device__ uint32 collect(uint32 bytes){
return bytes;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ namespace DistributionPolicies{
typedef XMallocSIMD2<T_Dummy> MyType;
typedef GetProperties<MyType> Properties;
static const uint32 pagesize = Properties::pagesize::value;
static const uint32 dataAlignment = Properties::dataAlignment::value;

#ifndef BOOST_NOINLINE
#define BOOST_NOINLINE='__attribute__ ((noinline)'
Expand All @@ -34,20 +33,14 @@ namespace DistributionPolicies{
BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::pagesize::type>::is_signed);
BOOST_STATIC_ASSERT(pagesize > 0);

BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::dataAlignment::type>::is_signed);
BOOST_STATIC_ASSERT(dataAlignment > 0);
//dataAlignment must also be a power of 2!
BOOST_STATIC_ASSERT(dataAlignment && !(dataAlignment & (dataAlignment-1)) );

#ifdef BOOST_NOINLINE_WAS_JUSTDEFINED
#undef BOOST_NOINLINE_WAS_JUSTDEFINED
#undef BOOST_NOINLINE
#endif

public:

__device__ uint32 gather(uint32 bytes){
bytes = (bytes + dataAlignment - 1) & ~(dataAlignment-1);
__device__ uint32 collect(uint32 bytes){

can_use_coalescing = false;
warpid = PolicyMalloc::warpid();
Expand All @@ -58,6 +51,8 @@ namespace DistributionPolicies{
__shared__ uint32 warp_sizecounter[32];
warp_sizecounter[warpid] = 16;

//second half: make sure that all coalesced allocations can fit within one page
//necessary for offset calculation
bool coalescible = bytes > 0 && bytes < (pagesize / 32);
uint32 threadcount = __popc(__ballot(coalescible));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace PolicyMalloc{
namespace GetHeapPolicies{

struct CudaSetLimits{
static void* getMemPool(size_t memsize){
static void* setMemPool(size_t memsize){
cudaDeviceSetLimit(cudaLimitMallocHeapSize, memsize);
return NULL;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace PolicyMalloc{
namespace GetHeapPolicies{

struct SimpleCudaMalloc{
static void* getMemPool(size_t memsize){
static void* setMemPool(size_t memsize){
void* pool;
SCATTERALLOC_CUDA_CHECKED_CALL(cudaMalloc(&pool, memsize));
return pool;
Expand Down
26 changes: 18 additions & 8 deletions src/include/scatteralloc/oOMPolicies/BadAllocException_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,29 @@

#include <assert.h>

#include "ReturnNull.hpp"
#include "BadAllocException.hpp"

namespace PolicyMalloc{
namespace OOMPolicies{

class BadAllocException
struct BadAllocException
{
public:
__device__ static void* handleOOM(void* mem){
assert(false);
// TODO exception handling does not work on device!
return NULL;
}
__device__ static void* handleOOM(void* mem){
#ifdef __CUDACC__
//#if __CUDA_ARCH__ < 350
#define PM_EXCEPTIONS_NOT_SUPPORTED_HERE
//#endif
#endif

#ifdef PM_EXCEPTIONS_NOT_SUPPORTED_HERE
#undef PM_EXCEPTIONS_NOT_SUPPORTED_HERE
assert(false);
#else
std::bad_alloc exception;
throw exception;
#endif
return mem;
}
};

} //namespace OOMPolicies
Expand Down
48 changes: 26 additions & 22 deletions src/include/scatteralloc/policy_malloc_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,26 @@
#include "DistributionPolicies.hpp"
#include "OOMPolicies.hpp"
#include "GetHeapPolicies.hpp"
#include "AlignmentPolicies.hpp"



typedef PolicyMalloc::PolicyAllocator<
PolicyMalloc::CreationPolicies::Scatter,
PolicyMalloc::DistributionPolicies::XMallocSIMD,
PolicyMalloc::OOMPolicies::ReturnNull,
PolicyMalloc::GetHeapPolicies::SimpleCudaMalloc,
PolicyMalloc::AlignmentPolicies::Shrink
> ScatterAllocator;

typedef PolicyMalloc::PolicyAllocator<
PolicyMalloc::CreationPolicies::OldMalloc,
PolicyMalloc::DistributionPolicies::Noop,
PolicyMalloc::OOMPolicies::ReturnNull,
PolicyMalloc::GetHeapPolicies::CudaSetLimits,
PolicyMalloc::AlignmentPolicies::Shrink
> OldAllocator;

template<>
struct PolicyMalloc::GetProperties<PolicyMalloc::CreationPolicies::Scatter>{
typedef boost::mpl::int_<4096> pagesize;
Expand All @@ -20,38 +38,24 @@ struct PolicyMalloc::GetProperties<PolicyMalloc::CreationPolicies::Scatter>{
typedef boost::mpl::int_<2> wastefactor;
typedef boost::mpl::bool_<false> resetfreedpages;

typedef boost::mpl::int_<16> dataAlignment;

typedef boost::mpl::int_<38183> hashingK;
typedef boost::mpl::int_<17497> hashingDistMP;
typedef boost::mpl::int_<1> hashingDistWP;
typedef boost::mpl::int_<1> hashingDistWPRel;

};

template<>
struct PolicyMalloc::GetProperties<PolicyMalloc::DistributionPolicies::XMallocSIMD>{
typedef boost::mpl::int_<4096> pagesize;
typedef boost::mpl::int_<16> dataAlignment;
typedef GetProperties<CreationPolicies::Scatter>::pagesize pagesize;
};

typedef PolicyMalloc::PolicyAllocator<
PolicyMalloc::CreationPolicies::Scatter,
PolicyMalloc::DistributionPolicies::XMallocSIMD,
PolicyMalloc::OOMPolicies::ReturnNull,
PolicyMalloc::GetHeapPolicies::SimpleCudaMalloc
> ScatterAllocator;

typedef PolicyMalloc::PolicyAllocator<
PolicyMalloc::CreationPolicies::OldMalloc,
PolicyMalloc::DistributionPolicies::Noop,
PolicyMalloc::OOMPolicies::ReturnNull,
PolicyMalloc::GetHeapPolicies::CudaSetLimits
> OldAllocator;

template<>
struct PolicyMalloc::GetProperties<PolicyMalloc::AlignmentPolicies::Shrink>{
typedef boost::mpl::int_<16> dataAlignment;
};

//SET_ACCELERATOR_MEMORY_ALLOCATOR_TYPE(ScatterAllocator)
SET_ACCELERATOR_MEMORY_ALLOCATOR_TYPE(ScatterAllocator)

SET_ACCELERATOR_MEMORY_ALLOCATOR_TYPE(OldAllocator)
//SET_ACCELERATOR_MEMORY_ALLOCATOR_TYPE(OldAllocator)

//POLICY_MALLOC_MEMORY_ALLOCATOR_MALLOC_OVERWRITE()
POLICY_MALLOC_MEMORY_ALLOCATOR_MALLOC_OVERWRITE()
Loading

0 comments on commit 9d42761

Please sign in to comment.