Skip to content

Commit

Permalink
Squashed 'thirdParty/mallocMC/' changes from 07e781c..977b55f
Browse files Browse the repository at this point in the history
977b55f Merge pull request ComputationalRadiationPhysics#141 from ax3l/doc-usagelink
0f79cd5 Link Usage.md
8415cb1 Merge pull request ComputationalRadiationPhysics#140 from psychocoderHPC/topic-makeAllocatorSelfConstistent
ee6f58e update examples
8822cb0 remove `finalizeHeap()` from `creationPolicies`
ebb37f5 update `Usage.md`
66785ac selfe consistent allocator
a036d5c Merge pull request ComputationalRadiationPhysics#135 from psychocoderHPC/fix-missingPoolSizeReset
449bc7b Merge pull request ComputationalRadiationPhysics#136 from psychocoderHPC/topic-destructiveResize
b2e0d76 add `destructiveResize` method
d0ecb62 fix missing local size change in `finalizeHeap()`
9c58f4d Merge pull request ComputationalRadiationPhysics#133 from psychocoderHPC/fix-ptxSpecialRegisterUsage
4cd47f0 fix missing '%%' to use ptx special register
cee846f Merge pull request ComputationalRadiationPhysics#127 from slizzered/documentation-add-thesis
b92f67f Merge pull request ComputationalRadiationPhysics#128 from slizzered/travis-fix_Werror
53e86b4 simpler handling of CXX_FLAGS
d2c4e08 Merge pull request ComputationalRadiationPhysics#126 from slizzered/fix_115
85143c0 Updated Readme.md
5f84db9 fixed string concatenation
eb1ad2d Fix to travis file: -Werror
ad72a61 Check heap pointer in Scatter creation policy
ffa4f7e Merge pull request ComputationalRadiationPhysics#125 from ComputationalRadiationPhysics/slizzered-patch-1
368478c moar fixes
a0bd2eb Update Usage.md
69a6973 Merge pull request ComputationalRadiationPhysics#109 from Flamefire/staticConstexpr
638fa5b Use BOOST_STATIC_CONSTEXPR
c0c6450 Merge pull request ComputationalRadiationPhysics#116 from slizzered/issue113-separate_object_host_device
4070f40 some renamig and freeing at end of examples
b9fe440 added implicit conversion to device handle
7f742b8 fixed OldMalloc policy to comply with new interface
0de5416 removed unnecessary inheritance
28c52c2 fixed a return type to comply with defined interface
503f0bd changed pointer to handle
d281cbe replaced initHeap function with constructor
74ba8aa Replaced the global __device__ object
4fa2d75 Merge pull request ComputationalRadiationPhysics#121 from ax3l/fix-cincludes
d99a6f3 Fix: Includes from C headers
b52c2b2 Merge pull request ComputationalRadiationPhysics#119 from ax3l/fix-travis
eb096f7 Updating to Travis-CI Trusty Beta
93ab74b Merge pull request ComputationalRadiationPhysics#112 from slizzered/issue110-error-handling
90881fd Fixed problems leading to uninitialized pointers
8980ead Merge pull request ComputationalRadiationPhysics#108 from Flamefire/exampleC++11
a92d4e1 Rename some shadowed variables in C++11 mode

git-subtree-dir: thirdParty/mallocMC
git-subtree-split: 977b55f98fe0e6e4545f6e092660b7b4f1b54493
  • Loading branch information
Third Party authored and psychocoderHPC committed Feb 6, 2017
1 parent 2dbd42c commit 5f57860
Show file tree
Hide file tree
Showing 24 changed files with 897 additions and 682 deletions.
24 changes: 16 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,31 +1,39 @@
language: cpp

sudo: required

dist: trusty

compiler:
- gcc

env:
global:
- INSTALL_DIR=~/mylibs
- CXXFLAGS="-Werror"

script:
- mkdir build_tmp && cd build_tmp
- CXXFLAGS="-Werror" cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR $TRAVIS_BUILD_DIR
- cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR $TRAVIS_BUILD_DIR
- make
- make install
- make examples

before_script:
- sudo add-apt-repository --yes ppa:smspillaz/cmake-2.8.12
- cat /etc/apt/sources.list
- cat /etc/apt/sources.list.d/*
- sudo apt-add-repository multiverse
- sudo apt-get update -qq
- sudo dpkg --configure -a
- sudo apt-get install -f -qq
- sudo dpkg --get-selections | grep hold || { echo "All packages OK."; }
- sudo apt-get install -q -y cmake-data cmake
- sudo apt-get install -qq build-essential
- sudo apt-get install -qq gcc-4.4 g++-4.4
- sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.4 60 --slave /usr/bin/g++ g++ /usr/bin/g++-4.4
- gcc --version && g++ --version
- gcc --version && g++ --version # 4.8
- apt-cache search nvidia-*
- sudo apt-get install -qq nvidia-common
- sudo apt-get install -qq nvidia-current
- sudo apt-get install -qq nvidia-cuda-toolkit nvidia-cuda-dev
- sudo apt-get install -qq libboost1.48-dev
- sudo apt-get install -qq nvidia-cuda-dev nvidia-cuda-toolkit # 5.5
- sudo apt-get install -qq libboost-dev # 1.54.0
- sudo find /usr/ -name libcuda*.so

after_script:
Expand Down
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,22 @@ INSTALL(
###############################################################################
# Executables
###############################################################################
add_custom_target(examples DEPENDS mallocMC_Example01 mallocMC_Example02 VerifyHeap)
add_custom_target(examples DEPENDS mallocMC_Example01 mallocMC_Example02 mallocMC_Example03 VerifyHeap)

cuda_add_executable(mallocMC_Example01
EXCLUDE_FROM_ALL
examples/mallocMC_example01.cu )
cuda_add_executable(mallocMC_Example02
EXCLUDE_FROM_ALL
examples/mallocMC_example02.cu )
cuda_add_executable(mallocMC_Example03
EXCLUDE_FROM_ALL
examples/mallocMC_example03.cu )
cuda_add_executable(VerifyHeap
EXCLUDE_FROM_ALL
tests/verify_heap.cu )

target_link_libraries(mallocMC_Example01 ${LIBS})
target_link_libraries(mallocMC_Example02 ${LIBS})
target_link_libraries(mallocMC_Example03 ${LIBS})
target_link_libraries(VerifyHeap ${LIBS})
24 changes: 18 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ accelerators**. Currently, it supports **NVIDIA GPUs** of compute capability
`sm_20` or higher through the *ScatterAlloc* algorithm.


Usage
-------

Follow the step-by-step instructions in [Usage.md](Usage.md) to replace your
`new`/`malloc` calls with a *blazingly fast* mallocMC heap! :rocket:


Install
-------

mallocMC is header-only, but requires a few other C++ libraries to be
available. Our installation notes can be found in [INSTALL.md](INSTALL.md).


On the ScatterAlloc Algorithm
-----------------------------

Expand Down Expand Up @@ -42,16 +56,11 @@ Branches

| *branch* | *state* | *description* |
| ----------- | ------- | ----------------------- |
| **master** | [![Build Status Master](https://travis-ci.org/ComputationalRadiationPhysics/mallocMC.png?branch=master)](https://travis-ci.org/ComputationalRadiationPhysics/mallocMC "master") | our stable new releases |
| **master** | [![Build Status Master](https://travis-ci.org/ComputationalRadiationPhysics/mallocMC.png?branch=master)](https://travis-ci.org/ComputationalRadiationPhysics/mallocMC "master") | our latest stable release |
| **dev** | [![Build Status Development](https://travis-ci.org/ComputationalRadiationPhysics/mallocMC.png?branch=dev)](https://travis-ci.org/ComputationalRadiationPhysics/mallocMC "dev") | our development branch - start and merge new branches here |
| **tugraz** | n/a | *ScatterAlloc* "upstream" branch: not backwards compatible mirror for algorithmic changes |


Install
-------

Installation notes can be found in [INSTALL.md](INSTALL.md).


Literature
----------
Expand All @@ -64,6 +73,9 @@ Just an incomplete link collection for now:
- 2012, May 5th: [Presentation](http://innovativeparallel.org/Presentations/inPar_kainz.pdf)
at *Innovative Parallel Computing 2012* by *Bernhard Kainz*

- Junior Thesis [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.34461.svg)](http://dx.doi.org/10.5281/zenodo.34461) by
Carlchristian Eckert (2014)


License
-------
Expand Down
60 changes: 32 additions & 28 deletions Usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ There is one header file that will include *all* necessary files:
Step 2a: choose policies
-----------------------

Each instance of a policy based allocator is composed through 5 **policies**. Each policy is expressed as a **policy class**.
Each instance of a policy based allocator is composed through 5 **policies**. Each policy is expressed as a **policy class**.

Currently, there are the following policy classes available:

Expand Down Expand Up @@ -73,7 +73,7 @@ could create the following typedef instead:
```c++
using namespace mallocMC;

typedef mallocMC::Allocator<
typedef mallocMC::Allocator<
CreationPolicies::Scatter<>,
DistributionPolicies::XMallocSIMD<>,
OOMPolicies::ReturnNull,
Expand All @@ -90,59 +90,63 @@ configuration struct defined above.
Step 3: instantiate allocator
-----------------------------

To create a default instance of the ScatterAllocator type and add the necessary
functions, the following Macro has to be executed:
To use the defined allocator type, create an instance with the desired heap size:

```c++
MALLOCMC_SET_ALLOCATOR_TYPE(ScatterAllocator)
ScatterAllocator sa( 512U * 1024U * 1024U ); // heap size of 512MiB
```
This will set up the following functions in the namespace `mallocMC`:
The allocator object offers the following methods:
| Name | description |
|---------------------- |-------------------------|
| getAvailableSlots(size_t) | Determines number of allocatable slots of a certain size. This only works, if the chosen CreationPolicy supports it (can be found through `mallocMC::Traits<ScatterAllocator>::providesAvailableSlots`) |
| Name | description |
|-----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| mallocMC::initHeap() | Initializes the heap. Must be called before any other calls to the allocator are permitted. Can take the desired size of the heap as a parameter |
| mallocMC::finalizeHeap() | Destroys the heap again |
| mallocMC::malloc() | Allocates memory on the accelerator |
| mallocMC::free() | Frees memory on the accelerator |
| mallocMC::getAvailableSlots() | Determines number of allocatable slots of a certain size. This only works, if the chosen CreationPolicy supports it (can be found through `mallocMC::Traits<ScatterAllocator>::providesAvailableSlots`) |
Step 4: use dynamic memory allocation in a kernel
-------------------------------------------------
A handle to the allocator object must be passed to each kernel. The handle type is defined as an internal type of the allocator. Inside the kernel, this handle can be used to request memory.
The handle offers the following methods:
| Name | description |
|---------------------- |-------------------------|
| malloc(size_t) | Allocates memory on the accelerator |
| free(void*) | Frees memory on the accelerator |
| getAvailableSlots(size_t) | Determines number of allocatable slots of a certain size. This only works, if the chosen CreationPolicy supports it (can be found through `mallocMC::Traits<ScatterAllocator>::providesAvailableSlots`) |
Step 4: use dynamic memory allocation
-------------------------------------
A simplistic example would look like this:
```c++
#include <mallocMC/mallocMC.hpp>
namespace MC = mallocMC;
typedef MC::Allocator<
typedef MC::Allocator<
MC::CreationPolicies::Scatter<>,
MC::DistributionPolicies::XMallocSIMD<>,
MC::OOMPolicies::ReturnNull,
MC::ReservePoolPolicies::SimpleCudaMalloc,
MC::AlignmentPolicies::Shrink<ShrinkConfig>
> ScatterAllocator;
MALLOCMC_SET_ALLOCATOR_TYPE(ScatterAllocator)
__global__ exampleKernel()
__global__ void exampleKernel(ScatterAllocator::AllocatorHandle sah)
{
// some code ...
int* a = (int*) MC::malloc(sizeof(int)*42);
int* a = (int*) sah.malloc(sizeof(int)*42);
// some more code, using *a
MC::free(a);
sah.free(a);
}
int main(){
MC::initHeap(512); // heapsize of 512MB
ScatterAllocator sa( 1U * 512U * 1024U * 1024U ); // heap size of 512MiB
exampleKernel<<< 32, 32 >>>(sa);
exampleKernel<<<32,32>>>();
MC::finalizeHeap();
return 0;
}
```

For more usage examples, have a look at the [examples](examples).
63 changes: 35 additions & 28 deletions examples/mallocMC_example01.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
*/

#include <iostream>
#include <assert.h>
#include <cassert>
#include <vector>
#include <numeric>

Expand Down Expand Up @@ -55,28 +55,28 @@ int main()
}


__device__ int** a;
__device__ int** b;
__device__ int** c;
__device__ int** arA;
__device__ int** arB;
__device__ int** arC;


__global__ void createArrays(int x, int y){
a = (int**) mallocMC::malloc(sizeof(int*) * x*y);
b = (int**) mallocMC::malloc(sizeof(int*) * x*y);
c = (int**) mallocMC::malloc(sizeof(int*) * x*y);
__global__ void createArrayPointers(int x, int y, ScatterAllocator::AllocatorHandle mMC){
arA = (int**) mMC.malloc(sizeof(int*) * x*y);
arB = (int**) mMC.malloc(sizeof(int*) * x*y);
arC = (int**) mMC.malloc(sizeof(int*) * x*y);
}


__global__ void fillArrays(int length, int* d){
__global__ void fillArrays(int length, int* d, ScatterAllocator::AllocatorHandle mMC){
int id = threadIdx.x + blockIdx.x*blockDim.x;

a[id] = (int*) mallocMC::malloc(length*sizeof(int));
b[id] = (int*) mallocMC::malloc(length*sizeof(int));
c[id] = (int*) mallocMC::malloc(sizeof(int)*length);
arA[id] = (int*) mMC.malloc(length*sizeof(int));
arB[id] = (int*) mMC.malloc(length*sizeof(int));
arC[id] = (int*) mMC.malloc(sizeof(int)*length);

for(int i=0 ; i<length; ++i){
a[id][i] = id*length+i;
b[id][i] = id*length+i;
arA[id][i] = id*length+i;
arB[id][i] = id*length+i;
}
}

Expand All @@ -86,17 +86,24 @@ __global__ void addArrays(int length, int* d){

d[id] = 0;
for(int i=0 ; i<length; ++i){
c[id][i] = a[id][i] + b[id][i];
d[id] += c[id][i];
arC[id][i] = arA[id][i] + arB[id][i];
d[id] += arC[id][i];
}
}


__global__ void freeArrays(){
__global__ void freeArrays(ScatterAllocator::AllocatorHandle mMC){
int id = threadIdx.x + blockIdx.x*blockDim.x;
mallocMC::free(a[id]);
mallocMC::free(b[id]);
mallocMC::free(c[id]);
mMC.free(arA[id]);
mMC.free(arB[id]);
mMC.free(arC[id]);
}


// Returns the three top-level pointer tables (arA, arB, arC) to the mallocMC
// heap through the allocator handle `mMC`.
//
// These are the tables obtained via mMC.malloc() in createArrayPointers; the
// per-thread arrays they point to are released separately by freeArrays.
// Every thread that executes this kernel frees the same three pointers, and
// run() launches it as <<<1,1>>> after freeArrays.
// NOTE(review): presumably a launch with more than one thread would free each
// table more than once -- confirm before changing the launch configuration.
__global__ void freeArrayPointers(ScatterAllocator::AllocatorHandle mMC){
mMC.free(arA);
mMC.free(arB);
mMC.free(arC);
}


Expand All @@ -109,7 +116,7 @@ void run()

//init the heap
std::cerr << "initHeap...";
mallocMC::initHeap(1U*1024U*1024U*1024U); //1GB for device-side malloc
ScatterAllocator mMC(1U*1024U*1024U*1024U); //1GB for device-side malloc
std::cerr << "done" << std::endl;

std::cout << ScatterAllocator::info("\n") << std::endl;
Expand All @@ -122,18 +129,18 @@ void run()
std::vector<int> array_sums(block*grid,0);

// create arrays of arrays on the device
createArrays<<<1,1>>>(grid,block);
createArrayPointers<<<1,1>>>(grid,block, mMC );

// fill 2 of them all with ascending values
fillArrays<<<grid,block>>>(length, d);
fillArrays<<<grid,block>>>(length, d, mMC );

// add the 2 arrays (vector addition within each thread)
// and do a thread-wise reduce to d
addArrays<<<grid,block>>>(length, d);

cudaMemcpy(&array_sums[0],d,sizeof(int)*block*grid,cudaMemcpyDeviceToHost);

mallocMC::getAvailableSlots(1024U*1024U); //get available megabyte-sized slots
mMC.getAvailableSlots(1024U*1024U); //get available megabyte-sized slots

int sum = std::accumulate(array_sums.begin(),array_sums.end(),0);
std::cout << "The sum of the arrays on GPU is " << sum << std::endl;
Expand All @@ -142,8 +149,8 @@ void run()
int gaussian = n*(n-1);
std::cout << "The gaussian sum as comparison: " << gaussian << std::endl;

freeArrays<<<grid,block>>>();
freeArrays<<<grid,block>>>( mMC );
freeArrayPointers<<<1,1>>>( mMC );
cudaFree(d);
//finalize the heap again
mallocMC::finalizeHeap();

}
4 changes: 0 additions & 4 deletions examples/mallocMC_example01_config.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
#include <boost/mpl/bool.hpp>

// basic files for mallocMC
#include "src/include/mallocMC/mallocMC_overwrites.hpp"
#include "src/include/mallocMC/mallocMC_hostclass.hpp"

// Load all available policies for mallocMC
Expand Down Expand Up @@ -79,6 +78,3 @@ typedef mallocMC::Allocator<
mallocMC::ReservePoolPolicies::SimpleCudaMalloc,
mallocMC::AlignmentPolicies::Shrink<ShrinkConfig>
> ScatterAllocator;

// use "ScatterAllocator" as PolicyAllocator
MALLOCMC_SET_ALLOCATOR_TYPE(ScatterAllocator)
Loading

0 comments on commit 5f57860

Please sign in to comment.