forked from ComputationalRadiationPhysics/picongpu
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Close #3 examples for the use of PolicyAllocator
- example01 uses ScatterAlloc with namespaces, adds 2 vectors and sums them - example02 uses ScatterAlloc with the malloc overwrite mechanism - example03 (newdelete_example) implements a std::vector-like container on GPU. This example comes from NVIDIA and was adapted to use ScatterAlloc.
- Loading branch information
Carlchristian Eckert
committed
Apr 17, 2014
1 parent
9d42761
commit b23fc1a
Showing
17 changed files
with
2,846 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
################################################################################ | ||
# | ||
# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. | ||
# | ||
# NOTICE TO USER: | ||
# | ||
# This source code is subject to NVIDIA ownership rights under U.S. and | ||
# international Copyright laws. | ||
# | ||
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE | ||
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR | ||
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH | ||
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF | ||
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. | ||
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, | ||
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS | ||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE | ||
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE | ||
# OR PERFORMANCE OF THIS SOURCE CODE. | ||
# | ||
# U.S. Government End Users. This source code is a "commercial item" as | ||
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of | ||
# "commercial computer software" and "commercial computer software | ||
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) | ||
# and is provided to the U.S. Government only as a commercial end item. | ||
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through | ||
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the | ||
# source code with only those rights set forth herein. | ||
# | ||
################################################################################ | ||
# | ||
# (Makefile project only supported on Mac OS X and Linux platforms) | ||
# | ||
################################################################################ | ||
|
||
include ./findcudalib.mk | ||
|
||
# Location of the CUDA Toolkit | ||
CUDA_PATH ?= "/opt/pkg/devel/cuda/5.5" | ||
|
||
# internal flags | ||
NVCCFLAGS := -m${OS_SIZE} | ||
CCFLAGS := | ||
NVCCLDFLAGS := | ||
LDFLAGS := | ||
|
||
# Extra user flags | ||
EXTRA_NVCCFLAGS ?= | ||
EXTRA_NVCCLDFLAGS ?= | ||
EXTRA_LDFLAGS ?= | ||
EXTRA_CCFLAGS ?= | ||
|
||
# OS-specific build flags | ||
ifneq ($(DARWIN),) | ||
LDFLAGS += -rpath $(CUDA_PATH)/lib | ||
CCFLAGS += -arch $(OS_ARCH) $(STDLIB) | ||
else | ||
ifeq ($(OS_ARCH),armv7l) | ||
ifeq ($(abi),gnueabi) | ||
CCFLAGS += -mfloat-abi=softfp | ||
else | ||
# default to gnueabihf | ||
override abi := gnueabihf | ||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 | ||
CCFLAGS += -mfloat-abi=hard | ||
endif | ||
endif | ||
endif | ||
|
||
ifeq ($(ARMv7),1) | ||
NVCCFLAGS += -target-cpu-arch ARM | ||
ifneq ($(TARGET_FS),) | ||
CCFLAGS += --sysroot=$(TARGET_FS) | ||
LDFLAGS += --sysroot=$(TARGET_FS) | ||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib | ||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib | ||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi) | ||
endif | ||
endif | ||
|
||
# Debug build flags | ||
ifeq ($(dbg),1) | ||
NVCCFLAGS += -g -G | ||
TARGET := debug | ||
else | ||
TARGET := release | ||
endif | ||
|
||
ALL_CCFLAGS := | ||
ALL_CCFLAGS += $(NVCCFLAGS) | ||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) | ||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) | ||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) | ||
|
||
ALL_LDFLAGS := | ||
ALL_LDFLAGS += $(ALL_CCFLAGS) | ||
ALL_LDFLAGS += $(NVCCLDFLAGS) | ||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) | ||
ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS) | ||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) | ||
|
||
# Common includes and paths for CUDA | ||
INCLUDES := -I../../common/inc | ||
LIBRARIES := | ||
|
||
################################################################################ | ||
|
||
# CUDA code generation flags | ||
GENCODE_SM20 := -gencode arch=compute_20,code=sm_20 | ||
GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\" | ||
GENCODE_FLAGS := $(GENCODE_SM10) $(GENCODE_SM20) $(GENCODE_SM30) | ||
|
||
################################################################################ | ||
|
||
# Target rules
#
# None of the targets below produce a file of the same name. Declaring them
# phony keeps them working even if a file called e.g. "clean" or "build"
# happens to exist in this directory.
.PHONY: all build run clean clobber

# Default target: build the sample.
all: build

build: newdelete

# Compile the CUDA source into an object file.
newdelete.o: newdelete.cu
	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the shared bin directory
# (../../bin/<arch>/<os>/<debug|release>[/<abi>]).
newdelete: newdelete.o
	$(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES)
	mkdir -p ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))
	cp $@ ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))

# Build if necessary, then execute the sample from this directory.
run: build
	./newdelete

# Remove the local build products and the copy placed in the bin directory.
clean:
	rm -f newdelete newdelete.o
	rm -rf ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))/newdelete

clobber: clean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
<entry> | ||
<name>NewDelete</name> | ||
<description> | ||
<![CDATA[This sample demonstrates dynamic global memory allocation through device C++ new and delete operators and virtual function declarations available with CUDA 4.0. ]]></description> | ||
<whitepaper></whitepaper> | ||
<minspec>SM 2.0</minspec> | ||
<keywords>CUDA, C++, Fermi</keywords> | ||
<scope>1:CUDA Advanced Topics</scope> | ||
<files> | ||
</files> | ||
|
||
|
||
<!-- *** Additional project definition information *** --> | ||
<!-- Libraries that the project should be linked against. Document may contain any number of "libraries" elements. --> | ||
<!-- Attributes "os" and "arch" are optional and may be used in any combination. --> | ||
<!-- "framework" attribute will be ignored on platforms other than Mac OS X --> | ||
|
||
<!-- Either absolute or relative to sample --> | ||
<library-path os="windows"></library-path> | ||
|
||
<!-- Either absolute or relative to sample --> | ||
<include-path>./</include-path> | ||
<include-path>../</include-path> | ||
<include-path>../../common/inc</include-path> | ||
|
||
<!-- Can be either "separate" or "whole", omitting this element means that sample can be compiled either way --> | ||
<devicecompilation>whole</devicecompilation> | ||
|
||
<!-- These options will be passed to NVCC compiler as is and for all files --> | ||
<nvcc-compiler></nvcc-compiler> | ||
<nvcc-compiler file="newdelete.cu"></nvcc-compiler> | ||
<sm-arch>sm20</sm-arch> | ||
<sm-arch>sm30</sm-arch> | ||
|
||
<!-- One of "exe|dll|lib" (exe is default) --> | ||
<type>exe</type> | ||
|
||
<!-- By default the IDE will open the file "samplename.(cu|cpp)" --> | ||
<primary-file>newdelete.cu</primary-file> | ||
|
||
<!-- Collection of [OS][:arch] tokens --> | ||
<supported-env>linux:x86_64, linux:i686, windows7, macosx:x86_64, macosx:i686, :arm</supported-env> | ||
</entry> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* | ||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. | ||
* | ||
* Please refer to the NVIDIA end user license agreement (EULA) associated | ||
* with this source code for terms and conditions that govern your use of | ||
* this software. Any use, reproduction, disclosure, or distribution of | ||
* this software and related documentation outside the terms of the EULA | ||
* is strictly prohibited. | ||
* | ||
*/ | ||
|
||
|
||
///////////////////////////////////////////////////////////////////////////// | ||
// | ||
// Container parent class. | ||
// | ||
//////////////////////////////////////////////////////////////////////////// | ||
|
||
|
||
template<class T> | ||
class Container { | ||
|
||
public: | ||
__device__ | ||
Container() {;} | ||
|
||
__device__ | ||
virtual ~Container() {;} | ||
|
||
__device__ | ||
virtual void push(T e) = 0; | ||
|
||
__device__ | ||
virtual bool pop(T &e) = 0; | ||
}; | ||
|
||
///////////////////////////////////////////////////////////////////////////// | ||
// | ||
// Vector class derived from Container class using linear memory as data storage | ||
// NOTE: This implementation is for educational purposes and has restricted functionality. | ||
// For example, concurrent push and pop operations will not work correctly. | ||
// | ||
//////////////////////////////////////////////////////////////////////////// | ||
|
||
|
||
template<class T> | ||
class Vector : public Container<T> { | ||
|
||
public: | ||
// Constructor, data is allocated on the heap | ||
// NOTE: This must be called from only one thread | ||
__device__ | ||
Vector(int max_size) : m_top(-1) { | ||
m_data = new T[max_size]; | ||
} | ||
|
||
// Constructor, data uses preallocated buffer via placement new | ||
__device__ | ||
Vector(int max_size, T* preallocated_buffer) : m_top(-1) { | ||
m_data = new (preallocated_buffer) T[max_size]; | ||
} | ||
|
||
// Destructor, data is freed | ||
// NOTE: This must be called from only one thread | ||
__device__ | ||
~Vector() { | ||
if( m_data ) delete [] m_data; | ||
} | ||
|
||
__device__ | ||
virtual | ||
void push(T e) { | ||
if( m_data ) { | ||
// Atomically increment the top idx | ||
int idx = atomicAdd(&(this->m_top), 1); | ||
m_data[idx+1] = e; | ||
} | ||
} | ||
|
||
__device__ | ||
virtual | ||
bool pop(T &e) { | ||
if( m_data && m_top >= 0 ) { | ||
// Atomically decrement the top idx | ||
int idx = atomicAdd( &(this->m_top), -1 ); | ||
if( idx >= 0 ) { | ||
e = m_data[idx]; | ||
return true; | ||
} | ||
} | ||
return false; | ||
|
||
} | ||
|
||
|
||
private: | ||
int m_size; | ||
T* m_data; | ||
|
||
int m_top; | ||
}; |
Oops, something went wrong.