Skip to content

Commit

Permalink
Close #3 examples for the use of PolicyAllocator
Browse files Browse the repository at this point in the history
 - example01 uses ScatterAlloc with namespaces, adds 2 vectors and sums
   them
 - example02 uses ScatterAlloc with the malloc overwrite mechanism
 - example03 (newdelete_example) implements a std::vector-like container
   on GPU. This example comes from NVIDIA and was adapted to use
   ScatterAlloc.
  • Loading branch information
Carlchristian Eckert committed Apr 17, 2014
1 parent 9d42761 commit b23fc1a
Show file tree
Hide file tree
Showing 17 changed files with 2,846 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ cuda_add_executable(ScatterAllocExample
examples/example.cu )
cuda_add_executable(PolicyAllocExample
examples/policy_example.cu )
# Example 01: ScatterAlloc used through namespaces; adds two vectors and sums them.
cuda_add_executable(PolicyAllocExample01
examples/policy_example01.cu )
# Example 02: ScatterAlloc used through the malloc overwrite mechanism.
cuda_add_executable(PolicyAllocExample02
examples/policy_example02.cu )
# Example 03: std::vector-like container on the GPU (NVIDIA newdelete sample
# adapted to use ScatterAlloc).
cuda_add_executable(PolicyAllocExample03
examples/newdelete_example/newdelete.cu )
135 changes: 135 additions & 0 deletions examples/newdelete_example/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
################################################################################
#
# Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO USER:
#
# This source code is subject to NVIDIA ownership rights under U.S. and
# international Copyright laws.
#
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
# OR PERFORMANCE OF THIS SOURCE CODE.
#
# U.S. Government End Users. This source code is a "commercial item" as
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
# "commercial computer software" and "commercial computer software
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
# and is provided to the U.S. Government only as a commercial end item.
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
# source code with only those rights set forth herein.
#
################################################################################
#
# Makefile project (only supported on Mac OS X and Linux platforms)
#
################################################################################

# Variables used below but not defined in this file (NVCC, OS_SIZE, OS_ARCH,
# OSLOWER, DARWIN, ARMv7, STDLIB, TARGET_FS) are presumably provided by
# findcudalib.mk -- TODO confirm.
include ./findcudalib.mk

# Location of the CUDA Toolkit
# `?=` only assigns when CUDA_PATH is not already set, so the default can be
# overridden from the environment or the make command line.
CUDA_PATH ?= "/opt/pkg/devel/cuda/5.5"

# internal flags
# These start (nearly) empty and are extended by the conditionals further down.
NVCCFLAGS := -m${OS_SIZE}
CCFLAGS :=
NVCCLDFLAGS :=
LDFLAGS :=

# Extra user flags
# Left empty unless supplied by the user; they are appended to the final
# compile and link command lines (ALL_CCFLAGS / ALL_LDFLAGS).
EXTRA_NVCCFLAGS ?=
EXTRA_NVCCLDFLAGS ?=
EXTRA_LDFLAGS ?=
EXTRA_CCFLAGS ?=

# OS-specific build flags
# A non-empty DARWIN selects the first branch (presumably Mac OS X, set by
# findcudalib.mk -- TODO confirm).
ifneq ($(DARWIN),)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(OS_ARCH) $(STDLIB)
else
# Non-Darwin: only an armv7l OS_ARCH needs extra flags; the float ABI is
# chosen through the user-settable `abi` variable.
ifeq ($(OS_ARCH),armv7l)
ifeq ($(abi),gnueabi)
CCFLAGS += -mfloat-abi=softfp
else
# default to gnueabihf
# `override` forces the value even if `abi` was given on the command line.
override abi := gnueabihf
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
endif
endif
endif

# ARMv7=1 makes nvcc target an ARM CPU.
ifeq ($(ARMv7),1)
NVCCFLAGS += -target-cpu-arch ARM
# When TARGET_FS is set it is used as the sysroot for compiling and linking,
# and its library directories are added as -rpath-link search paths.
ifneq ($(TARGET_FS),)
CCFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi)
endif
endif

# Debug build flags
# dbg=1 adds -g -G to the nvcc flags. TARGET (debug/release) is also used as a
# path component of the ../../bin install directory in the target rules.
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif

# Final compile flags: nvcc flags as-is, host-compiler flags with every word
# prefixed by -Xcompiler, then the user-supplied extras in the same manner.
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))

# Final link flags: everything from ALL_CCFLAGS, plus nvcc link flags and the
# linker flags with every word prefixed by -Xlinker.
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(NVCCLDFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))

# Common includes and paths for CUDA
# The include path is relative to this Makefile's directory; no extra
# libraries are linked.
INCLUDES := -I../../common/inc
LIBRARIES :=

################################################################################

# CUDA code generation flags
# Code is generated for sm_20, sm_30 and sm_35 (the latter with embedded
# compute_35 PTX). No SM 1.x target is listed: this file never defined
# GENCODE_SM10, and the sample's minimum spec is SM 2.0.
GENCODE_SM20 := -gencode arch=compute_20,code=sm_20
GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\"
GENCODE_FLAGS := $(GENCODE_SM20) $(GENCODE_SM30)

################################################################################

# Target rules
# all/build/run/clean/clobber are commands, not files; declaring them phony
# keeps them working even if a file with one of these names exists.
.PHONY: all build run clean clobber

all: build

build: newdelete

# Compile the single CUDA source into an object file.
newdelete.o: newdelete.cu
	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the bin tree
# (../../bin/<arch>/<os>/<debug|release>[/<abi>]).
newdelete: newdelete.o
	$(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES)
	mkdir -p ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))
	cp $@ ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))

run: build
	./newdelete

# Remove the local build products and the copy placed in the bin tree.
clean:
	rm -f newdelete newdelete.o
	rm -rf ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))/newdelete

clobber: clean
43 changes: 43 additions & 0 deletions examples/newdelete_example/NsightEclipse.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<entry>
<name>NewDelete</name>
<description>
<![CDATA[This sample demonstrates dynamic global memory allocation through device C++ new and delete operators and virtual function declarations available with CUDA 4.0. ]]></description>
<whitepaper></whitepaper>
<minspec>SM 2.0</minspec>
<keywords>CUDA, C++, Fermi</keywords>
<scope>1:CUDA Advanced Topics</scope>
<files>
</files>


<!-- *** Additional project definition information *** -->
<!-- Libraries that the project should be linked against. Document may contain any number of "libraries" elements. -->
<!-- Attributes "os" and "arch" are optional and may be used in any combination. -->
<!-- "framework" attribute will be ignored on platforms other than Mac OS X -->

<!-- Either absolute or relative to sample -->
<library-path os="windows"></library-path>

<!-- Either absolute or relative to sample -->
<include-path>./</include-path>
<include-path>../</include-path>
<include-path>../../common/inc</include-path>

<!-- Can be either "separate" or "whole", omitting this element means that sample can be compiled either way -->
<devicecompilation>whole</devicecompilation>

<!-- These options will be passed to NVCC compiler as is and for all files -->
<nvcc-compiler></nvcc-compiler>
<nvcc-compiler file="newdelete.cu"></nvcc-compiler>
<sm-arch>sm20</sm-arch>
<sm-arch>sm30</sm-arch>

<!-- One of "exe|dll|lib" (exe is default) -->
<type>exe</type>

<!-- By default the IDE will open the file "samplename.(cu|cpp)" -->
<primary-file>newdelete.cu</primary-file>

<!-- Collection of [OS][:arch] tokens -->
<supported-env>linux:x86_64, linux:i686, windows7, macosx:x86_64, macosx:i686, :arm</supported-env>
</entry>
101 changes: 101 additions & 0 deletions examples/newdelete_example/container.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/


/////////////////////////////////////////////////////////////////////////////
//
// Container parent class.
//
////////////////////////////////////////////////////////////////////////////


template<class T>
class Container {
public:
    // Nothing to set up: the interface carries no state of its own.
    __device__ Container() {}

    // Virtual so that a derived container can be destroyed through a
    // Container<T> pointer.
    __device__ virtual ~Container() {}

    // Stores the element e in the container.
    __device__ virtual void push(T e) = 0;

    // Removes an element and writes it to e; the boolean result reports
    // whether an element was actually delivered.
    __device__ virtual bool pop(T &e) = 0;
};

/////////////////////////////////////////////////////////////////////////////
//
// Vector class derived from Container class using linear memory as data storage
// NOTE: This implementation is for educational purposes and has restricted functionality.
// For example, concurrent push and pop operations will not work correctly.
//
////////////////////////////////////////////////////////////////////////////


template<class T>
class Vector : public Container<T> {

public:
    // Constructor, data is allocated on the heap with new[] and released
    // again by the destructor.
    // A non-positive max_size yields an empty vector without a buffer.
    // NOTE: This must be called from only one thread
    __device__
    Vector(int max_size)
        : m_size(max_size > 0 ? max_size : 0),
          m_data(0),
          m_owns_data(true),
          m_top(-1) {
        if( m_size > 0 ) {
            m_data = new T[m_size];
        }
    }

    // Constructor, data uses preallocated buffer via placement new.
    // The buffer must provide room for max_size elements and stays owned by
    // the caller: the destructor never frees it.
    // A null buffer or non-positive max_size yields an empty vector.
    __device__
    Vector(int max_size, T* preallocated_buffer)
        : m_size(max_size > 0 ? max_size : 0),
          m_data(0),
          m_owns_data(false),
          m_top(-1) {
        if( preallocated_buffer && m_size > 0 ) {
            m_data = new (preallocated_buffer) T[m_size];
        }
    }

    // Destructor. Heap storage obtained by this object is freed; for a
    // preallocated buffer only the element destructors run, because the
    // storage itself was not obtained with new[] and must not be deleted.
    // NOTE: This must be called from only one thread
    __device__
    ~Vector() {
        if( !m_data ) return;

        if( m_owns_data ) {
            delete [] m_data;
        } else {
            for( int i = m_size - 1; i >= 0; --i ) {
                m_data[i].~T();
            }
        }
    }

    // Appends e. If the vector is already full (or has no buffer) the element
    // is dropped instead of being written past the end of the storage.
    __device__
    virtual
    void push(T e) {
        if( m_data ) {
            // Atomically increment the top idx; the reserved slot is old+1
            int slot = atomicAdd(&(this->m_top), 1) + 1;
            if( slot >= 0 && slot < m_size ) {
                m_data[slot] = e;
            } else {
                // No valid slot: undo the reservation so m_top stays in range
                atomicAdd(&(this->m_top), -1);
            }
        }
    }

    // Removes the top element into e and returns true, or returns false
    // (leaving e untouched) when the vector is empty.
    __device__
    virtual
    bool pop(T &e) {
        if( m_data && m_top >= 0 ) {
            // Atomically decrement the top idx
            int idx = atomicAdd( &(this->m_top), -1 );
            if( idx >= 0 ) {
                e = m_data[idx];
                return true;
            }
            // Another thread took the last element between the check and the
            // decrement: restore the index so it does not drift below -1,
            // which would make a later push write before the buffer.
            atomicAdd( &(this->m_top), 1 );
        }
        return false;

    }


private:
    int m_size;        // capacity in elements (0 when there is no buffer)
    T* m_data;         // element storage, 0 when unavailable
    bool m_owns_data;  // true when m_data came from new[] in the constructor

    int m_top;         // index of the current top element, -1 when empty
};
Loading

0 comments on commit b23fc1a

Please sign in to comment.