Close #3 examples for the use of PolicyAllocator

- example01 uses ScatterAlloc with namespaces, adds 2 vectors and sums them - example02 uses ScatterAlloc with the malloc overwrite mechanism - example03 (newdelete_example) implements a std::vector-like container on GPU. This example comes from NVIDIA and was adapted to use ScatterAlloc.
ax3l · Apr 17, 2014 · b23fc1a · b23fc1a
1 parent 9d42761
commit b23fc1a
Show file tree

Hide file tree

Showing 17 changed files with 2,846 additions and 1 deletion.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -22,3 +22,9 @@ cuda_add_executable(ScatterAllocExample
                     examples/example.cu )
 cuda_add_executable(PolicyAllocExample
                     examples/policy_example.cu )
+# Additional policy-allocator examples, one executable per CUDA source file.
+# Per the commit description: example01 uses ScatterAlloc with namespaces,
+# example02 uses the malloc overwrite mechanism, and example03 is the
+# NVIDIA new/delete sample adapted to ScatterAlloc.
+cuda_add_executable(PolicyAllocExample01 examples/policy_example01.cu)
+cuda_add_executable(PolicyAllocExample02 examples/policy_example02.cu)
+cuda_add_executable(PolicyAllocExample03 examples/newdelete_example/newdelete.cu)
diff --git a/examples/newdelete_example/Makefile b/examples/newdelete_example/Makefile
@@ -0,0 +1,135 @@
+################################################################################
+#
+# Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
+#
+# NOTICE TO USER:   
+#
+# This source code is subject to NVIDIA ownership rights under U.S. and 
+# international Copyright laws.  
+#
+# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE 
+# CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR 
+# IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH 
+# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 
+# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, 
+# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE 
+# OR PERFORMANCE OF THIS SOURCE CODE.  
+#
+# U.S. Government End Users.  This source code is a "commercial item" as 
+# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting  of 
+# "commercial computer software" and "commercial computer software 
+# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) 
+# and is provided to the U.S. Government only as a commercial end item.  
+# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 
+# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the 
+# source code with only those rights set forth herein.
+#
+################################################################################
+#
+# Makefile project is only supported on Mac OS X and Linux platforms.
+#
+################################################################################
+
+# Pull in the helper makefile that sits next to this one. OS_SIZE, OS_ARCH,
+# OSLOWER, DARWIN, STDLIB and NVCC are used below but never assigned in this
+# file, so they are presumably provided by findcudalib.mk (verify there).
+include ./findcudalib.mk
+
+# Location of the CUDA Toolkit
+# `?=` only assigns when CUDA_PATH is not already set, so the value can be
+# overridden from the environment or the make command line.
+CUDA_PATH ?= "/opt/pkg/devel/cuda/5.5"
+
+# internal flags
+# Start from a known state: nvcc gets the word-size switch, the host compiler
+# and both linker flag lists start out empty and are extended further down.
+NVCCFLAGS   := -m${OS_SIZE}
+CCFLAGS     :=
+NVCCLDFLAGS :=
+LDFLAGS     :=
+
+# Extra user flags
+# Left empty unless the caller supplies them; they are appended after the
+# internal flags when ALL_CCFLAGS / ALL_LDFLAGS are assembled.
+EXTRA_NVCCFLAGS   ?=
+EXTRA_NVCCLDFLAGS ?=
+EXTRA_LDFLAGS     ?=
+EXTRA_CCFLAGS     ?=
+
+# OS-specific build flags
+# A non-empty DARWIN selects the first branch (rpath into the toolkit's lib
+# directory plus -arch/STDLIB for the host compiler). Otherwise only the
+# armv7l architecture receives extra flags, chosen by the `abi` variable.
+ifneq ($(DARWIN),) 
+  LDFLAGS += -rpath $(CUDA_PATH)/lib
+  CCFLAGS += -arch $(OS_ARCH) $(STDLIB)  
+else
+  ifeq ($(OS_ARCH),armv7l)
+    ifeq ($(abi),gnueabi)
+      CCFLAGS += -mfloat-abi=softfp
+    else
+      # default to gnueabihf
+      # `override` makes the assignment win even when abi was given on the
+      # make command line with a value other than gnueabi.
+      override abi := gnueabihf
+      LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
+      CCFLAGS += -mfloat-abi=hard
+    endif
+  endif
+endif
+
+# Settings applied when ARMv7 is set to 1: nvcc is told to target ARM and,
+# if TARGET_FS names a target root filesystem, the host compiler and linker
+# are pointed at it via --sysroot and -rpath-link.
+ifeq ($(ARMv7),1)
+NVCCFLAGS += -target-cpu-arch ARM
+ifneq ($(TARGET_FS),) 
+CCFLAGS += --sysroot=$(TARGET_FS)
+LDFLAGS += --sysroot=$(TARGET_FS)
+LDFLAGS += -rpath-link=$(TARGET_FS)/lib
+LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
+LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-$(abi)
+endif
+endif
+
+# Debug build flags
+# `make dbg=1` adds -g -G to the nvcc flags and names the build "debug";
+# any other value of dbg gives a "release" build. TARGET is only used as a
+# path component of the ../../bin output directory in the rules below.
+ifeq ($(dbg),1)
+      NVCCFLAGS += -g -G
+      TARGET := debug
+else
+      TARGET := release
+endif
+
+# Assemble the final compile flags handed to nvcc. Each word of the host
+# compiler flag lists is prefixed with -Xcompiler; the user-supplied EXTRA_*
+# flags come after the internal ones.
+ALL_CCFLAGS :=
+ALL_CCFLAGS += $(NVCCFLAGS)
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
+ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
+
+# Assemble the final link flags: everything from ALL_CCFLAGS first, then the
+# nvcc link flags, then each word of the linker flag lists prefixed with
+# -Xlinker.
+ALL_LDFLAGS :=
+ALL_LDFLAGS += $(ALL_CCFLAGS)
+ALL_LDFLAGS += $(NVCCLDFLAGS)
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
+ALL_LDFLAGS += $(EXTRA_NVCCLDFLAGS)
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
+
+# Common includes and paths for CUDA
+# NOTE(review): the include path is relative to this directory and matches
+# the NVIDIA samples layout (../../common/inc); confirm it exists here.
+INCLUDES  := -I../../common/inc
+LIBRARIES :=
+
+################################################################################
+
+# CUDA code generation flags
+# Code is generated for sm_20, sm_30 and sm_35 (the latter also embeds
+# compute_35 PTX). The sample's minimum spec is SM 2.0 (see <minspec> in
+# NsightEclipse.xml), so no SM 1.x variant is defined or referenced; the
+# previous $(GENCODE_SM10) reference pointed at a variable that this
+# Makefile never assigns.
+GENCODE_SM20    := -gencode arch=compute_20,code=sm_20
+GENCODE_SM30    := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\"
+GENCODE_FLAGS   := $(GENCODE_SM20) $(GENCODE_SM30)
+
+################################################################################
+
+# Target rules
+#   all / build : compile and link the newdelete sample
+#   run         : build, then execute the sample from this directory
+#   clean       : remove the object file, the binary and its installed copy
+#   clobber     : alias for clean
+all: build
+
+# These targets never create a file of the same name. Declaring them phony
+# keeps them working even if a file or directory called e.g. "build" or
+# "clean" appears next to the Makefile.
+.PHONY: all build run clean clobber
+
+build: newdelete
+
+# Compile the CUDA source into an object file.
+newdelete.o: newdelete.cu
+	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
+
+# Link the executable, then copy it into the shared bin directory, which is
+# keyed by architecture, OS, build type and (when set) ABI.
+newdelete: newdelete.o
+	$(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES)
+	mkdir -p ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))
+	cp $@ ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))
+
+run: build
+	./newdelete
+
+clean:
+	rm -f newdelete newdelete.o
+	rm -rf ../../bin/$(OS_ARCH)/$(OSLOWER)/$(TARGET)$(if $(abi),/$(abi))/newdelete
+
+clobber: clean
diff --git a/examples/newdelete_example/NsightEclipse.xml b/examples/newdelete_example/NsightEclipse.xml
@@ -0,0 +1,43 @@
+<entry>
+    <name>NewDelete</name>
+    <description>
+		<![CDATA[This sample demonstrates dynamic global memory allocation through device C++ new and delete operators and virtual function declarations available with CUDA 4.0. ]]></description>
+    <whitepaper></whitepaper>
+    <minspec>SM 2.0</minspec>
+    <keywords>CUDA, C++, Fermi</keywords>
+    <scope>1:CUDA Advanced Topics</scope>
+    <files>
+    </files>
+
+
+    <!-- *** Additional project definition information *** -->
+    <!-- Libraries that the project should be linked against. Document may contain any number of "libraries" elements. -->
+    <!-- Attributes "os" and "arch" are optional and may be used in any combination. -->
+    <!-- "framework" attribute will be ignored on platforms other than Mac OS X -->
+
+    <!-- Either absolute or relative to sample -->
+    <library-path os="windows"></library-path>
+
+    <!-- Either absolute or relative to sample -->
+    <include-path>./</include-path>
+    <include-path>../</include-path>
+    <include-path>../../common/inc</include-path>
+
+    <!-- Can be either "separate" or "whole", omitting this element means that sample can be compiled either way -->
+    <devicecompilation>whole</devicecompilation>
+
+    <!-- These options will be passed to NVCC compiler as is and for all files -->
+    <nvcc-compiler></nvcc-compiler>
+    <nvcc-compiler file="newdelete.cu"></nvcc-compiler>
+    <sm-arch>sm20</sm-arch>
+    <sm-arch>sm30</sm-arch>
+
+    <!-- One of "exe|dll|lib" (exe is default) -->
+    <type>exe</type>
+
+    <!-- By default the IDE will open the file "samplename.(cu|cpp)" -->
+    <primary-file>newdelete.cu</primary-file>
+
+    <!-- Collection of [OS][:arch] tokens -->
+    <supported-env>linux:x86_64, linux:i686, windows7, macosx:x86_64, macosx:i686, :arm</supported-env>
+</entry>
diff --git a/examples/newdelete_example/container.hpp b/examples/newdelete_example/container.hpp
@@ -0,0 +1,101 @@
+/*
+* Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Container parent class.
+//
+////////////////////////////////////////////////////////////////////////////
+
+
+// Abstract interface for device-side containers holding elements of type T.
+// All members are __device__ functions; push() and pop() are pure virtual
+// and must be provided by a derived class.
+template<class T>
+class Container {
+public:
+    // Nothing to set up in the interface itself.
+    __device__ Container() {}
+
+    // Virtual so that destroying an object through a Container<T> pointer
+    // also runs the derived class' destructor.
+    __device__ virtual ~Container() {}
+
+    // Store a copy of e in the container.
+    __device__ virtual void push(T e) = 0;
+
+    // Fetch an element into e. The bool result is the implementation's
+    // success indicator.
+    __device__ virtual bool pop(T &e) = 0;
+};
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Vector class derived from Container class using linear memory as data storage
+// NOTE: This implementation is for educational purposes and has restricted
+//       functionality. For example, concurrent push and pop operations will
+//       not work correctly.
+//
+////////////////////////////////////////////////////////////////////////////
+
+
+// Fixed-capacity LIFO container on top of a linear array.
+//
+// The storage is either allocated by the constructor with new[] (and then
+// released by the destructor) or handed in by the caller as a preallocated
+// buffer (which the Vector never frees). m_top holds the index of the most
+// recently stored element and is -1 while the container is empty; it is
+// updated with atomicAdd. Mixing push() and pop() concurrently is not
+// supported.
+template<class T>
+class Vector : public Container<T> {
+
+public:
+    // Constructor, data is allocated on the heap.
+    // NOTE: This must be called from only one thread
+    //
+    // max_size: capacity in elements; negative values are treated as 0.
+    // push() and pop() check m_data for null before touching the storage.
+    __device__
+    Vector(int max_size)
+        : m_size(max_size > 0 ? max_size : 0),
+          m_data(0),
+          m_top(-1),
+          m_owns_data(true) {
+        m_data = new T[m_size];
+    }
+
+    // Constructor, data uses preallocated buffer via placement new.
+    //
+    // max_size:            capacity in elements; negative values are treated as 0.
+    // preallocated_buffer: caller-owned storage, assumed to be large enough
+    //                      for max_size elements (not verified here). A null
+    //                      pointer yields a Vector that ignores push()/pop().
+    __device__
+    Vector(int max_size, T* preallocated_buffer)
+        : m_size(max_size > 0 ? max_size : 0),
+          m_data(0),
+          m_top(-1),
+          m_owns_data(false) {
+        if( preallocated_buffer ) {
+            m_data = new (preallocated_buffer) T[m_size];
+        }
+    }
+
+    // Destructor.
+    // NOTE: This must be called from only one thread
+    //
+    // Heap storage obtained by the first constructor is released with
+    // delete[]. A caller-supplied buffer is NOT freed (it was never obtained
+    // from new[]); only the elements constructed in it are destroyed.
+    __device__
+    ~Vector() {
+        if( !m_data ) return;
+
+        if( m_owns_data ) {
+            delete [] m_data;
+        } else {
+            // Undo the placement new: destroy elements in reverse order and
+            // leave the memory itself to the caller.
+            for( int i = m_size - 1; i >= 0; --i ) {
+                m_data[i].~T();
+            }
+        }
+    }
+
+    // Append e on top of the stored elements.
+    // When the container is already full (or has no storage) the element is
+    // dropped and the container is left unchanged.
+    __device__
+    virtual
+    void push(T e) {
+        if( m_data ) {
+            // Atomically increment the top idx; atomicAdd returns the old
+            // value, so the slot claimed by this thread is old + 1.
+            int idx = atomicAdd(&(this->m_top), 1) + 1;
+            if( idx < m_size ) {
+                m_data[idx] = e;
+            } else {
+                // No room: give the claimed slot back instead of writing
+                // past the end of the array.
+                atomicAdd(&(this->m_top), -1);
+            }
+        }
+    }
+
+    // Remove the top element and copy it into e.
+    // Returns true when an element was written to e, false when the
+    // container was empty or has no storage (e is left untouched then).
+    __device__
+    virtual
+    bool pop(T &e) {
+        if( m_data && m_top >= 0 ) {
+            // Atomically decrement the top idx; the old value is the slot
+            // this thread is allowed to read.
+            int idx = atomicAdd( &(this->m_top), -1 );
+            if( idx >= 0 ) {
+                e = m_data[idx];
+                return true;
+            }
+            // Another thread emptied the container first: undo the
+            // decrement so m_top does not drift below -1.
+            atomicAdd( &(this->m_top), 1 );
+        }
+        return false;
+    }
+
+
+private:
+    int m_size;        // capacity in elements
+    T* m_data;         // element storage, null when unavailable
+
+    int m_top;         // index of the most recently pushed element, -1 when empty
+    bool m_owns_data;  // true when m_data came from new[] and must be delete[]d
+};