Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use unified memory in CUDA debug builds #621

Merged
merged 2 commits into the base branch from the source branch (branch names not captured in this page extract)
Aug 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions cuda/base/executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ void *CudaExecutor::raw_alloc(size_type num_bytes) const
{
void *dev_ptr = nullptr;
cuda::device_guard g(this->get_device_id());
#ifdef NDEBUG
auto error_code = cudaMalloc(&dev_ptr, num_bytes);
#else
auto error_code = cudaMallocManaged(&dev_ptr, num_bytes);
#endif
if (error_code != cudaErrorMemoryAllocation) {
GKO_ASSERT_NO_CUDA_ERRORS(error_code);
}
Expand All @@ -123,27 +127,27 @@ void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes,
}


void CudaExecutor::raw_copy_to(const CudaExecutor *src, size_type num_bytes,
void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes,
const void *src_ptr, void *dest_ptr) const
{
if (num_bytes > 0) {
cuda::device_guard g(this->get_device_id());
GKO_ASSERT_NO_CUDA_ERRORS(cudaMemcpyPeer(dest_ptr, this->device_id_,
src_ptr, src->get_device_id(),
num_bytes));
GKO_ASSERT_NO_CUDA_ERRORS(
cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
this->get_device_id(), num_bytes));
}
}


void CudaExecutor::raw_copy_to(const HipExecutor *src, size_type num_bytes,
void CudaExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes,
const void *src_ptr, void *dest_ptr) const
{
#if GINKGO_HIP_PLATFORM_NVCC == 1
if (num_bytes > 0) {
cuda::device_guard g(this->get_device_id());
GKO_ASSERT_NO_CUDA_ERRORS(cudaMemcpyPeer(dest_ptr, this->device_id_,
src_ptr, src->get_device_id(),
num_bytes));
GKO_ASSERT_NO_CUDA_ERRORS(
cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
this->get_device_id(), num_bytes));
}
#else
GKO_NOT_SUPPORTED(this);
Expand Down
5 changes: 3 additions & 2 deletions dev_tools/scripts/gdb-ginkgo.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,15 @@ def __init__(self, val):
self.val = val
self.execname = str(self.val['exec_']['_M_ptr'].dereference().dynamic_type)
self.pointer = get_unique_ptr_data_ptr(self.val['data_']);
self.is_cpu = re.match('gko::(Reference|Omp)Executor', str(self.execname)) is not None
# Cuda allows access via unified memory in Debug builds
self.is_cpu = re.match('gko::(Reference|Omp|Cuda)Executor', str(self.execname)) is not None

def children(self):
if self.is_cpu:
return self._iterator(self.pointer, self.val['num_elems_'])
return []

def to_string(self):
def to_string(self):
return ('%s of length %d on %s (%s)' % (str(self.val.type), int(self.val['num_elems_']), self.execname, self.pointer))

def display_hint(self):
Expand Down
16 changes: 10 additions & 6 deletions hip/base/executor.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ void *HipExecutor::raw_alloc(size_type num_bytes) const
{
void *dev_ptr = nullptr;
hip::device_guard g(this->get_device_id());
#if defined(NDEBUG) || (GINKGO_HIP_PLATFORM_HCC == 1)
auto error_code = hipMalloc(&dev_ptr, num_bytes);
#else
auto error_code = hipMallocManaged(&dev_ptr, num_bytes);
#endif
if (error_code != hipErrorMemoryAllocation) {
GKO_ASSERT_NO_HIP_ERRORS(error_code);
}
Expand All @@ -123,14 +127,14 @@ void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes,
}


void HipExecutor::raw_copy_to(const CudaExecutor *src, size_type num_bytes,
void HipExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes,
const void *src_ptr, void *dest_ptr) const
{
#if GINKGO_HIP_PLATFORM_NVCC == 1
if (num_bytes > 0) {
hip::device_guard g(this->get_device_id());
GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, this->device_id_,
src_ptr, src->get_device_id(),
GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(),
src_ptr, this->get_device_id(),
num_bytes));
}
#else
Expand All @@ -139,13 +143,13 @@ void HipExecutor::raw_copy_to(const CudaExecutor *src, size_type num_bytes,
}


void HipExecutor::raw_copy_to(const HipExecutor *src, size_type num_bytes,
void HipExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes,
const void *src_ptr, void *dest_ptr) const
{
if (num_bytes > 0) {
hip::device_guard g(this->get_device_id());
GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, this->device_id_,
src_ptr, src->get_device_id(),
GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(),
src_ptr, this->get_device_id(),
num_bytes));
}
}
Expand Down