Skip to content

Commit

Permalink
[SYCL][HIP] Add coarse-grained memory advice for HIP on AMD (#12394)
Browse files Browse the repository at this point in the history
Enables and tests coarse grained memory access via the memadvise
implementation for HIP platforms on AMD hardware.

See related UR changes for the adapter implementation:
oneapi-src/unified-runtime#1249

---------

Co-authored-by: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
Co-authored-by: aarongreig <aarongreig01@gmail.com>
  • Loading branch information
3 people authored Feb 5, 2024
1 parent b781e6c commit ab86d0d
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 8 deletions.
5 changes: 4 additions & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,10 @@
// 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM
// 15.43 Changed the signature of piextMemGetNativeHandle to also take a
// pi_device
// 15.44 Add coarse-grain memory advice flag for HIP.

#define _PI_H_VERSION_MAJOR 15
#define _PI_H_VERSION_MINOR 43
#define _PI_H_VERSION_MINOR 44

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -582,6 +583,8 @@ typedef enum {
PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST = 1 << 7,
PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST = 1 << 8,
PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST = 1 << 9,
PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED = 1 << 10,
PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED = 1 << 11,
PI_MEM_ADVICE_UNKNOWN = 0x7FFFFFFF,
} _pi_mem_advice;

Expand Down
14 changes: 7 additions & 7 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ endif()
if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
# commit 9363574db721d2388c7d76a10edb128764872352
# Merge: 553a6b82 5e513738
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit cd97e172cbbfc411fccb0b80e0fff6f9126574f4
# Merge: bd745d10 2a9ded6f
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Thu Feb 1 11:50:16 2024 +0000
# Merge pull request #1302 from kbenzie/benie/cl-binary-type-intermediate
# [CL] Handle INTERMEDIATE binary type
set(UNIFIED_RUNTIME_TAG 9363574db721d2388c7d76a10edb128764872352)
# Date: Fri Feb 2 14:24:16 2024 +0000
# Merge pull request #1249 from GeorgeWeb/georgi/hip_memadvise_coarse_grained
# [HIP] Implement coarse-grained memory advice for the HIP adapter
set(UNIFIED_RUNTIME_TAG cd97e172cbbfc411fccb0b80e0fff6f9126574f4)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3375,6 +3375,12 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) {
UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST;
}
if (Advice & PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED) {
UrAdvice |= UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY;
}
if (Advice & PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED) {
UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY;
}
if (Advice & PI_MEM_ADVICE_RESET) {
UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT;
}
Expand Down
2 changes: 2 additions & 0 deletions sycl/test-e2e/USM/memadvise_flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ int main() {
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED);
} else {
// Skip
return 0;
Expand Down
147 changes: 147 additions & 0 deletions sycl/test-e2e/USM/memory_coherency_hip.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// RUN: %{build} -o %t1.out
// REQUIRES: hip_amd
// RUN: %{run} %t1.out

//==---- memory_coherency_hip.cpp -----------------------------------------==//
// USM coarse/fine grain memory coherency test for the HIP-AMD backend.
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <sycl/sycl.hpp>

#include <chrono>
#include <iostream>
#include <thread>

namespace kernels {
class SquareKrnl final {
int *mPtr;

public:
SquareKrnl(int *ptr) : mPtr{ptr} {}

void operator()(sycl::id<1>) const { *mPtr = (*mPtr) * (*mPtr); }
};

class CoherencyTestKrnl final {
int *mPtr;

public:
CoherencyTestKrnl(int *ptr) : mPtr{ptr} {}

void operator()(sycl::id<1>) const {
auto atm = sycl::atomic_ref<int, sycl::memory_order::relaxed,
sycl::memory_scope::device>(mPtr[0]);

// mPtr was initialized to 1 by the host, now set it to 2.
atm.fetch_add(1);

// spin until mPtr is 3, then change it to 4.
int expected{3};
int old = atm.load();
while (true) {
old = atm.load();
if (old == expected) {
if (atm.compare_exchange_strong(old, 4)) {
break;
}
}
}
}
};
} // namespace kernels

int main() {
sycl::queue q{};
sycl::device dev = q.get_device();
sycl::context ctx = q.get_context();
if (!dev.get_info<sycl::info::device::usm_shared_allocations>()) {
std::cout << "Shared USM is not supported. Skipping test.\n";
return 0;
}

bool coherent{false};

int *ptr = sycl::malloc_shared<int>(1, q);

// Coherency test 1
//
// The following test validates if memory access is fine with memory allocated
// using malloc_managed() and COARSE_GRAINED advice set via mem_advise().
//
// Coarse grained memory is only guaranteed to be coherent outside of GPU
// kernels that modify it. Changes applied to coarse-grained memory by a GPU
// kernel are only visible to the rest of the system (CPU or other GPUs) when
// the kernel has completed. A GPU kernel is only guaranteed to see changes
// applied to coarse grained memory by the rest of the system (CPU or other
// GPUs) if those changes were made before the kernel launched.

// Hint to use coarse-grain memory.
q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED});

int init_val{9};
int expected{init_val * init_val};

*ptr = init_val;
q.parallel_for(sycl::range{1}, kernels::SquareKrnl{ptr});
// Synchronise the underlying stream.
q.wait();

// Check if caches are flushed correctly and same memory is between devices.
if (*ptr == expected) {
coherent = true;
} else {
std::cerr << "Coherency test failed. Value: " << *ptr
<< " (expected: " << expected << ")\n";
coherent = false;
}

// Coherency test 2
//
// The following test validates if fine-grain behavior is observed or not with
// memory allocated using malloc_managed().
//
// Fine grained memory allows CPUs and GPUs to synchronize (via atomics) and
// coherently communicate with each other while the GPU kernel is running.

// Hint to use fine-grain memory.
q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED});

init_val = 1;
expected = 4;

*ptr = init_val;
q.parallel_for(sycl::range{1}, kernels::CoherencyTestKrnl{ptr});

// wait until ptr is 2 from the kernel (or 3 seconds), then increment to 3.
while (*ptr == 2) {
using std::chrono_literals::operator""s;
std::this_thread::sleep_for(3s);
break;
}
*ptr += 1;

// Synchronise the underlying stream.
q.wait();

// Check if caches are flushed correctly and same memory is between devices.
if (*ptr == expected) {
coherent &= true;
} else {
std::cerr << "Coherency test failed. Value: " << *ptr
<< " (expected: " << expected << ")\n";
coherent = false;
}

// Cleanup
sycl::free(ptr, q);

// Check if all coherency tests passed.
assert(coherent);
// The above assert won't trigger with NDEBUG, so ensure the right exit code.
return coherent ? 0 : 1;
}

0 comments on commit ab86d0d

Please sign in to comment.