From 606c4ddeef52e6de78a271adb05bb7c3de334e22 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Tue, 8 Aug 2023 16:56:18 +0100 Subject: [PATCH] [SYCL][HIP] Add coarse-grained memory advice for HIP-AMD --- sycl/include/sycl/detail/pi.h | 2 + sycl/plugins/unified_runtime/pi2ur.hpp | 6 ++ .../ur/adapters/hip/enqueue.cpp | 63 +++++++++++-------- sycl/test-e2e/USM/memadvise_hip.cpp | 58 +++++++++++++++++ sycl/test-e2e/USM/memadvise_hip_coherency.cpp | 57 +++++++++++++++++ 5 files changed, 161 insertions(+), 25 deletions(-) create mode 100644 sycl/test-e2e/USM/memadvise_hip.cpp create mode 100644 sycl/test-e2e/USM/memadvise_hip_coherency.cpp diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 4ad418b5ccbb7..c9ed2d95ab287 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -574,6 +574,8 @@ typedef enum { PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST = 1 << 7, PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST = 1 << 8, PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST = 1 << 9, + PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED = 1 << 10, + PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED = 1 << 11, PI_MEM_ADVICE_UNKNOWN = 0x7FFFFFFF, } _pi_mem_advice; diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 4cb0fadddf783..9a3e6becc2a40 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3240,6 +3240,12 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) { UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST; } + if (Advice & PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED) { + UrAdvice |= UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY; + } + if (Advice & PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED) { + UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY; + } if (Advice & PI_MEM_ADVICE_RESET) { UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp index 00c8e63294ed6..89e6caace0af8 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp @@ -12,6 +12,7 @@ #include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" +#include namespace { @@ -88,24 +89,35 @@ ur_result_t setHipMemAdvise(const void *DevPtr, size_t Size, hipDevice_t Device) { using ur_to_hip_advice_t = std::pair; - static constexpr std::array - URToHIPMemAdviseDeviceFlags{ - std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, - hipMemAdviseSetReadMostly), - std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, - hipMemAdviseUnsetReadMostly), - std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - hipMemAdviseSetPreferredLocation), - std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, - hipMemAdviseUnsetPreferredLocation), - std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE, - hipMemAdviseSetAccessedBy), - std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE, - hipMemAdviseUnsetAccessedBy), - }; - for (auto &FlagPair : URToHIPMemAdviseDeviceFlags) { - if (URAdviceFlags & FlagPair.first) { - UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, FlagPair.second, Device)); +#if defined(__HIP_PLATFORM_AMD__) + constexpr int DeviceFlagCount = 8; +#else + constexpr int DeviceFlagCount = 6; +#endif + static constexpr std::array + URToHIPMemAdviseDeviceFlags { + std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, + hipMemAdviseSetReadMostly), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, + hipMemAdviseUnsetReadMostly), + std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, + hipMemAdviseSetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, + hipMemAdviseUnsetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE, + hipMemAdviseSetAccessedBy), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE, + hipMemAdviseUnsetAccessedBy), +#if defined(__HIP_PLATFORM_AMD__) + std::make_pair(UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, + hipMemAdviseSetCoarseGrain), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY, + hipMemAdviseUnsetCoarseGrain), +#endif + }; + for (auto &[URDevAdvice, HipDevAdvice] : URToHIPMemAdviseDeviceFlags) { + if (URAdviceFlags & URDevAdvice) { + UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HipDevAdvice, Device)); } } @@ -120,18 +132,15 @@ ur_result_t setHipMemAdvise(const void *DevPtr, size_t Size, hipMemAdviseUnsetAccessedBy), }; - for (auto &FlagPair : URToHIPMemAdviseHostFlags) { - if (URAdviceFlags & FlagPair.first) { - UR_CHECK_ERROR( - hipMemAdvise(DevPtr, Size, FlagPair.second, hipCpuDeviceId)); + for (auto &[URHostAdvice, HipHostAdvice] : URToHIPMemAdviseHostFlags) { + if (URAdviceFlags & URHostAdvice) { + UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HipHostAdvice, hipCpuDeviceId)); } } // Handle unmapped memory advice flags if (URAdviceFlags & - (UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY | - UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY | - UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) { + (UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -1443,6 +1452,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, pMem, size, hipMemAdviseUnsetPreferredLocation, DeviceID)); UR_CHECK_ERROR( hipMemAdvise(pMem, size, hipMemAdviseUnsetAccessedBy, DeviceID)); +#if defined(__HIP_PLATFORM_AMD__) + UR_CHECK_ERROR( + hipMemAdvise(pMem, size, hipMemAdviseUnsetCoarseGrain, DeviceID)); +#endif } else { Result = setHipMemAdvise(HIPDevicePtr, size, advice, DeviceID); // UR_RESULT_ERROR_INVALID_ENUMERATION is returned when using a valid but diff --git a/sycl/test-e2e/USM/memadvise_hip.cpp b/sycl/test-e2e/USM/memadvise_hip.cpp new file mode 100644 index 0000000000000..c9e010efa3f70 --- /dev/null +++ b/sycl/test-e2e/USM/memadvise_hip.cpp @@ -0,0 +1,58 @@ +// RUN: %{build} -o %t1.out +// REQUIRES: hip_amd +// RUN: %{run} %t1.out + +//==---------------- memadvise_cuda.cpp ------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "sycl/context.hpp" +#include "sycl/detail/pi.h" +#include "sycl/device.hpp" +#include +#include +#include + +int main() { + sycl::queue q; + sycl::device dev = q.get_device(); + sycl::context ctx = q.get_context(); + if (!dev.get_info()) { + std::cout << "Shared USM is not supported. Skipping test." << std::endl; + return 0; + } + + constexpr size_t size = 100; + void *ptr = sycl::malloc_shared(size, dev, ctx); + if (ptr == nullptr) { + std::cout << "Allocation failed!" << std::endl; + return -1; + } + + // NOTE: PI_MEM_ADVICE_CUDA_* advice values are mapped to the HIP backend too. + std::vector valid_advices{ + PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY, + PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY, + PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION, + PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION, + PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY, + PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY, + PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST, + PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST, + PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST, + PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST, + PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED, + PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED, + }; + for (int advice : valid_advices) { + q.mem_advise(ptr, size, advice); + } + + q.wait_and_throw(); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/sycl/test-e2e/USM/memadvise_hip_coherency.cpp b/sycl/test-e2e/USM/memadvise_hip_coherency.cpp new file mode 100644 index 0000000000000..55752dd02fdf2 --- /dev/null +++ b/sycl/test-e2e/USM/memadvise_hip_coherency.cpp @@ -0,0 +1,57 @@ +// RUN: %{build} -o %t1.out +// REQUIRES: hip_amd +// RUN: %{run} %t1.out + +#include +#include + +namespace kernels { +class SquareKrnl final { +public: + SquareKrnl(int *ptr) : mPtr(ptr) {} + + void operator()(sycl::id<1>) const { + // mPtr value squared here + *mPtr = (*mPtr) * (*mPtr); + } + +private: + int *mPtr; +}; +} // namespace kernels + +int main() { + sycl::queue q; + sycl::device dev = q.get_device(); + sycl::context ctx = q.get_context(); + if (!dev.get_info()) { + std::cout << "Shared USM is not supported. Skipping test.\n"; + return 0; + } + + int *ptr = sycl::malloc_shared(1, q); + + // Hint that data coherency during simultaneous execution on + // both host and device is not necessary + constexpr int MemAdviseCoarseGrained = PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED; + q.mem_advise(ptr, sizeof(int), MemAdviseCoarseGrained); + + // Call this routine TesCoherency function! + constexpr int number = 9; + constexpr int expected = 81; + *ptr = number; + q.submit([&](sycl::handler &h) { + h.parallel_for(sycl::range{1}, kernels::SquareKrnl(ptr)); + }); + // Synchronise the underlying stream the work is run on before host access. + q.wait(); + // Check if caches are flushed correctly and same memory is between devices. + if (*ptr != expected) { + std::cout << "Coarse-grained mode coherency failed. Value = " << *ptr + << '\n'; + return 1; + } + + sycl::free(ptr, q); + return 0; +}