From dc5899cce983675223bc4320a70283a81a200cf8 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 19 Dec 2023 10:39:19 +0000 Subject: [PATCH 1/6] Testing adding handles to opencl --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 16 +++-- sycl/source/detail/buffer_impl.cpp | 14 ++-- sycl/source/event.cpp | 15 ++--- sycl/source/queue.cpp | 26 ++++++-- .../DeprecatedFeatures/set_arg_interop.cpp | 35 +++++++++- sycl/unittests/handler/CMakeLists.txt | 1 - .../handler/SetArgForLocalAccessor.cpp | 54 --------------- sycl/unittests/thread_safety/CMakeLists.txt | 1 - .../thread_safety/InteropKernelEnqueue.cpp | 66 ------------------- 9 files changed, 77 insertions(+), 151 deletions(-) delete mode 100644 sycl/unittests/handler/SetArgForLocalAccessor.cpp delete mode 100644 sycl/unittests/thread_safety/InteropKernelEnqueue.cpp diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index cf049fee41869..a3de1e3fc37d4 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,9 @@ -# commit 8e6ea72e44dab71e976b3daf439073c4ef9a847b -# Merge: 1f6945ce c57c181d -# Author: Kenneth Benzie (Benie) -# Date: Wed Jan 29 15:54:56 2025 +0000 -# Merge pull request #2619 from martygrant/martin/memimage-info-unswitch-redo -# Move urMemImageGetInfo success test from a switch to individual test -set(UNIFIED_RUNTIME_TAG 8e6ea72e44dab71e976b3daf439073c4ef9a847b) +set(UNIFIED_RUNTIME_REPO "https://github.com/omarahmed1111/unified-runtime.git") +# commit df6da35d6e67f2383db28dd49ab08c5c0ef541d2 +# Merge: 67590533 55bd5636 +# Author: aarongreig +# Date: Mon Oct 7 12:28:07 2024 +0100 +# Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize +# [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported +# instead of returning misleading default value +set(UNIFIED_RUNTIME_TAG df6da35d6e67f2383db28dd49ab08c5c0ef541d2) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 777091f6be572..1f889776f0451 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -49,15 +49,11 @@ void buffer_impl::destructorNotification(void *UserObj) { void buffer_impl::addInteropObject( std::vector &Handles) const { if (MOpenCLInterop) { - if (std::find(Handles.begin(), Handles.end(), - ur::cast(MInteropMemObject)) == - Handles.end()) { - const AdapterPtr &Adapter = getAdapter(); - Adapter->call( - ur::cast(MInteropMemObject)); - ur_native_handle_t NativeHandle = 0; - Adapter->call(MInteropMemObject, nullptr, - &NativeHandle); + const AdapterPtr &Adapter = getAdapter(); + ur_native_handle_t NativeHandle = 0; + Adapter->call(MInteropMemObject, nullptr, + &NativeHandle); + if (std::find(Handles.begin(), Handles.end(), NativeHandle) == Handles.end()) { Handles.push_back(NativeHandle); } } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index df68777ca6df4..473320fde1d23 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -24,14 +24,13 @@ inline namespace _V1 { event::event() : impl(std::make_shared(std::nullopt)) {} -event::event(cl_event ClEvent, const context &SyclContext) - : impl(std::make_shared( - detail::ur::cast(ClEvent), SyclContext)) { - // This is a special interop constructor for OpenCL, so the event must be - // retained. - // TODO(pi2ur): Don't just cast from cl_event above - impl->getAdapter()->call( - detail::ur::cast(ClEvent)); +event::event(cl_event ClEvent, const context &SyclContext) { + ur_event_handle_t hEvent = nullptr; + impl->getAdapter()->call( + detail::ur::cast(ClEvent), + detail::getSyclObjImpl(SyclContext)->getHandleRef(), nullptr, &hEvent); + + impl = std::make_shared(hEvent, SyclContext); } bool event::operator==(const event &rhs) const { return rhs.impl == impl; } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 6550677bb9854..7be9fcff49d78 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -94,10 +94,28 @@ queue::queue(const context &SyclContext, const device &SyclDevice, queue::queue(cl_command_queue clQueue, const context &SyclContext, const async_handler &AsyncHandler) { const property_list PropList{}; - impl = std::make_shared( - // TODO(pi2ur): Don't cast straight from cl_command_queue - reinterpret_cast(clQueue), - detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); + ur_queue_handle_t hQueue; + auto Context = detail::getSyclObjImpl(SyclContext); + auto Adapter = sycl::detail::ur::getAdapter(); + + // cl_device_id CLDevice; + // size_t Ret = clGetCommandQueueInfo(clQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), + // &CLDevice, nullptr); + // if (Ret) { + // throw runtime_error("Failed to retrieve device associated with the queue", + // PI_ERROR_INVALID_QUEUE); + // } + // sycl::detail::pi::PiDevice Device; + // Plugin->call( + // detail::pi::cast(CLDevice), nullptr, &Device); + + ur_queue_native_properties_t Properties[] = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; + Adapter->call( + detail::ur::cast(clQueue), Context->getHandleRef(), + nullptr, Properties, &hQueue); + + impl = std::make_shared(hQueue, Context, AsyncHandler, + PropList); } cl_command_queue queue::get() const { return impl->get(); } diff --git a/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp b/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp index 325a329a916dd..1133ee0dab418 100644 --- a/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/set_arg_interop.cpp @@ -10,6 +10,7 @@ #include #include +#include using namespace sycl; @@ -19,7 +20,7 @@ int main() { cl_context ClContext = Context.get(); - const size_t CountSources = 3; + const size_t CountSources = 4; const char *Sources[CountSources] = { "kernel void foo1(global float* Array, global int* Value) { *Array = " "42; *Value = 1; }\n", @@ -27,6 +28,7 @@ int main() { "Array[id] = id; }\n", "kernel void foo3(global float* Array, local float* LocalArray) { " "(void)LocalArray; (void)Array; }\n", + "kernel void foo4(global int* Value) {}\n", }; cl_int Err; @@ -46,12 +48,16 @@ int main() { cl_kernel ThirdCLKernel = clCreateKernel(ClProgram, "foo3", &Err); assert(Err == CL_SUCCESS); + cl_kernel FourthCLKernel = clCreateKernel(ClProgram, "foo4", &Err); + assert(Err == CL_SUCCESS); + const size_t Count = 100; float Array[Count]; kernel FirstKernel(FirstCLKernel, Context); kernel SecondKernel(SecondCLKernel, Context); kernel ThirdKernel(ThirdCLKernel, Context); + kernel FourthKernel(FourthCLKernel, Context); int Value; { buffer FirstBuffer(Array, range<1>(1)); @@ -114,10 +120,37 @@ int main() { } Queue.wait_and_throw(); + // Enqueuing an interop kernel while avoid calls to piKernelSetArg from + // different threads on the same kernel. + { + constexpr std::size_t NArgs = 16; + constexpr std::size_t ThreadCount = 4; + constexpr std::size_t LaunchCount = 8; + auto TestLambda = [&](int ThreadId) { + Queue + .submit([&](sycl::handler &CGH) { + for (std::size_t I = 0; I < NArgs; ++I) + CGH.set_arg(I, &ThreadId); + }) + .wait(); + }; + + std::vector threadPool; + threadPool.reserve(ThreadCount); + for (size_t tid = 0; tid < ThreadCount; ++tid) { + threadPool.push_back(std::thread(TestLambda, tid)); + } + + for (auto ¤tThread : threadPool) { + currentThread.join(); + } + } + clReleaseContext(ClContext); clReleaseKernel(FirstCLKernel); clReleaseKernel(SecondCLKernel); clReleaseKernel(ThirdCLKernel); + clReleaseKernel(FourthCLKernel); clReleaseProgram(ClProgram); return 0; } diff --git a/sycl/unittests/handler/CMakeLists.txt b/sycl/unittests/handler/CMakeLists.txt index eb7fc559ab73c..3f90404ab35b9 100644 --- a/sycl/unittests/handler/CMakeLists.txt +++ b/sycl/unittests/handler/CMakeLists.txt @@ -1,4 +1,3 @@ add_sycl_unittest(HandlerTests OBJECT - SetArgForLocalAccessor.cpp require.cpp ) diff --git a/sycl/unittests/handler/SetArgForLocalAccessor.cpp b/sycl/unittests/handler/SetArgForLocalAccessor.cpp deleted file mode 100644 index 7a9079872ce36..0000000000000 --- a/sycl/unittests/handler/SetArgForLocalAccessor.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//==------- SetArgForLocalAccessor.cpp --- Handler unit tests --------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ur_mock_helpers.hpp" -#include -#include -#include - -#include - -// This test checks that we pass the correct buffer size value when setting -// local_accessor as an argument through handler::set_arg to a kernel created -// using OpenCL interoperability methods. - -namespace { - -size_t LocalBufferArgSize = 0; - -ur_result_t redefined_urKernelSetArgLocal(void *pParams) { - auto params = *static_cast(pParams); - LocalBufferArgSize = *params.pargSize; - - return UR_RESULT_SUCCESS; -} - -TEST(HandlerSetArg, LocalAccessor) { - sycl::unittest::UrMock<> Mock; - redefineMockForKernelInterop(Mock); - mock::getCallbacks().set_replace_callback("urKernelSetArgLocal", - &redefined_urKernelSetArgLocal); - - constexpr size_t Size = 128; - sycl::queue Q; - - ur_native_handle_t handle = mock::createDummyHandle(); - auto KernelCL = reinterpret_cast::template input_type>(&handle); - auto Kernel = - sycl::make_kernel(KernelCL, Q.get_context()); - - Q.submit([&](sycl::handler &CGH) { - sycl::local_accessor Acc(Size, CGH); - CGH.set_arg(0, Acc); - CGH.single_task(Kernel); - }).wait(); - - ASSERT_EQ(LocalBufferArgSize, Size * sizeof(float)); -} -} // namespace diff --git a/sycl/unittests/thread_safety/CMakeLists.txt b/sycl/unittests/thread_safety/CMakeLists.txt index 8b725af8b4dd4..78dc6f2190178 100644 --- a/sycl/unittests/thread_safety/CMakeLists.txt +++ b/sycl/unittests/thread_safety/CMakeLists.txt @@ -1,4 +1,3 @@ add_sycl_unittest(ThreadSafetyTests OBJECT HostAccessorDeadLock.cpp - InteropKernelEnqueue.cpp ) diff --git a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp b/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp deleted file mode 100644 index ca54cf0d908d6..0000000000000 --- a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp +++ /dev/null @@ -1,66 +0,0 @@ -//==-------- InteropKernelEnqueue.cpp --- Thread safety unit tests ---------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include - -#include "ThreadUtils.h" -#include "ur_mock_helpers.hpp" - -namespace { -using namespace sycl; - -constexpr std::size_t NArgs = 16; -constexpr std::size_t ThreadCount = 4; -constexpr std::size_t LaunchCount = 8; - -uint32_t LastArgSet = -1; -std::size_t LastThread = -1; -ur_result_t redefined_urKernelSetArgValue(void *pParams) { - auto params = *static_cast(pParams); - EXPECT_EQ((LastArgSet + 1) % NArgs, *params.pargIndex); - LastArgSet = *params.pargIndex; - std::size_t ArgValue = *static_cast(*params.ppArgValue); - if (*params.pargIndex == 0) - LastThread = ArgValue; - else - EXPECT_EQ(LastThread, ArgValue); - return UR_RESULT_SUCCESS; -} - -TEST(KernelEnqueue, InteropKernel) { - unittest::UrMock<> Mock; - redefineMockForKernelInterop(Mock); - mock::getCallbacks().set_replace_callback("urKernelSetArgValue", - &redefined_urKernelSetArgValue); - - platform Plt = sycl::platform(); - queue Q; - - ur_native_handle_t Handle = mock::createDummyHandle(); - auto KernelCL = reinterpret_cast::template input_type>(&Handle); - auto Kernel = - sycl::make_kernel(KernelCL, Q.get_context()); - - auto TestLambda = [&](std::size_t ThreadId) { - Q.submit([&](sycl::handler &CGH) { - for (std::size_t I = 0; I < NArgs; ++I) - CGH.set_arg(I, ThreadId); - CGH.single_task(Kernel); - }).wait(); - }; - - for (std::size_t I = 0; I < LaunchCount; ++I) { - ThreadPool Pool(ThreadCount, TestLambda); - } -} -} // namespace From f10eae1bd863af55e625a23d8f4e6b7593bdbdba Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Wed, 16 Oct 2024 15:44:22 +0100 Subject: [PATCH 2/6] Change interop api to not own the native handle --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- sycl/include/sycl/backend.hpp | 7 ++++--- sycl/source/backend.cpp | 16 ++++------------ sycl/source/detail/buffer_impl.cpp | 4 ---- sycl/source/detail/context_impl.cpp | 6 ------ sycl/source/detail/device_image_impl.hpp | 3 --- sycl/source/detail/device_impl.cpp | 3 --- sycl/source/detail/event_impl.cpp | 2 -- sycl/source/detail/kernel_impl.cpp | 8 -------- sycl/source/detail/kernel_impl.hpp | 4 ---- sycl/source/detail/queue_impl.cpp | 2 -- sycl/source/detail/queue_impl.hpp | 1 - sycl/source/detail/sycl_mem_obj_t.cpp | 6 ------ sycl/source/device.cpp | 1 - sycl/source/kernel.cpp | 17 +++++++++-------- sycl/source/queue.cpp | 11 ----------- .../interop-opencl-make-kernel-bundle.cpp | 9 --------- sycl/unittests/SYCL2020/GetNativeOpenCL.cpp | 6 +++--- sycl/unittests/queue/InteropRetain.cpp | 13 ++++++------- 19 files changed, 27 insertions(+), 94 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index a3de1e3fc37d4..3214b7629a57b 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -6,4 +6,4 @@ set(UNIFIED_RUNTIME_REPO "https://github.com/omarahmed1111/unified-runtime.git") # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported # instead of returning misleading default value -set(UNIFIED_RUNTIME_TAG df6da35d6e67f2383db28dd49ab08c5c0ef541d2) +set(UNIFIED_RUNTIME_TAG 7252088d76a910357dd3e00cb57e7a67fd3299bd) diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index 35304d06428a3..467f6f530a74a 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -328,7 +328,7 @@ make_context( const async_handler &Handler = {}) { return detail::make_context( detail::ur::cast(BackendObject), Handler, Backend, - false /* KeepOwnership */); + true /* KeepOwnership */); } template @@ -337,8 +337,9 @@ std::enable_if_t::MakeQueue == true, make_queue(const typename backend_traits::template input_type &BackendObject, const context &TargetContext, const async_handler Handler = {}) { - auto KeepOwnership = - Backend == backend::ext_oneapi_cuda || Backend == backend::ext_oneapi_hip; + auto KeepOwnership = Backend == backend::ext_oneapi_cuda || + Backend == backend::ext_oneapi_hip || + Backend == backend::opencl; if constexpr (Backend == backend::ext_oneapi_level_zero) { return detail::make_queue( detail::ur::cast( diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 2c876a570e3c6..70cea2ce4249b 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -161,7 +161,7 @@ __SYCL_EXPORT queue make_queue(ur_native_handle_t NativeHandle, __SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, const context &Context, backend Backend) { - return make_event(NativeHandle, Context, false, Backend); + return make_event(NativeHandle, Context, true /* KeepOwnership */, Backend); } __SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, @@ -179,9 +179,6 @@ __SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, NativeHandle, ContextImpl->getHandleRef(), &Properties, &UrEvent); event Event = detail::createSyclObjFromImpl( std::make_shared(UrEvent, Context)); - - if (Backend == backend::opencl) - Adapter->call(UrEvent); return Event; } @@ -204,9 +201,6 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, sycl::make_error_code(sycl::errc::invalid), "urProgramCreateWithNativeHandle resulted in a null program handle."); - if (ContextImpl->getBackend() == backend::opencl) - Adapter->call(UrProgram); - std::vector ProgramDevices; uint32_t NumDevices = 0; @@ -310,7 +304,8 @@ std::shared_ptr make_kernel_bundle(ur_native_handle_t NativeHandle, const context &TargetContext, bundle_state State, backend Backend) { - return make_kernel_bundle(NativeHandle, TargetContext, false, State, Backend); + return make_kernel_bundle(NativeHandle, TargetContext, + true /* KeepOwnership*/, State, Backend); } kernel make_kernel(const context &TargetContext, @@ -351,9 +346,6 @@ kernel make_kernel(const context &TargetContext, NativeHandle, ContextImpl->getHandleRef(), UrProgram, &Properties, &UrKernel); - if (Backend == backend::opencl) - Adapter->call(UrKernel); - // Construct the SYCL queue from UR queue. return detail::createSyclObjFromImpl( std::make_shared(UrKernel, ContextImpl, KernelBundleImpl)); @@ -364,7 +356,7 @@ kernel make_kernel(ur_native_handle_t NativeHandle, return make_kernel( TargetContext, get_empty_interop_kernel_bundle(TargetContext), - NativeHandle, false, Backend); + NativeHandle, true /* KeepOwnership */, Backend); } } // namespace detail diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 1f889776f0451..e6b17265b9f8b 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -82,10 +82,6 @@ buffer_impl::getNativeVector(backend BackendName) const { auto Adapter = Platform->getAdapter(); - if (Platform->getBackend() == backend::opencl) { - Adapter->call(NativeMem); - } - ur_native_handle_t Handle = 0; // When doing buffer interop we don't know what device the memory should be // resident on, so pass nullptr for Device param. Buffer interop may not be diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index e527d0a0c46a8..5ece8bde1dd65 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -110,15 +110,11 @@ context_impl::context_impl(ur_context_handle_t UrContext, // // TODO: Move this backend-specific retain of the context to SYCL-2020 style // make_context interop, when that is created. - if (getBackend() == sycl::backend::opencl) { - getAdapter()->call(MContext); - } MKernelProgramCache.setContextPtr(this); } cl_context context_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions - getAdapter()->call(MContext); ur_native_handle_t nativeHandle = 0; getAdapter()->call(MContext, &nativeHandle); @@ -303,8 +299,6 @@ context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const { ur_native_handle_t context_impl::getNative() const { const auto &Adapter = getAdapter(); - if (getBackend() == backend::opencl) - Adapter->call(getHandleRef()); ur_native_handle_t Handle; Adapter->call(getHandleRef(), &Handle); return Handle; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index ac58b7b80f467..d58c6fc202c84 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -299,9 +299,6 @@ class device_image_impl { assert(MProgram); const auto &ContextImplPtr = detail::getSyclObjImpl(MContext); const AdapterPtr &Adapter = ContextImplPtr->getAdapter(); - - if (ContextImplPtr->getBackend() == backend::opencl) - Adapter->call(MProgram); ur_native_handle_t NativeProgram = 0; Adapter->call(MProgram, &NativeProgram); diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 3d820f2c782cd..e4004b874814c 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -98,7 +98,6 @@ bool device_impl::is_affinity_supported( cl_device_id device_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions - getAdapter()->call(MDevice); return ur::cast(getNative()); } @@ -345,8 +344,6 @@ std::vector device_impl::create_sub_devices() const { ur_native_handle_t device_impl::getNative() const { auto Adapter = getAdapter(); - if (getBackend() == backend::opencl) - Adapter->call(getHandleRef()); ur_native_handle_t Handle; Adapter->call(getHandleRef(), &Handle); return Handle; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 520b4de1ae888..e281c966df23e 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -500,8 +500,6 @@ ur_native_handle_t event_impl::getNative() { this->setHandle(UREvent); Handle = UREvent; } - if (MContext->getBackend() == backend::opencl) - Adapter->call(Handle); ur_native_handle_t OutHandle; Adapter->call(Handle, &OutHandle); return OutHandle; diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 3bbff52ae4f2b..e7ca2931d67a6 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -24,14 +24,6 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, Context)), MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)), MIsInterop(true), MKernelArgMaskPtr{ArgMask} { - ur_context_handle_t UrContext = nullptr; - // Using the adapter from the passed ContextImpl - getAdapter()->call( - MKernel, UR_KERNEL_INFO_CONTEXT, sizeof(UrContext), &UrContext, nullptr); - if (Context->getHandleRef() != UrContext) - throw sycl::exception( - make_error_code(errc::invalid), - "Input context must be the same as the context of cl_kernel"); // Enable USM indirect access for interoperability kernels. // Some UR Adapters (like OpenCL) require this call to enable USM diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 1b07d866dcc4c..9e6764f4ce468 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -75,7 +75,6 @@ class kernel_impl { /// /// \return a valid cl_kernel instance cl_kernel get() const { - getAdapter()->call(MKernel); ur_native_handle_t nativeHandle = 0; getAdapter()->call(MKernel, &nativeHandle); @@ -179,9 +178,6 @@ class kernel_impl { ur_native_handle_t getNative() const { const AdapterPtr &Adapter = MContext->getAdapter(); - if (MContext->getBackend() == backend::opencl) - Adapter->call(MKernel); - ur_native_handle_t NativeKernel = 0; Adapter->call(MKernel, &NativeKernel); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d8ca3fb8c1544..92759783ff896 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -731,8 +731,6 @@ void queue_impl::destructorNotification() { ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const { const AdapterPtr &Adapter = getAdapter(); - if (getContextImplPtr()->getBackend() == backend::opencl) - Adapter->call(MQueues[0]); ur_native_handle_t Handle{}; ur_queue_native_desc_t UrNativeDesc{UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC, nullptr, nullptr}; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index bf18e97c50fca..45f0de0a129e4 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -273,7 +273,6 @@ class queue_impl { /// \return an OpenCL interoperability queue handle. cl_command_queue get() { - getAdapter()->call(MQueues[0]); ur_native_handle_t nativeHandle = 0; getAdapter()->call(MQueues[0], nullptr, &nativeHandle); diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 25e092232ae7f..fdbcb474afcb0 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -56,9 +56,6 @@ SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, throw sycl::exception( make_error_code(errc::invalid), "Input context must be the same as the context of cl_mem"); - - if (MInteropContext->getBackend() == backend::opencl) - Adapter->call(MInteropMemObject); } ur_mem_type_t getImageType(int Dimensions) { @@ -111,9 +108,6 @@ SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, throw sycl::exception( make_error_code(errc::invalid), "Input context must be the same as the context of cl_mem"); - - if (MInteropContext->getBackend() == backend::opencl) - Adapter->call(MInteropMemObject); } void SYCLMemObjT::releaseMem(ContextImplPtr Context, void *MemAllocation) { diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 13590e980841c..4072a8820ea8a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -43,7 +43,6 @@ device::device(cl_device_id DeviceId) { auto Platform = detail::platform_impl::getPlatformFromUrDevice(Device, Adapter); impl = Platform->getOrMakeDeviceImpl(Device, Platform); - Adapter->call(impl->getHandleRef()); } device::device(const device_selector &deviceSelector) { diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index b0055f74ef4bc..cb4dc7b49350f 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -22,16 +22,17 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) { ur_kernel_handle_t hKernel = nullptr; ur_native_handle_t nativeHandle = reinterpret_cast(ClKernel); - Adapter->call( - nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(), - nullptr, nullptr, &hKernel); + ur_result_t Res = + Adapter->call_nocheck( + nativeHandle, detail::getSyclObjImpl(SyclContext)->getHandleRef(), + nullptr, nullptr, &hKernel); + if (Res == UR_RESULT_ERROR_INVALID_CONTEXT) { + throw sycl::exception( + make_error_code(errc::invalid), + "Input context must be the same as the context of cl_kernel"); + } impl = std::make_shared( hKernel, detail::getSyclObjImpl(SyclContext), nullptr, nullptr); - // This is a special interop constructor for OpenCL, so the kernel must be - // retained. - if (get_backend() == backend::opencl) { - impl->getAdapter()->call(hKernel); - } } cl_kernel kernel::get() const { return impl->get(); } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 7be9fcff49d78..6b973b4237538 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -98,17 +98,6 @@ queue::queue(cl_command_queue clQueue, const context &SyclContext, auto Context = detail::getSyclObjImpl(SyclContext); auto Adapter = sycl::detail::ur::getAdapter(); - // cl_device_id CLDevice; - // size_t Ret = clGetCommandQueueInfo(clQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), - // &CLDevice, nullptr); - // if (Ret) { - // throw runtime_error("Failed to retrieve device associated with the queue", - // PI_ERROR_INVALID_QUEUE); - // } - // sycl::detail::pi::PiDevice Device; - // Plugin->call( - // detail::pi::cast(CLDevice), nullptr, &Device); - ur_queue_native_properties_t Properties[] = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; Adapter->call( detail::ur::cast(clQueue), Context->getHandleRef(), diff --git a/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp b/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp index 582314335f6f3..afa6e59bd5367 100644 --- a/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp +++ b/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp @@ -33,15 +33,10 @@ int main() { clCreateProgramWithSource(native_context, 1, &source, nullptr, nullptr); std::cerr << "Build native program." << std::endl; clBuildProgram(p, 0, nullptr, nullptr, nullptr, nullptr); - std::cerr << "Release native context." << std::endl; - clReleaseContext(native_context); std::cerr << "Make kernel bundle." << std::endl; auto bundle = make_kernel_bundle( p, q.get_context()); - std::cerr << "Release native program." << std::endl; - // cl_program must have been retained by the above call. - clReleaseProgram(p); std::cerr << "Get native program." << std::endl; std::vector device_image = @@ -49,13 +44,9 @@ int main() { assert(device_image.size() == 1); std::cerr << "Create native kernel." << std::endl; cl_kernel k = clCreateKernel(device_image.front(), "do_nothing", nullptr); - // get_native must have retained cl_program as well. - clReleaseProgram(device_image.front()); std::cerr << "Make kernel." << std::endl; make_kernel(k, q.get_context()); - std::cerr << "Release native kernel." << std::endl; - clReleaseKernel(k); return 0; } diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index ba2ae917808d3..002aeaa8ba51e 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -121,8 +121,8 @@ TEST(GetNative, GetNativeHandle) { get_native(Event); get_native(Buffer); - // Depending on global caches state, urDeviceRetain is called either once or - // twice, so there'll be 6 or 7 calls. - ASSERT_EQ(TestCounter, 6 + DeviceRetainCounter - 1) + // Interop object shouldn't be owned by sycl. So, get_native shouldn't retain + // native handles. + ASSERT_EQ(TestCounter, 2 + DeviceRetainCounter - 1) << "Not all the retain methods were called"; } diff --git a/sycl/unittests/queue/InteropRetain.cpp b/sycl/unittests/queue/InteropRetain.cpp index c29d3b9e93c5a..23bf3d86627bc 100644 --- a/sycl/unittests/queue/InteropRetain.cpp +++ b/sycl/unittests/queue/InteropRetain.cpp @@ -23,25 +23,24 @@ ur_result_t redefinedQueueRetain(void *) { return UR_RESULT_SUCCESS; } -TEST(PiInteropTest, CheckRetain) { +TEST(UrInteropTest, CheckRetain) { sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); context Ctx{Plt.get_devices()[0]}; - // The queue construction should not call to urQueueRetain. Instead - // urQueueCreate should return the "retained" queue. + // The queue construction should not call to urQueueRetain. mock::getCallbacks().set_before_callback("urQueueRetain", &redefinedQueueRetain); queue Q{Ctx, default_selector()}; EXPECT_TRUE(QueueRetainCalled == 0); cl_command_queue OCLQ = get_native(Q); - EXPECT_TRUE(QueueRetainCalled == 1); + EXPECT_TRUE(QueueRetainCalled == 0); - // The make_queue should not call to urQueueRetain. The - // urQueueCreateWithNativeHandle should do the "retain" if needed. + // The make_queue should not call to urQueueRetain. + // Interop object shouldn't be owned by default in sycl. queue Q1 = make_queue(OCLQ, Ctx); - EXPECT_TRUE(QueueRetainCalled == 1); + EXPECT_TRUE(QueueRetainCalled == 0); } } // namespace From 6abdfddac1a253a660adc643ac5599984c715e6b Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 21 Oct 2024 09:19:07 +0100 Subject: [PATCH 3/6] Fix make_kernel e2e test to release native handles --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- sycl/include/sycl/backend.hpp | 2 +- sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 3214b7629a57b..cbd525a98e608 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -6,4 +6,4 @@ set(UNIFIED_RUNTIME_REPO "https://github.com/omarahmed1111/unified-runtime.git") # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported # instead of returning misleading default value -set(UNIFIED_RUNTIME_TAG 7252088d76a910357dd3e00cb57e7a67fd3299bd) +set(UNIFIED_RUNTIME_TAG 411993dadaef65a10b3ee8140134e80b116a25b8) diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index 467f6f530a74a..f9dfe5901e297 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -430,7 +430,7 @@ make_kernel_bundle(const typename backend_traits::template input_type< std::shared_ptr KBImpl = detail::make_kernel_bundle( detail::ur::cast(BackendObject), TargetContext, - false, State, Backend); + true /* KeepOwnership */, State, Backend); return detail::createSyclObjFromImpl>(KBImpl); } } // namespace _V1 diff --git a/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp b/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp index afa6e59bd5367..419586f9f4a6f 100644 --- a/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp +++ b/sycl/test-e2e/Adapters/interop-opencl-make-kernel-bundle.cpp @@ -48,5 +48,8 @@ int main() { std::cerr << "Make kernel." << std::endl; make_kernel(k, q.get_context()); + clReleaseProgram(p); + clReleaseKernel(k); + return 0; } From 9a13118c9699db6bf3bc03291566e69bef5fe7df Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 18 Nov 2024 12:48:46 +0000 Subject: [PATCH 4/6] formating and update UR tag --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 18 +++++++++--------- sycl/source/detail/buffer_impl.cpp | 5 +++-- sycl/source/queue.cpp | 3 ++- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index cbd525a98e608..14c5b22ff05c5 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,9 +1,9 @@ -set(UNIFIED_RUNTIME_REPO "https://github.com/omarahmed1111/unified-runtime.git") -# commit df6da35d6e67f2383db28dd49ab08c5c0ef541d2 -# Merge: 67590533 55bd5636 -# Author: aarongreig -# Date: Mon Oct 7 12:28:07 2024 +0100 -# Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize -# [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported -# instead of returning misleading default value -set(UNIFIED_RUNTIME_TAG 411993dadaef65a10b3ee8140134e80b116a25b8) + set(UNIFIED_RUNTIME_REPO "https://github.com/omarahmed1111/unified-runtime.git") + # commit df6da35d6e67f2383db28dd49ab08c5c0ef541d2 + # Merge: 67590533 55bd5636 + # Author: aarongreig + # Date: Mon Oct 7 12:28:07 2024 +0100 + # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize + # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported + # instead of returning misleading default value + set(UNIFIED_RUNTIME_TAG 6eba20afafab7358c7841c230890e43da4fd89a6) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index e6b17265b9f8b..ee28c69aad3e8 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -52,8 +52,9 @@ void buffer_impl::addInteropObject( const AdapterPtr &Adapter = getAdapter(); ur_native_handle_t NativeHandle = 0; Adapter->call(MInteropMemObject, nullptr, - &NativeHandle); - if (std::find(Handles.begin(), Handles.end(), NativeHandle) == Handles.end()) { + &NativeHandle); + if (std::find(Handles.begin(), Handles.end(), NativeHandle) == + Handles.end()) { Handles.push_back(NativeHandle); } } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 6b973b4237538..fc91f6e27a2db 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -98,7 +98,8 @@ queue::queue(cl_command_queue clQueue, const context &SyclContext, auto Context = detail::getSyclObjImpl(SyclContext); auto Adapter = sycl::detail::ur::getAdapter(); - ur_queue_native_properties_t Properties[] = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; + ur_queue_native_properties_t Properties[] = { + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; Adapter->call( detail::ur::cast(clQueue), Context->getHandleRef(), nullptr, Properties, &hQueue); From 2f89652eaa34be7a15be6ff4547a257799662322 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Fri, 31 Jan 2025 17:08:04 +0000 Subject: [PATCH 5/6] Bump UR tag to latest --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 9 +-------- sycl/source/detail/context_impl.cpp | 6 ------ sycl/source/kernel.cpp | 1 - 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 14c5b22ff05c5..49d9d52e728d0 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,9 +1,2 @@ set(UNIFIED_RUNTIME_REPO "https://github.com/omarahmed1111/unified-runtime.git") - # commit df6da35d6e67f2383db28dd49ab08c5c0ef541d2 - # Merge: 67590533 55bd5636 - # Author: aarongreig - # Date: Mon Oct 7 12:28:07 2024 +0100 - # Merge pull request #2038 from GeorgeWeb/georgi/unsupported-max-coop-wgsize - # [UR][hip][opencl] Mark urKernelSuggestMaxCooperativeGroupCountExp as unsupported - # instead of returning misleading default value - set(UNIFIED_RUNTIME_TAG 6eba20afafab7358c7841c230890e43da4fd89a6) + set(UNIFIED_RUNTIME_TAG da306bae38372600c14fb2b1fed406de36cba474) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 5ece8bde1dd65..591a2b026bb3b 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -104,12 +104,6 @@ context_impl::context_impl(ur_context_handle_t UrContext, } MPlatform = Platform; } - // TODO catch an exception and put it to list of asynchronous exceptions - // getAdapter() will be the same as the Adapter passed. This should be taken - // care of when creating device object. - // - // TODO: Move this backend-specific retain of the context to SYCL-2020 style - // make_context interop, when that is created. MKernelProgramCache.setContextPtr(this); } diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index cb4dc7b49350f..f345aa0c1cb15 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -16,7 +16,6 @@ namespace sycl { inline namespace _V1 { -// TODO(pi2ur): Don't cast straight from cl_kernel below kernel::kernel(cl_kernel ClKernel, const context &SyclContext) { auto Adapter = sycl::detail::ur::getAdapter(); ur_kernel_handle_t hKernel = nullptr; From 72c3e8abff5cdce07e8b553a2557fe077c4157c2 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 3 Feb 2025 13:05:56 +0000 Subject: [PATCH 6/6] Fix queue native properties init --- sycl/source/queue.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index fc91f6e27a2db..3669b9d3e17c6 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -98,11 +98,14 @@ queue::queue(cl_command_queue clQueue, const context &SyclContext, auto Context = detail::getSyclObjImpl(SyclContext); auto Adapter = sycl::detail::ur::getAdapter(); - ur_queue_native_properties_t Properties[] = { - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0}; + ur_queue_native_properties_t Properties = { + /*.stype = */ UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, + /*.pNext = */ nullptr, + /*.isNativeHandleOwned = */ false, + }; Adapter->call( detail::ur::cast(clQueue), Context->getHandleRef(), - nullptr, Properties, &hQueue); + nullptr, &Properties, &hQueue); impl = std::make_shared(hQueue, Context, AsyncHandler, PropList);