diff --git a/.github/workflows/benchmarks_compute.yml b/.github/workflows/benchmarks_compute.yml
index 126872cedd..a398e6b56b 100644
--- a/.github/workflows/benchmarks_compute.yml
+++ b/.github/workflows/benchmarks_compute.yml
@@ -151,9 +151,17 @@ jobs:
- name: Build SYCL
run: cmake --build ${{github.workspace}}/sycl_build -j
- - name: Set oneAPI Device Selector
- run: |
- echo "ONEAPI_DEVICE_SELECTOR=${{ matrix.adapter.str_name }}:${{ matrix.adapter.unit }}" >> $GITHUB_ENV
+ - name: Configure UR
+ working-directory: ${{github.workspace}}/ur-repo
+ run: >
+ cmake -DCMAKE_BUILD_TYPE=Release
+ -B${{github.workspace}}/ur-repo/build
+ -DUR_BUILD_TESTS=OFF
+ -DUR_BUILD_ADAPTER_L0=ON
+ -DUR_BUILD_ADAPTER_L0_V2=ON
+
+ - name: Build UR
+ run: cmake --build ${{github.workspace}}/ur-repo/build -j $(nproc)
- name: Run benchmarks
id: benchmarks
diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml
index 88b0877c27..fa23c38248 100644
--- a/.github/workflows/build-hw-reusable.yml
+++ b/.github/workflows/build-hw-reusable.yml
@@ -18,6 +18,10 @@ on:
required: false
type: string
default: OFF
+ static_adapter:
+ required: false
+ type: string
+ default: OFF
permissions:
contents: read
@@ -36,7 +40,7 @@ jobs:
strategy:
matrix:
adapter: [
- {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}"},
+        {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}", static_adapter: "${{inputs.static_adapter}}"},
]
build_type: [Debug, Release]
compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}]
@@ -49,6 +53,10 @@ jobs:
build_type: Release
- adapter: {static_Loader: ON}
compiler: {c: clang, cxx: clang++}
+ - adapter: {static_adapter: ON}
+ build_type: Release
+ - adapter: {static_adapter: ON}
+ compiler: {c: clang, cxx: clang++}
runs-on: ${{inputs.runner_name}}
@@ -76,6 +84,7 @@ jobs:
-DUR_BUILD_TESTS=ON
-DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
-DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
+ -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
-DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }}
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index cd5c91854c..6662f7833d 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -20,7 +20,7 @@ jobs:
compiler: [{c: gcc, cxx: g++}]
libbacktrace: ['-DVAL_USE_LIBBACKTRACE_BACKTRACE=OFF']
pool_tracking: ['-DUMF_ENABLE_POOL_TRACKING=ON', '-DUMF_ENABLE_POOL_TRACKING=OFF']
- latency_tracking: ['-DUMF_ENABLE_LATENCY_TRACKING=OFF']
+ latency_tracking: ['-DUR_ENABLE_LATENCY_HISTOGRAM=OFF']
include:
- os: 'ubuntu-22.04'
build_type: Release
@@ -40,7 +40,7 @@ jobs:
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: clang, cxx: clang++}
- latency_tracking: '-DUMF_ENABLE_LATENCY_TRACKING=ON'
+ latency_tracking: '-DUR_ENABLE_LATENCY_HISTOGRAM=ON'
runs-on: ${{ (matrix.os == 'ubuntu-22.04' && github.repository_owner == 'oneapi-src') && 'intel-ubuntu-22.04' || matrix.os }}
steps:
@@ -155,6 +155,7 @@ jobs:
adapter_name: L0
runner_name: L0
static_loader: ON
+ static_adapter: ON
opencl:
name: OpenCL
@@ -216,7 +217,8 @@ jobs:
os: ['windows-2019', 'windows-2022']
adapter: [
{name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'},
- {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
+ {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'},
+ {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
]
# TODO: building level zero loader on windows-2019 and clang-cl is currently broken
@@ -225,16 +227,25 @@ jobs:
adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}
- os: 'windows-2019'
adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
+ - os: 'windows-2019'
+ adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
- adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}
compiler: {c: clang-cl, cxx: clang-cl}
- adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
compiler: {c: clang-cl, cxx: clang-cl}
+ - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
+ compiler: {c: clang-cl, cxx: clang-cl}
build_type: [Debug, Release]
compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}]
include:
- compiler: {c: clang-cl, cxx: clang-cl}
toolset: "-T ClangCL"
+ - os: 'windows-2022'
+ adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
+ build_type: 'Release'
+ compiler: {c: cl, cxx: cl}
+
runs-on: ${{matrix.os}}
steps:
diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml
index 148232bd0a..32b8d58e7a 100644
--- a/.github/workflows/e2e_core.yml
+++ b/.github/workflows/e2e_core.yml
@@ -169,10 +169,6 @@ jobs:
-DCMAKE_CXX_COMPILER="$(which clang++)"
-DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py"
- - name: Set LIT_XFAIL_NOT
- if: inputs.xfail_not != ''
- run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV
-
- name: Set LIT_XFAIL
if: inputs.xfail != ''
run: echo "LIT_XFAIL=${{inputs.xfail}}" >> $GITHUB_ENV
@@ -181,6 +177,10 @@ jobs:
if: inputs.filter_out != ''
run: echo "LIT_FILTER_OUT=${{inputs.filter_out}}" >> $GITHUB_ENV
+ - name: Set LIT_XFAIL_NOT
+ if: inputs.xfail_not != ''
+ run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV
+
# TODO: remove once intel/llvm lit tests can properly recognize the GPU
- name: Configure hardware platform feature for L0
if: matrix.adapter.name == 'L0'
diff --git a/.github/workflows/e2e_cuda.yml b/.github/workflows/e2e_cuda.yml
index 6b4b0ca16c..c2f1d969b8 100644
--- a/.github/workflows/e2e_cuda.yml
+++ b/.github/workflows/e2e_cuda.yml
@@ -21,3 +21,4 @@ jobs:
config: "--cuda"
unit: "gpu"
extra_lit_flags: "-sv --max-time=3600"
+ xfail: "Regression/device_num.cpp"
diff --git a/.github/workflows/e2e_level_zero.yml b/.github/workflows/e2e_level_zero.yml
index ecab9a85ba..3b2511e0f9 100644
--- a/.github/workflows/e2e_level_zero.yml
+++ b/.github/workflows/e2e_level_zero.yml
@@ -21,9 +21,11 @@ jobs:
config: ""
unit: "gpu"
# Failing tests
- xfail: "DeviceCodeSplit/grf.cpp;ESIMD/grf.cpp;ESIMD/mask_expand_load.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp; SYCL :: ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_out_bounds.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_rowmajorA_rowmajorB.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/joint_matrix_out_bounds.cpp;Matrix/joint_matrix_unaligned_k.cpp"
+ xfail: "DeviceCodeSplit/grf.cpp;ESIMD/mask_expand_load.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp;ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_out_bounds.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_rowmajorA_rowmajorB.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/joint_matrix_out_bounds.cpp;Matrix/joint_matrix_unaligned_k.cpp;Matrix/SPVCooperativeMatrix/SG32/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d_cont.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_scalar.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;Matrix/SPVCooperativeMatrix/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp;Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp"
+ # Unexpectedly Passed Tests
+ xfail_not: ""
# Flaky tests
- filter_out: "ESIMD/named_barriers/loop_extended.cpp;ESIMD/local_accessor_copy_to_from.cpp;"
+ filter_out: "Basic/accessor/accessor.cpp|DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp|Graph/Explicit/interop-level-zero-launch-kernel.cpp|Graph/RecordReplay/interop-level-zero-launch-kernel.cpp|syclcompat/launch/launch_policy_lmem.cpp"
# These runners by default spawn upwards of 260 workers.
# We also add a time out just in case some test hangs
extra_lit_flags: "--param gpu-intel-pvc=True --param gpu-intel-pvc-1T=True -sv -j 100 --max-time=3600"
diff --git a/.github/workflows/e2e_opencl.yml b/.github/workflows/e2e_opencl.yml
index fa7984bb3f..e4714b2434 100644
--- a/.github/workflows/e2e_opencl.yml
+++ b/.github/workflows/e2e_opencl.yml
@@ -20,5 +20,5 @@ jobs:
prefix: ""
config: ""
unit: "cpu"
- xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp"
+ xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp;NonUniformGroups/ballot_group.cpp;NonUniformGroups/ballot_group_algorithms.cpp;NonUniformGroups/fixed_size_group_algorithms.cpp;NonUniformGroups/opportunistic_group.cpp;NonUniformGroups/opportunistic_group_algorithms.cpp;NonUniformGroups/tangle_group.cpp;NonUniformGroups/tangle_group_algorithms.cpp"
extra_lit_flags: "-sv --max-time=3600"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f54cbd1067..a908a22d80 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,6 +52,7 @@ option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF)
option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF)
option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF)
+option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF)
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
diff --git a/README.md b/README.md
index 7ba72b43d3..29279a0059 100644
--- a/README.md
+++ b/README.md
@@ -145,7 +145,7 @@ List of options provided by CMake:
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
-| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
+| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `$ENV{ROCM_PATH}` or `/opt/rocm` |
| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` |
| UR_HIP_HSA_INCLUDE_DIRS | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include` |
| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib` |
diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake
index 24cb6f8e54..6a5700da8b 100644
--- a/cmake/helpers.cmake
+++ b/cmake/helpers.cmake
@@ -70,6 +70,7 @@ function(add_ur_target_compile_options name)
)
if (CMAKE_BUILD_TYPE STREQUAL "Release")
target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2)
+ target_compile_options(${name} PRIVATE -fvisibility=hidden)
endif()
if(UR_DEVELOPER_MODE)
target_compile_options(${name} PRIVATE
diff --git a/examples/collector/CMakeLists.txt b/examples/collector/CMakeLists.txt
index 5fe484d0b8..6dd112aae0 100644
--- a/examples/collector/CMakeLists.txt
+++ b/examples/collector/CMakeLists.txt
@@ -17,6 +17,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI})
target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include)
if(MSVC)
- target_compile_definitions(${TARGET_NAME} PRIVATE
- XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS)
+ target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_STATIC_LIBRARY)
endif()
+target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_CALLBACK_API_EXPORTS)
diff --git a/include/ur_api.h b/include/ur_api.h
index 082890e73d..e75793f3d2 100644
--- a/include/ur_api.h
+++ b/include/ur_api.h
@@ -332,9 +332,17 @@ typedef enum ur_structure_type_t {
#if defined(_WIN32)
/// @brief Microsoft-specific dllexport storage-class attribute
#define UR_APIEXPORT __declspec(dllexport)
+#endif // defined(_WIN32)
+#endif // UR_APIEXPORT
+
+///////////////////////////////////////////////////////////////////////////////
+#ifndef UR_APIEXPORT
+#if __GNUC__ >= 4
+/// @brief GCC-specific dllexport storage-class attribute
+#define UR_APIEXPORT __attribute__((visibility("default")))
#else
#define UR_APIEXPORT
-#endif // defined(_WIN32)
+#endif // __GNUC__ >= 4
#endif // UR_APIEXPORT
///////////////////////////////////////////////////////////////////////////////
@@ -1569,8 +1577,7 @@ typedef enum ur_device_info_t {
///< ::urDevicePartition
UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups
UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress
- UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of sub group sizes supported on Intel
- ///< device
+ UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of supported sub group sizes
UR_DEVICE_INFO_USM_HOST_SUPPORT = 83, ///< [::ur_device_usm_access_capability_flags_t] support USM host memory
///< access
UR_DEVICE_INFO_USM_DEVICE_SUPPORT = 84, ///< [::ur_device_usm_access_capability_flags_t] support USM device memory
diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def
new file mode 100644
index 0000000000..5cd8dd7926
--- /dev/null
+++ b/include/ur_api_funcs.def
@@ -0,0 +1,212 @@
+
+/*
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file ur_api_funcs.def
+ * @version v0.11-r0
+ *
+ */
+
+ // Auto-generated file, do not edit.
+
+_UR_API(urPlatformGet)
+_UR_API(urPlatformGetInfo)
+_UR_API(urPlatformGetNativeHandle)
+_UR_API(urPlatformCreateWithNativeHandle)
+_UR_API(urPlatformGetApiVersion)
+_UR_API(urPlatformGetBackendOption)
+_UR_API(urContextCreate)
+_UR_API(urContextRetain)
+_UR_API(urContextRelease)
+_UR_API(urContextGetInfo)
+_UR_API(urContextGetNativeHandle)
+_UR_API(urContextCreateWithNativeHandle)
+_UR_API(urContextSetExtendedDeleter)
+_UR_API(urEventGetInfo)
+_UR_API(urEventGetProfilingInfo)
+_UR_API(urEventWait)
+_UR_API(urEventRetain)
+_UR_API(urEventRelease)
+_UR_API(urEventGetNativeHandle)
+_UR_API(urEventCreateWithNativeHandle)
+_UR_API(urEventSetCallback)
+_UR_API(urProgramCreateWithIL)
+_UR_API(urProgramCreateWithBinary)
+_UR_API(urProgramBuild)
+_UR_API(urProgramCompile)
+_UR_API(urProgramLink)
+_UR_API(urProgramRetain)
+_UR_API(urProgramRelease)
+_UR_API(urProgramGetFunctionPointer)
+_UR_API(urProgramGetGlobalVariablePointer)
+_UR_API(urProgramGetInfo)
+_UR_API(urProgramGetBuildInfo)
+_UR_API(urProgramSetSpecializationConstants)
+_UR_API(urProgramGetNativeHandle)
+_UR_API(urProgramCreateWithNativeHandle)
+_UR_API(urProgramBuildExp)
+_UR_API(urProgramCompileExp)
+_UR_API(urProgramLinkExp)
+_UR_API(urKernelCreate)
+_UR_API(urKernelGetInfo)
+_UR_API(urKernelGetGroupInfo)
+_UR_API(urKernelGetSubGroupInfo)
+_UR_API(urKernelRetain)
+_UR_API(urKernelRelease)
+_UR_API(urKernelGetNativeHandle)
+_UR_API(urKernelCreateWithNativeHandle)
+_UR_API(urKernelGetSuggestedLocalWorkSize)
+_UR_API(urKernelSetArgValue)
+_UR_API(urKernelSetArgLocal)
+_UR_API(urKernelSetArgPointer)
+_UR_API(urKernelSetExecInfo)
+_UR_API(urKernelSetArgSampler)
+_UR_API(urKernelSetArgMemObj)
+_UR_API(urKernelSetSpecializationConstants)
+_UR_API(urKernelSuggestMaxCooperativeGroupCountExp)
+_UR_API(urQueueGetInfo)
+_UR_API(urQueueCreate)
+_UR_API(urQueueRetain)
+_UR_API(urQueueRelease)
+_UR_API(urQueueGetNativeHandle)
+_UR_API(urQueueCreateWithNativeHandle)
+_UR_API(urQueueFinish)
+_UR_API(urQueueFlush)
+_UR_API(urSamplerCreate)
+_UR_API(urSamplerRetain)
+_UR_API(urSamplerRelease)
+_UR_API(urSamplerGetInfo)
+_UR_API(urSamplerGetNativeHandle)
+_UR_API(urSamplerCreateWithNativeHandle)
+_UR_API(urMemImageCreate)
+_UR_API(urMemBufferCreate)
+_UR_API(urMemRetain)
+_UR_API(urMemRelease)
+_UR_API(urMemBufferPartition)
+_UR_API(urMemGetNativeHandle)
+_UR_API(urMemBufferCreateWithNativeHandle)
+_UR_API(urMemImageCreateWithNativeHandle)
+_UR_API(urMemGetInfo)
+_UR_API(urMemImageGetInfo)
+_UR_API(urPhysicalMemCreate)
+_UR_API(urPhysicalMemRetain)
+_UR_API(urPhysicalMemRelease)
+_UR_API(urAdapterGet)
+_UR_API(urAdapterRelease)
+_UR_API(urAdapterRetain)
+_UR_API(urAdapterGetLastError)
+_UR_API(urAdapterGetInfo)
+_UR_API(urEnqueueKernelLaunch)
+_UR_API(urEnqueueEventsWait)
+_UR_API(urEnqueueEventsWaitWithBarrier)
+_UR_API(urEnqueueMemBufferRead)
+_UR_API(urEnqueueMemBufferWrite)
+_UR_API(urEnqueueMemBufferReadRect)
+_UR_API(urEnqueueMemBufferWriteRect)
+_UR_API(urEnqueueMemBufferCopy)
+_UR_API(urEnqueueMemBufferCopyRect)
+_UR_API(urEnqueueMemBufferFill)
+_UR_API(urEnqueueMemImageRead)
+_UR_API(urEnqueueMemImageWrite)
+_UR_API(urEnqueueMemImageCopy)
+_UR_API(urEnqueueMemBufferMap)
+_UR_API(urEnqueueMemUnmap)
+_UR_API(urEnqueueUSMFill)
+_UR_API(urEnqueueUSMMemcpy)
+_UR_API(urEnqueueUSMPrefetch)
+_UR_API(urEnqueueUSMAdvise)
+_UR_API(urEnqueueUSMFill2D)
+_UR_API(urEnqueueUSMMemcpy2D)
+_UR_API(urEnqueueDeviceGlobalVariableWrite)
+_UR_API(urEnqueueDeviceGlobalVariableRead)
+_UR_API(urEnqueueReadHostPipe)
+_UR_API(urEnqueueWriteHostPipe)
+_UR_API(urEnqueueKernelLaunchCustomExp)
+_UR_API(urEnqueueCooperativeKernelLaunchExp)
+_UR_API(urEnqueueTimestampRecordingExp)
+_UR_API(urEnqueueNativeCommandExp)
+_UR_API(urBindlessImagesUnsampledImageHandleDestroyExp)
+_UR_API(urBindlessImagesSampledImageHandleDestroyExp)
+_UR_API(urBindlessImagesImageAllocateExp)
+_UR_API(urBindlessImagesImageFreeExp)
+_UR_API(urBindlessImagesUnsampledImageCreateExp)
+_UR_API(urBindlessImagesSampledImageCreateExp)
+_UR_API(urBindlessImagesImageCopyExp)
+_UR_API(urBindlessImagesImageGetInfoExp)
+_UR_API(urBindlessImagesMipmapGetLevelExp)
+_UR_API(urBindlessImagesMipmapFreeExp)
+_UR_API(urBindlessImagesImportExternalMemoryExp)
+_UR_API(urBindlessImagesMapExternalArrayExp)
+_UR_API(urBindlessImagesMapExternalLinearMemoryExp)
+_UR_API(urBindlessImagesReleaseExternalMemoryExp)
+_UR_API(urBindlessImagesImportExternalSemaphoreExp)
+_UR_API(urBindlessImagesReleaseExternalSemaphoreExp)
+_UR_API(urBindlessImagesWaitExternalSemaphoreExp)
+_UR_API(urBindlessImagesSignalExternalSemaphoreExp)
+_UR_API(urUSMHostAlloc)
+_UR_API(urUSMDeviceAlloc)
+_UR_API(urUSMSharedAlloc)
+_UR_API(urUSMFree)
+_UR_API(urUSMGetMemAllocInfo)
+_UR_API(urUSMPoolCreate)
+_UR_API(urUSMPoolRetain)
+_UR_API(urUSMPoolRelease)
+_UR_API(urUSMPoolGetInfo)
+_UR_API(urUSMPitchedAllocExp)
+_UR_API(urUSMImportExp)
+_UR_API(urUSMReleaseExp)
+_UR_API(urCommandBufferCreateExp)
+_UR_API(urCommandBufferRetainExp)
+_UR_API(urCommandBufferReleaseExp)
+_UR_API(urCommandBufferFinalizeExp)
+_UR_API(urCommandBufferAppendKernelLaunchExp)
+_UR_API(urCommandBufferAppendUSMMemcpyExp)
+_UR_API(urCommandBufferAppendUSMFillExp)
+_UR_API(urCommandBufferAppendMemBufferCopyExp)
+_UR_API(urCommandBufferAppendMemBufferWriteExp)
+_UR_API(urCommandBufferAppendMemBufferReadExp)
+_UR_API(urCommandBufferAppendMemBufferCopyRectExp)
+_UR_API(urCommandBufferAppendMemBufferWriteRectExp)
+_UR_API(urCommandBufferAppendMemBufferReadRectExp)
+_UR_API(urCommandBufferAppendMemBufferFillExp)
+_UR_API(urCommandBufferAppendUSMPrefetchExp)
+_UR_API(urCommandBufferAppendUSMAdviseExp)
+_UR_API(urCommandBufferEnqueueExp)
+_UR_API(urCommandBufferRetainCommandExp)
+_UR_API(urCommandBufferReleaseCommandExp)
+_UR_API(urCommandBufferUpdateKernelLaunchExp)
+_UR_API(urCommandBufferGetInfoExp)
+_UR_API(urCommandBufferCommandGetInfoExp)
+_UR_API(urUsmP2PEnablePeerAccessExp)
+_UR_API(urUsmP2PDisablePeerAccessExp)
+_UR_API(urUsmP2PPeerAccessGetInfoExp)
+_UR_API(urVirtualMemGranularityGetInfo)
+_UR_API(urVirtualMemReserve)
+_UR_API(urVirtualMemFree)
+_UR_API(urVirtualMemMap)
+_UR_API(urVirtualMemUnmap)
+_UR_API(urVirtualMemSetAccess)
+_UR_API(urVirtualMemGetInfo)
+_UR_API(urDeviceGet)
+_UR_API(urDeviceGetInfo)
+_UR_API(urDeviceRetain)
+_UR_API(urDeviceRelease)
+_UR_API(urDevicePartition)
+_UR_API(urDeviceSelectBinary)
+_UR_API(urDeviceGetNativeHandle)
+_UR_API(urDeviceCreateWithNativeHandle)
+_UR_API(urDeviceGetGlobalTimestamps)
+_UR_API(urLoaderConfigCreate)
+_UR_API(urLoaderConfigEnableLayer)
+_UR_API(urLoaderConfigGetInfo)
+_UR_API(urLoaderConfigRelease)
+_UR_API(urLoaderConfigRetain)
+_UR_API(urLoaderConfigSetCodeLocationCallback)
+_UR_API(urLoaderConfigSetMockingEnabled)
+_UR_API(urLoaderInit)
+_UR_API(urLoaderTearDown)
diff --git a/include/ur_print.hpp b/include/ur_print.hpp
index f71cc12b32..681e8e814d 100644
--- a/include/ur_print.hpp
+++ b/include/ur_print.hpp
@@ -17403,6 +17403,11 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
return os;
}
+inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const ur_bool_t value) {
+ os << (value ? "true" : "false");
+ return os;
+}
+
namespace ur::details {
///////////////////////////////////////////////////////////////////////////////
// @brief Print pointer value
diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py
index c7f263c253..e4377c8b65 100644
--- a/scripts/benchmarks/benches/base.py
+++ b/scripts/benchmarks/benches/base.py
@@ -16,9 +16,12 @@
class Benchmark:
def __init__(self, directory):
self.directory = directory
+ self.adapter_path = os.path.join(options.ur_dir, 'build', 'lib', f"libur_adapter_{options.ur_adapter_name}.so")
def run_bench(self, command, env_vars):
- return run(command=command, env_vars=env_vars, add_sycl=True, cwd=options.benchmark_cwd).stdout.decode()
+ env_vars_with_forced_adapter = env_vars.copy()
+ env_vars_with_forced_adapter.update({'UR_ADAPTERS_FORCE_LOAD': self.adapter_path})
+ return run(command=command, env_vars=env_vars_with_forced_adapter, add_sycl=True, cwd=options.benchmark_cwd).stdout.decode()
def create_data_path(self, name):
data_path = os.path.join(self.directory, "data", name)
diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py
index 672875f2dd..cf164721a6 100644
--- a/scripts/benchmarks/benches/compute.py
+++ b/scripts/benchmarks/benches/compute.py
@@ -15,7 +15,6 @@ class ComputeBench:
def __init__(self, directory):
self.directory = directory
self.built = False
- self.adapter_short_name = {'level_zero' : 'L0', "level_zero_v2" : 'L0_V2'}
return
def setup(self):
@@ -35,11 +34,9 @@ def setup(self):
f"-DALLOW_WARNINGS=ON",
f"-DBUILD_UR=ON",
f"-DUR_BUILD_TESTS=OFF",
- f"-DUR_BUILD_ADAPTER_L0=ON",
f"-DUR_BUILD_TESTS=OFF",
f"-DUMF_DISABLE_HWLOC=ON",
f"-DBENCHMARK_UR_SOURCE_DIR={options.ur_dir}",
- f"-DUR_BUILD_ADAPTER_{self.adapter_short_name[options.ur_adapter_name]}=ON"
]
run(configure_command, add_sycl=True)
@@ -47,7 +44,6 @@ def setup(self):
self.built = True
self.bins = os.path.join(build_path, 'bin')
- self.libs = os.path.join(build_path, 'lib')
class ComputeBenchmark(Benchmark):
def __init__(self, bench, name, test):
@@ -82,7 +78,7 @@ def run(self, env_vars) -> Result:
result = self.run_bench(command, env_vars)
(label, mean) = self.parse_output(result)
- return Result(label=label, value=mean, command=command, env=env_vars, stdout=result)
+ return Result(label=label, value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better())
def parse_output(self, output):
csv_file = io.StringIO(output)
@@ -130,9 +126,6 @@ def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_ur SubmitKernel {order}"
- def extra_env_vars(self) -> dict:
- return {"UR_ADAPTERS_FORCE_LOAD" : os.path.join(self.bench.libs, f"libur_adapter_{options.ur_adapter_name}.so")}
-
def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py
index e5601c6563..06d2222ac4 100644
--- a/scripts/benchmarks/benches/velocity.py
+++ b/scripts/benchmarks/benches/velocity.py
@@ -61,7 +61,7 @@ def run(self, env_vars) -> Result:
result = self.run_bench(command, env_vars)
- return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result)
+ return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better())
def teardown(self):
return
diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py
index a9850dfc67..d2b7ef8cd0 100755
--- a/scripts/benchmarks/main.py
+++ b/scripts/benchmarks/main.py
@@ -52,34 +52,46 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())]
for benchmark in benchmarks:
- print(f"setting up {benchmark.name()}... ", end='', flush=True)
- benchmark.setup()
- print("complete.")
+ try:
+ print(f"setting up {benchmark.name()}... ", end='', flush=True)
+ benchmark.setup()
+ print("complete.")
+ except Exception as e:
+ if options.exit_on_failure:
+ raise e
+ else:
+ print(f"failed: {e}")
results = []
for benchmark in benchmarks:
- merged_env_vars = {**additional_env_vars}
- iteration_results = []
- for iter in range(options.iterations):
- print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True)
- bench_results = benchmark.run(merged_env_vars)
- if bench_results is not None:
- print(f"complete ({bench_results.value} {benchmark.unit()}).")
- iteration_results.append(bench_results)
+ try:
+ merged_env_vars = {**additional_env_vars}
+ iteration_results = []
+ for iter in range(options.iterations):
+ print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True)
+ bench_results = benchmark.run(merged_env_vars)
+ if bench_results is not None:
+ print(f"complete ({bench_results.value} {benchmark.unit()}).")
+ iteration_results.append(bench_results)
+ else:
+ print(f"did not finish.")
+
+ if len(iteration_results) == 0:
+ continue
+
+ iteration_results.sort(key=lambda res: res.value)
+ median_index = len(iteration_results) // 2
+ median_result = iteration_results[median_index]
+
+ median_result.unit = benchmark.unit()
+ median_result.name = benchmark.name()
+
+ results.append(median_result)
+ except Exception as e:
+ if options.exit_on_failure:
+ raise e
else:
- print(f"did not finish.")
-
- if len(iteration_results) == 0:
- continue
-
- iteration_results.sort(key=lambda res: res.value)
- median_index = len(iteration_results) // 2
- median_result = iteration_results[median_index]
-
- median_result.unit = benchmark.unit()
- median_result.name = benchmark.name()
-
- results.append(median_result)
+ print(f"failed: {e}")
for benchmark in benchmarks:
print(f"tearing down {benchmark.name()}... ", end='', flush=True)
@@ -126,6 +138,7 @@ def validate_and_parse_env_args(env_args):
parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600)
parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None)
parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true")
+ parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true")
args = parser.parse_args()
additional_env_vars = validate_and_parse_env_args(args.env)
@@ -137,6 +150,7 @@ def validate_and_parse_env_args(env_args):
options.timeout = args.timeout
options.ur_dir = args.ur_dir
options.ur_adapter_name = args.ur_adapter_name
+ options.exit_on_failure = args.exit_on_failure
benchmark_filter = re.compile(args.filter) if args.filter else None
diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output.py
index 26deabe099..1a61f9909c 100644
--- a/scripts/benchmarks/output.py
+++ b/scripts/benchmarks/output.py
@@ -116,7 +116,7 @@ def generate_summary_table(chart_data: dict[str, list[Result]]):
if key in results:
value = results[key].value
if key == best_key:
- row += f" `**{value}**` |" # Highlight the best value
+                row += f" {value} |" # Best value (bold highlighting intentionally disabled)
else:
row += f" {value} |"
else:
@@ -132,6 +132,7 @@ def generate_markdown(chart_data: dict[str, list[Result]]):
return f"""
# Summary
+result is better\n
{summary_table}
# Charts
{mermaid_script}
diff --git a/scripts/core/INTRO.rst b/scripts/core/INTRO.rst
index 448e3569e2..898d4ce5f3 100644
--- a/scripts/core/INTRO.rst
+++ b/scripts/core/INTRO.rst
@@ -396,6 +396,14 @@ Specific environment variables can be set to control the behavior of unified run
See the Layers_ section for details of the layers currently included in the runtime.
+.. envvar:: UR_LOADER_PRELOAD_FILTER
+
+ If set, the loader will read `ONEAPI_DEVICE_SELECTOR` before loading the UR Adapters to determine which backends should be loaded.
+
+ .. note::
+
+ This environment variable is default enabled on Linux, but default disabled on Windows.
+
Service identifiers
---------------------
diff --git a/scripts/core/common.yml b/scripts/core/common.yml
index d06333eb07..5df4a7c04e 100644
--- a/scripts/core/common.yml
+++ b/scripts/core/common.yml
@@ -39,6 +39,12 @@ desc: "Microsoft-specific dllexport storage-class attribute"
condition: "defined(_WIN32)"
name: $X_APIEXPORT
value: __declspec(dllexport)
+--- #--------------------------------------------------------------------------
+type: macro
+desc: "GCC-specific dllexport storage-class attribute"
+condition: "__GNUC__ >= 4"
+name: $X_APIEXPORT
+value: __attribute__ ((visibility ("default")))
altvalue: ""
--- #--------------------------------------------------------------------------
type: macro
diff --git a/scripts/core/device.yml b/scripts/core/device.yml
index 23c0233ef7..c063466b22 100644
--- a/scripts/core/device.yml
+++ b/scripts/core/device.yml
@@ -365,7 +365,7 @@ etors:
- name: SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
desc: "[$x_bool_t] support sub group independent forward progress"
- name: SUB_GROUP_SIZES_INTEL
- desc: "[uint32_t[]] return an array of sub group sizes supported on Intel device"
+ desc: "[uint32_t[]] return an array of supported sub group sizes"
- name: USM_HOST_SUPPORT
desc: "[$x_device_usm_access_capability_flags_t] support USM host memory access"
- name: USM_DEVICE_SUPPORT
diff --git a/scripts/generate_code.py b/scripts/generate_code.py
index 64386bb5e3..0c7476ab42 100644
--- a/scripts/generate_code.py
+++ b/scripts/generate_code.py
@@ -108,6 +108,26 @@ def _mako_print_cpp(path, namespace, tags, version, specs, meta):
specs=specs,
meta=meta)
+
+def _mako_api_funcs(path, namespace, tags, version, revision, specs, meta):
+ template = "api_funcs.def.mako"
+ fin = os.path.join(templates_dir, template)
+
+ name = "%s_api_funcs"%(namespace)
+ filename = "%s.def"%(name)
+ fout = os.path.join(path, filename)
+
+ print("Generating %s..."%fout)
+ return util.makoWrite(
+ fin, fout,
+ name=name,
+ ver=version,
+ rev=revision,
+ namespace=namespace,
+ tags=tags,
+ specs=specs,
+ meta=meta)
+
"""
generates c/c++ files from the specification documents
"""
@@ -116,6 +136,7 @@ def _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, spec
loc += _mako_api_cpp(srcpath, namespace, tags, version, revision, specs, meta)
loc += _mako_ddi_h(incpath, namespace, tags, version, revision, specs, meta)
loc += _mako_print_hpp(incpath, namespace, tags, version, revision, specs, meta)
+ loc += _mako_api_funcs(incpath, namespace, tags, version, revision, specs, meta)
return loc
@@ -379,6 +400,32 @@ def generate_loader(path, section, namespace, tags, version, specs, meta):
)
print("Generated %s lines of code.\n"%loc)
+"""
+ generates c/c++ files from the specification documents
+"""
+def _mako_interface_loader_api(path, adapter, ext, namespace, tags, version, specs, meta):
+ dstpath = os.path.join(path, adapter)
+ os.makedirs(dstpath, exist_ok=True)
+
+ template = f"ur_interface_loader.{ext}.mako"
+ fin = os.path.join(templates_dir, template)
+
+ name = f"ur_interface_loader"
+
+ filename = f"{name}.{ext}"
+ fout = os.path.join(dstpath, filename)
+
+ print("Generating %s..."%fout)
+ return util.makoWrite(
+ fin, fout,
+ name=name,
+ adapter=adapter,
+ ver=version,
+ namespace=namespace,
+ tags=tags,
+ specs=specs,
+ meta=meta,)
+
"""
Entry-point:
generates adapter for unified_runtime
@@ -395,6 +442,10 @@ def generate_adapters(path, section, namespace, tags, version, specs, meta):
loc += _mako_linker_scripts(
dstpath, "adapter", "def", namespace, tags, version, specs, meta
)
+
+ loc += _mako_interface_loader_api(dstpath, "level_zero", "cpp", namespace, tags, version, specs, meta)
+ loc += _mako_interface_loader_api(dstpath, "level_zero", "hpp", namespace, tags, version, specs, meta)
+
print("Generated %s lines of code.\n"%loc)
"""
diff --git a/scripts/templates/api_funcs.def.mako b/scripts/templates/api_funcs.def.mako
new file mode 100644
index 0000000000..f0fb653208
--- /dev/null
+++ b/scripts/templates/api_funcs.def.mako
@@ -0,0 +1,35 @@
+<%!
+import re
+from templates import helper as th
+%><%
+ n=namespace
+ N=n.upper()
+
+ x=tags['$x']
+ X=x.upper()
+%>
+/*
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file ${name}.def
+ * @version v${ver}-r${rev}
+ *
+ */
+
+ // Auto-generated file, do not edit.
+
+%for tbl in th.get_pfntables(specs, meta, n, tags):
+%for obj in tbl['functions']:
+_UR_API(${th.make_func_name(n, tags, obj)})
+%endfor
+%endfor
+%for obj in th.get_loader_functions(specs, meta, n, tags):
+%if n + "Loader" in obj:
+_UR_API(${obj})
+%endif
+%endfor
diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako
index 44631cc360..9c797a0ec3 100644
--- a/scripts/templates/ldrddi.cpp.mako
+++ b/scripts/templates/ldrddi.cpp.mako
@@ -365,6 +365,10 @@ ${tbl['export']['name']}(
// Load the device-platform DDI tables
for( auto& platform : ur_loader::getContext()->platforms )
{
+ // statically linked adapter inside of the loader
+ if (platform.handle == nullptr)
+ continue;
+
if(platform.initStatus != ${X}_RESULT_SUCCESS)
continue;
auto getTable = reinterpret_cast<${tbl['pfn']}>(
diff --git a/scripts/templates/print.hpp.mako b/scripts/templates/print.hpp.mako
index 9bf427b889..4180231ea4 100644
--- a/scripts/templates/print.hpp.mako
+++ b/scripts/templates/print.hpp.mako
@@ -411,6 +411,11 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
%endfor
%endfor
+inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const ur_bool_t value) {
+ os << (value ? "true" : "false");
+ return os;
+}
+
namespace ${x}::details {
///////////////////////////////////////////////////////////////////////////////
// @brief Print pointer value
diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako
index f941c7ba03..fcfa89d258 100644
--- a/scripts/templates/queue_api.cpp.mako
+++ b/scripts/templates/queue_api.cpp.mako
@@ -24,8 +24,9 @@ from templates import helper as th
ur_queue_handle_t_::~ur_queue_handle_t_() {}
## FUNCTION ###################################################################
+namespace ${x}::level_zero {
%for obj in th.get_queue_related_functions(specs, n, tags):
-${X}_APIEXPORT ${x}_result_t ${X}_APICALL
+${x}_result_t
${th.make_func_name(n, tags, obj)}(
%for line in th.make_param_lines(n, tags, obj, format=["name", "type", "delim"]):
${line}
@@ -35,3 +36,4 @@ ${th.make_func_name(n, tags, obj)}(
return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])};
}
%endfor
+}
\ No newline at end of file
diff --git a/scripts/templates/ur_interface_loader.cpp.mako b/scripts/templates/ur_interface_loader.cpp.mako
new file mode 100644
index 0000000000..3298b5bcae
--- /dev/null
+++ b/scripts/templates/ur_interface_loader.cpp.mako
@@ -0,0 +1,88 @@
+<%!
+import re
+from templates import helper as th
+%><%
+ n=namespace
+ N=n.upper()
+
+ x=tags['$x']
+ X=x.upper()
+ Adapter=adapter.upper()
+%>//===--------- ${n}_interface_loader.cpp - Level Zero Adapter ------------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <${n}_api.h>
+#include <${n}_ddi.h>
+
+#include "ur_interface_loader.hpp"
+
+static ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) {
+ if (nullptr == pDdiTable) {
+ return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+ }
+ // Pre 1.0 we enforce loader and adapter must have same version.
+ // Post 1.0 only major version match should be required.
+ if (version != UR_API_VERSION_CURRENT) {
+ return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+#ifdef UR_STATIC_ADAPTER_${Adapter}
+namespace ${n}::${adapter} {
+#elif defined(__cplusplus)
+extern "C" {
+#endif
+
+%for tbl in th.get_pfntables(specs, meta, n, tags):
+${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}(
+ %for line in th.make_param_lines(n, tags, tbl['export'], format=["type", "name", "delim"]):
+ ${line}
+ %endfor
+ )
+{
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
+ }
+
+ %for obj in tbl['functions']:
+ pDdiTable->${th.append_ws(th.make_pfn_name(n, tags, obj), 43)} = ${n}::${adapter}::${th.make_func_name(n, tags, obj)};
+ %endfor
+
+ return result;
+}
+
+%endfor
+
+#ifdef UR_STATIC_ADAPTER_${Adapter}
+} // namespace ur::${adapter}
+#elif defined(__cplusplus)
+} // extern "C"
+#endif
+
+#ifdef UR_STATIC_ADAPTER_${Adapter}
+namespace ur::${adapter} {
+ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) {
+ if (ddi == nullptr) {
+ return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+ }
+
+ ur_result_t result;
+
+%for tbl in th.get_pfntables(specs, meta, n, tags):
+ result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} );
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+%endfor
+
+ return result;
+}
+}
+#endif
diff --git a/scripts/templates/ur_interface_loader.hpp.mako b/scripts/templates/ur_interface_loader.hpp.mako
new file mode 100644
index 0000000000..e2902f93c8
--- /dev/null
+++ b/scripts/templates/ur_interface_loader.hpp.mako
@@ -0,0 +1,38 @@
+<%!
+import re
+from templates import helper as th
+%><%
+ n=namespace
+ N=n.upper()
+
+ x=tags['$x']
+ X=x.upper()
+ Adapter=adapter.upper()
+%>//===--------- ${n}_interface_loader.hpp - Level Zero Adapter ------------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <${n}_api.h>
+#include <${n}_ddi.h>
+
+namespace ${n}::${adapter} {
+%for s in specs:
+%for obj in th.filter_items(s['objects'], 'type', 'function'):
+%if not th.obj_traits.is_loader_only(obj):
+${x}_result_t ${th.make_func_name(n, tags, obj)}(
+ %for line in th.make_param_lines(n, tags, obj, format=["type", "name", "delim"]):
+ ${line}
+ %endfor
+ );
+%endif
+%endfor
+%endfor
+#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO
+ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi);
+#endif
+}
diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp
index bbaaa27cdb..9c8a0c807c 100644
--- a/source/adapters/cuda/device.cpp
+++ b/source/adapters/cuda/device.cpp
@@ -57,12 +57,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(4318u);
}
case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: {
- int ComputeUnits = 0;
- UR_CHECK_ERROR(cuDeviceGetAttribute(
- &ComputeUnits, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- hDevice->get()));
- detail::ur::assertion(ComputeUnits >= 0);
- return ReturnValue(static_cast<uint32_t>(ComputeUnits));
+ return ReturnValue(hDevice->getNumComputeUnits());
}
case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: {
return ReturnValue(MaxWorkItemDimensions);
diff --git a/source/adapters/cuda/device.hpp b/source/adapters/cuda/device.hpp
index 0a40329026..3654f2bb36 100644
--- a/source/adapters/cuda/device.hpp
+++ b/source/adapters/cuda/device.hpp
@@ -32,6 +32,7 @@ struct ur_device_handle_t_ {
int MaxCapacityLocalMem{0};
int MaxChosenLocalMem{0};
bool MaxLocalMemSizeChosen{false};
+ uint32_t NumComputeUnits{0};
public:
ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase,
@@ -54,6 +55,10 @@ struct ur_device_handle_t_ {
sizeof(MaxWorkGroupSize), &MaxWorkGroupSize,
nullptr));
+ UR_CHECK_ERROR(cuDeviceGetAttribute(
+ reinterpret_cast<int *>(&NumComputeUnits),
+ CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, cuDevice));
+
// Set local mem max size if env var is present
static const char *LocalMemSizePtrUR =
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE");
@@ -107,6 +112,8 @@ struct ur_device_handle_t_ {
int getMaxChosenLocalMem() const noexcept { return MaxChosenLocalMem; };
bool maxLocalMemSizeChosen() { return MaxLocalMemSizeChosen; };
+
+ uint32_t getNumComputeUnits() const noexcept { return NumComputeUnits; };
};
int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute);
diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp
index e2960573aa..427fde70e6 100644
--- a/source/adapters/cuda/image.cpp
+++ b/source/adapters/cuda/image.cpp
@@ -759,13 +759,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
cpy_desc.srcHost = pSrc;
- cpy_desc.srcPitch = pCopyRegion->copyExtent.width * PixelSizeBytes;
- cpy_desc.srcHeight = pCopyRegion->copyExtent.height;
+ cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes;
+ cpy_desc.srcHeight = std::max(uint64_t{1}, pSrcImageDesc->height);
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
cpy_desc.dstArray = (CUarray)pDst;
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
- cpy_desc.Depth = pDstImageDesc->arraySize;
+ cpy_desc.Depth = pCopyRegion->copyExtent.depth;
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
}
} else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
@@ -855,10 +855,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
cpy_desc.dstHost = pDst;
cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes;
- cpy_desc.dstHeight = pDstImageDesc->height;
+ cpy_desc.dstHeight = std::max(uint64_t{1}, pDstImageDesc->height);
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
- cpy_desc.Depth = pSrcImageDesc->arraySize;
+ cpy_desc.Depth = pCopyRegion->copyExtent.depth;
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
}
} else {
@@ -932,7 +932,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
cpy_desc.dstArray = (CUarray)pDst;
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
- cpy_desc.Depth = pSrcImageDesc->arraySize;
+ cpy_desc.Depth = pCopyRegion->copyExtent.depth;
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
}
// Synchronization is required here to handle the case of copying data
diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp
index d43bd046dc..2061893744 100644
--- a/source/adapters/cuda/kernel.cpp
+++ b/source/adapters/cuda/kernel.cpp
@@ -167,10 +167,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, size_t localWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
- (void)hKernel;
- (void)localWorkSize;
- (void)dynamicSharedMemorySize;
- *pGroupCountRet = 1;
+ UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_KERNEL);
+
+ // We need to set the active current device for this kernel explicitly here,
+ // because the occupancy querying API does not take device parameter.
+ ur_device_handle_t Device = hKernel->getProgram()->getDevice();
+ ScopedContext Active(Device);
+ try {
+ // We need to calculate max num of work-groups using per-device semantics.
+
+ int MaxNumActiveGroupsPerCU{0};
+ UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor(
+ &MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize,
+ dynamicSharedMemorySize));
+ detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0);
+ // Handle the case where we can't have all SMs active with at least 1 group
+ // per SM. In that case, the device is still able to run 1 work-group, hence
+ // we will manually check if it is possible with the available HW resources.
+ if (MaxNumActiveGroupsPerCU == 0) {
+ size_t MaxWorkGroupSize{};
+ urKernelGetGroupInfo(
+ hKernel, Device, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE,
+ sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr);
+ size_t MaxLocalSizeBytes{};
+ urDeviceGetInfo(Device, UR_DEVICE_INFO_LOCAL_MEM_SIZE,
+ sizeof(MaxLocalSizeBytes), &MaxLocalSizeBytes, nullptr);
+ if (localWorkSize > MaxWorkGroupSize ||
+ dynamicSharedMemorySize > MaxLocalSizeBytes ||
+ hasExceededMaxRegistersPerBlock(Device, hKernel, localWorkSize))
+ *pGroupCountRet = 0;
+ else
+ *pGroupCountRet = 1;
+ } else {
+ // Multiply by the number of SMs (CUs = compute units) on the device in
+ // order to retrieve the total number of groups/blocks that can be
+ // launched.
+ *pGroupCountRet = Device->getNumComputeUnits() * MaxNumActiveGroupsPerCU;
+ }
+ } catch (ur_result_t Err) {
+ return Err;
+ }
return UR_RESULT_SUCCESS;
}
diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt
index 164eae7521..2e77e434ca 100644
--- a/source/adapters/hip/CMakeLists.txt
+++ b/source/adapters/hip/CMakeLists.txt
@@ -8,8 +8,13 @@ set(TARGET_NAME ur_adapter_hip)
# Set default UR HIP platform to AMD
set(UR_HIP_PLATFORM "AMD" CACHE STRING "UR HIP platform, AMD or NVIDIA")
+set(DEFAULT_ROCM_PATH "/opt/rocm")
+if(DEFINED ENV{ROCM_PATH})
+ set(DEFAULT_ROCM_PATH $ENV{ROCM_PATH})
+endif()
+
# Set default ROCm installation directory
-set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir")
+set(UR_HIP_ROCM_DIR "${DEFAULT_ROCM_PATH}" CACHE STRING "ROCm installation dir")
# Allow custom location of HIP/HSA include and HIP library directories
set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE PATH
"Custom ROCm HIP include dir")
diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt
index 05bf05e0a7..cc05d36084 100644
--- a/source/adapters/level_zero/CMakeLists.txt
+++ b/source/adapters/level_zero/CMakeLists.txt
@@ -40,7 +40,7 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR)
set(UR_LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git")
endif()
if (UR_LEVEL_ZERO_LOADER_TAG STREQUAL "")
- set(UR_LEVEL_ZERO_LOADER_TAG v1.17.6)
+ set(UR_LEVEL_ZERO_LOADER_TAG v1.17.39)
endif()
# Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104
@@ -73,27 +73,33 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR)
${level-zero-loader_SOURCE_DIR}/include CACHE PATH "Path to Level Zero Headers")
endif()
-add_library (LevelZeroLoader INTERFACE)
+add_library(LevelZeroLoader INTERFACE)
# The MSVC linker does not like / at the start of a path, so to work around this
# we split it into a link library and a library path, where the path is allowed
# to have leading /.
get_filename_component(LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}" DIRECTORY)
get_filename_component(LEVEL_ZERO_LIB_NAME "${LEVEL_ZERO_LIBRARY}" NAME)
target_link_directories(LevelZeroLoader
- INTERFACE "${LEVEL_ZERO_LIBRARY_SRC}"
+ INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_LIBRARY_SRC}>"
+ "$<INSTALL_INTERFACE:lib>"
)
target_link_libraries(LevelZeroLoader
INTERFACE "${LEVEL_ZERO_LIB_NAME}"
)
-add_library (LevelZeroLoader-Headers INTERFACE)
+add_library(LevelZeroLoader-Headers INTERFACE)
target_include_directories(LevelZeroLoader-Headers
- INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}"
+ INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR}>"
+ "$<INSTALL_INTERFACE:include>"
)
if(UR_BUILD_ADAPTER_L0)
- add_ur_adapter(ur_adapter_level_zero
- SHARED
+ set(ADAPTER_LIB_TYPE SHARED)
+ if(UR_STATIC_ADAPTER_L0)
+ set(ADAPTER_LIB_TYPE STATIC)
+ endif()
+
+ add_ur_adapter(ur_adapter_level_zero ${ADAPTER_LIB_TYPE}
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
@@ -109,10 +115,10 @@ if(UR_BUILD_ADAPTER_L0)
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp
${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.hpp
${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
@@ -127,14 +133,28 @@ if(UR_BUILD_ADAPTER_L0)
${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
)
+ if(UR_STATIC_ADAPTER_L0)
+ target_compile_definitions(ur_adapter_level_zero PUBLIC UR_STATIC_ADAPTER_LEVEL_ZERO)
+
+ # 'utils' target from 'level-zero-loader' includes path which is prefixed
+ # in the source directory, this breaks the installation of 'utils' target.
+ set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
+ install(TARGETS ur_adapter_level_zero ur_umf LevelZeroLoader LevelZeroLoader-Headers ze_loader utils
+ EXPORT ${PROJECT_NAME}-targets
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ )
+ endif()
+
if(NOT WIN32)
target_sources(ur_adapter_level_zero
PRIVATE
@@ -154,7 +174,7 @@ if(UR_BUILD_ADAPTER_L0)
if (WIN32)
# 0x800: Search for the DLL only in the System32 folder
- target_link_options(ur_adapter_level_zero PUBLIC /DEPENDENTLOADFLAG:0x800)
+ target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800)
endif()
target_link_libraries(ur_adapter_level_zero PRIVATE
@@ -181,6 +201,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.hpp
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
@@ -188,6 +209,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.cpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
# v2-only sources
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
@@ -199,6 +221,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
@@ -211,6 +234,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp
index ed52254ec3..eaabb70a29 100644
--- a/source/adapters/level_zero/adapter.cpp
+++ b/source/adapters/level_zero/adapter.cpp
@@ -289,7 +289,8 @@ ur_result_t adapterStateTeardown() {
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
+namespace ur::level_zero {
+ur_result_t urAdapterGet(
uint32_t NumEntries, ///< [in] the number of platforms to be added to
///< phAdapters. If phAdapters is not NULL, then
///< NumEntries should be greater than zero, otherwise
@@ -330,7 +331,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
+ur_result_t urAdapterRelease(ur_adapter_handle_t) {
// Check first if the Adapter pointer is valid
if (GlobalAdapter) {
std::lock_guard Lock{GlobalAdapter->Mutex};
@@ -342,7 +343,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
+ur_result_t urAdapterRetain(ur_adapter_handle_t) {
if (GlobalAdapter) {
std::lock_guard Lock{GlobalAdapter->Mutex};
GlobalAdapter->RefCount++;
@@ -351,7 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError(
+ur_result_t urAdapterGetLastError(
ur_adapter_handle_t, ///< [in] handle of the platform instance
const char **Message, ///< [out] pointer to a C string where the adapter
///< specific error message will be stored.
@@ -364,11 +365,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError(
return ErrorMessageCode;
}
-UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
- ur_adapter_info_t PropName,
- size_t PropSize,
- void *PropValue,
- size_t *PropSizeRet) {
+ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName,
+ size_t PropSize, void *PropValue,
+ size_t *PropSizeRet) {
UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
switch (PropName) {
@@ -382,3 +381,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
return UR_RESULT_SUCCESS;
}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp
index e507730888..1bf4f26716 100644
--- a/source/adapters/level_zero/command_buffer.cpp
+++ b/source/adapters/level_zero/command_buffer.cpp
@@ -10,6 +10,7 @@
#include "command_buffer.hpp"
#include "helpers/kernel_helpers.hpp"
#include "logger/ur_logger.hpp"
+#include "ur_interface_loader.hpp"
#include "ur_level_zero.hpp"
/* L0 Command-buffer Extension Doc see:
@@ -297,16 +298,16 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
IsUpdatable(Desc ? Desc->isUpdatable : false),
IsProfilingEnabled(Desc ? Desc->enableProfiling : false),
IsInOrderCmdList(IsInOrderCmdList) {
- urContextRetain(Context);
- urDeviceRetain(Device);
+ ur::level_zero::urContextRetain(Context);
+ ur::level_zero::urDeviceRetain(Device);
}
void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
// Release the memory allocated to the Context stored in the command_buffer
- urContextRelease(Context);
+ ur::level_zero::urContextRelease(Context);
// Release the device
- urDeviceRelease(Device);
+ ur::level_zero::urDeviceRelease(Device);
// Release the memory allocated to the CommandList stored in the
// command_buffer
@@ -376,7 +377,7 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
for (auto &AssociatedKernel : KernelsList) {
ReleaseIndirectMem(AssociatedKernel);
- urKernelRelease(AssociatedKernel);
+ ur::level_zero::urKernelRelease(AssociatedKernel);
}
}
@@ -387,16 +388,16 @@ ur_exp_command_buffer_command_handle_t_::
ur_kernel_handle_t Kernel = nullptr)
: CommandBuffer(CommandBuffer), CommandId(CommandId), WorkDim(WorkDim),
UserDefinedLocalSize(UserDefinedLocalSize), Kernel(Kernel) {
- urCommandBufferRetainExp(CommandBuffer);
+ ur::level_zero::urCommandBufferRetainExp(CommandBuffer);
if (Kernel)
- urKernelRetain(Kernel);
+ ur::level_zero::urKernelRetain(Kernel);
}
ur_exp_command_buffer_command_handle_t_::
~ur_exp_command_buffer_command_handle_t_() {
- urCommandBufferReleaseExp(CommandBuffer);
+ ur::level_zero::urCommandBufferReleaseExp(CommandBuffer);
if (Kernel)
- urKernelRelease(Kernel);
+ ur::level_zero::urKernelRelease(Kernel);
}
void ur_exp_command_buffer_handle_t_::registerSyncPoint(
@@ -433,7 +434,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::getFenceForQueue(
return UR_RESULT_SUCCESS;
}
-namespace {
+namespace ur::level_zero {
/**
* Creates a L0 command list
@@ -493,9 +494,8 @@ bool canBeInOrder(ur_context_handle_t Context,
? (CommandBufferDesc ? CommandBufferDesc->isInOrder : false)
: false;
}
-} // namespace
-UR_APIEXPORT ur_result_t UR_APICALL
+ur_result_t
urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
const ur_exp_command_buffer_desc_t *CommandBufferDesc,
ur_exp_command_buffer_handle_t *CommandBuffer) {
@@ -567,13 +567,13 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
+ur_result_t
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t CommandBuffer) {
CommandBuffer->RefCount.increment();
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
+ur_result_t
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) {
if (!CommandBuffer->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;
@@ -583,7 +583,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) {
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
+ur_result_t
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) {
UR_ASSERT(CommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER);
// It is not allowed to append to command list from multiple threads.
@@ -627,8 +627,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) {
return UR_RESULT_SUCCESS;
}
-namespace {
-
/**
* Sets the global offset for a kernel command that will be appended to the
* command buffer.
@@ -730,9 +728,8 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer,
return UR_RESULT_SUCCESS;
}
-} // namespace
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
+ur_result_t urCommandBufferAppendKernelLaunchExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
uint32_t WorkDim, const size_t *GlobalWorkOffset,
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
@@ -769,7 +766,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
// is in use. Once the event has been signaled, the code in
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
// reference count on the kernel, using the kernel saved in CommandData.
- UR_CALL(urKernelRetain(Kernel));
+ UR_CALL(ur::level_zero::urKernelRetain(Kernel));
if (Command && CommandBuffer->IsUpdatable) {
UR_CALL(createCommandHandle(CommandBuffer, Kernel, WorkDim, LocalWorkSize,
@@ -790,7 +787,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp(
+ur_result_t urCommandBufferAppendUSMMemcpyExp(
ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src,
size_t Size, uint32_t NumSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
@@ -812,7 +809,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp(
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
+ur_result_t urCommandBufferAppendMemBufferCopyExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem,
ur_mem_handle_t DstMem, size_t SrcOffset, size_t DstOffset, size_t Size,
uint32_t NumSyncPointsInWaitList,
@@ -842,7 +839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
SyncPointWaitList, SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
+ur_result_t urCommandBufferAppendMemBufferCopyRectExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem,
ur_mem_handle_t DstMem, ur_rect_offset_t SrcOrigin,
ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch,
@@ -875,7 +872,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
SyncPointWaitList, SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp(
+ur_result_t urCommandBufferAppendMemBufferWriteExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer,
size_t Offset, size_t Size, const void *Src,
uint32_t NumSyncPointsInWaitList,
@@ -897,7 +894,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp(
SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp(
+ur_result_t urCommandBufferAppendMemBufferWriteRectExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer,
ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset,
ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch,
@@ -922,7 +919,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp(
SyncPointWaitList, SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp(
+ur_result_t urCommandBufferAppendMemBufferReadExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer,
size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
@@ -942,7 +939,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp(
SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp(
+ur_result_t urCommandBufferAppendMemBufferReadRectExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer,
ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset,
ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch,
@@ -966,7 +963,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp(
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
+ur_result_t urCommandBufferAppendUSMPrefetchExp(
ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size,
ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
@@ -1005,7 +1002,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
+ur_result_t urCommandBufferAppendUSMAdviseExp(
ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size,
ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
@@ -1067,7 +1064,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
+ur_result_t urCommandBufferAppendMemBufferFillExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer,
const void *Pattern, size_t PatternSize, size_t Offset, size_t Size,
uint32_t NumSyncPointsInWaitList,
@@ -1088,7 +1085,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp(
+ur_result_t urCommandBufferAppendUSMFillExp(
ur_exp_command_buffer_handle_t CommandBuffer, void *Ptr,
const void *Pattern, size_t PatternSize, size_t Size,
uint32_t NumSyncPointsInWaitList,
@@ -1102,8 +1099,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp(
Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
}
-namespace {
-
/**
* Gets an L0 command queue that supports the chosen engine.
* @param[in] Queue The UR queue used to submit the command buffer.
@@ -1112,8 +1107,7 @@ namespace {
* @param[out] ZeCommandQueue The L0 command queue.
* @return UR_RESULT_SUCCESS or an error code on failure
*/
-ur_result_t getZeCommandQueue(ur_queue_handle_legacy_t Queue,
- bool UseCopyEngine,
+ur_result_t getZeCommandQueue(ur_queue_handle_t Queue, bool UseCopyEngine,
ze_command_queue_handle_t &ZeCommandQueue) {
auto &QGroup = Queue->getQueueGroup(UseCopyEngine);
uint32_t QueueGroupOrdinal;
@@ -1130,7 +1124,7 @@ ur_result_t getZeCommandQueue(ur_queue_handle_legacy_t Queue,
* @return UR_RESULT_SUCCESS or an error code on failure
*/
ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer,
- ur_queue_handle_legacy_t Queue,
+ ur_queue_handle_t Queue,
uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList) {
const bool UseCopyEngine = false;
@@ -1182,7 +1176,7 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer,
* @return UR_RESULT_SUCCESS or an error code on failure
*/
ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer,
- ur_queue_handle_legacy_t Queue,
+ ur_queue_handle_t Queue,
ur_command_list_ptr_t SignalCommandList,
ur_event_handle_t *Event) {
// Execution event for this enqueue of the UR command-buffer
@@ -1226,13 +1220,12 @@ ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer,
return UR_RESULT_SUCCESS;
}
-} // namespace
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
- ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue,
- uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
- ur_event_handle_t *Event) {
- auto Queue = Legacy(UrQueue);
+ur_result_t
+urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer,
+ ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
+ const ur_event_handle_t *EventWaitList,
+ ur_event_handle_t *Event) {
std::scoped_lock Lock(Queue->Mutex);
ze_command_queue_handle_t ZeCommandQueue;
@@ -1294,13 +1287,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp(
+ur_result_t urCommandBufferRetainCommandExp(
ur_exp_command_buffer_command_handle_t Command) {
Command->RefCount.increment();
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp(
+ur_result_t urCommandBufferReleaseCommandExp(
ur_exp_command_buffer_command_handle_t Command) {
if (!Command->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;
@@ -1309,8 +1302,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp(
return UR_RESULT_SUCCESS;
}
-namespace {
-
/**
* Validates contents of the update command description.
* @param[in] Command The command which is being updated.
@@ -1620,9 +1611,8 @@ ur_result_t updateKernelCommand(
return UR_RESULT_SUCCESS;
}
-} // namespace
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
+ur_result_t urCommandBufferUpdateKernelLaunchExp(
ur_exp_command_buffer_command_handle_t Command,
const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) {
UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
@@ -1653,10 +1643,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp(
- ur_exp_command_buffer_handle_t hCommandBuffer,
- ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t
+urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
+ ur_exp_command_buffer_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet) {
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
switch (propName) {
@@ -1669,10 +1660,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp(
return UR_RESULT_ERROR_INVALID_ENUMERATION;
}
-UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp(
- ur_exp_command_buffer_command_handle_t Command,
- ur_exp_command_buffer_command_info_t PropName, size_t PropSize,
- void *PropValue, size_t *PropSizeRet) {
+ur_result_t
+urCommandBufferCommandGetInfoExp(ur_exp_command_buffer_command_handle_t Command,
+ ur_exp_command_buffer_command_info_t PropName,
+ size_t PropSize, void *PropValue,
+ size_t *PropSizeRet) {
UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
switch (PropName) {
@@ -1684,3 +1676,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp(
return UR_RESULT_ERROR_INVALID_ENUMERATION;
}
+
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp
index b7d0a4a913..6dd8a614c5 100644
--- a/source/adapters/level_zero/common.hpp
+++ b/source/adapters/level_zero/common.hpp
@@ -19,7 +19,7 @@
#include
#include
-#include
+#include
#include
#include
diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp
index 452189d038..296e3e98d5 100644
--- a/source/adapters/level_zero/context.cpp
+++ b/source/adapters/level_zero/context.cpp
@@ -18,7 +18,9 @@
#include "queue.hpp"
#include "ur_level_zero.hpp"
-UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
+namespace ur::level_zero {
+
+ur_result_t urContextCreate(
uint32_t DeviceCount, ///< [in] the number of devices given in phDevices
const ur_device_handle_t
*Devices, ///< [in][range(0, DeviceCount)] array of handle of devices.
@@ -53,7 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(
+ur_result_t urContextRetain(
ur_context_handle_t
Context ///< [in] handle of the context to get a reference of.
) {
@@ -61,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(
+ur_result_t urContextRelease(
ur_context_handle_t Context ///< [in] handle of the context to release.
) {
ur_platform_handle_t Plt = Context->getPlatform();
@@ -85,7 +87,7 @@ static const bool UseMemcpy2DOperations = [] {
return std::atoi(UseMemcpy2DOperationsFlag) > 0;
}();
-UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(
+ur_result_t urContextGetInfo(
ur_context_handle_t Context, ///< [in] handle of the context
ur_context_info_t ContextInfoType, ///< [in] type of the info to retrieve
size_t PropSize, ///< [in] the number of bytes of memory pointed to by
@@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle(
+ur_result_t urContextGetNativeHandle(
ur_context_handle_t Context, ///< [in] handle of the context.
ur_native_handle_t *NativeContext ///< [out] a pointer to the native
///< handle of the context.
@@ -142,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle(
+ur_result_t urContextCreateWithNativeHandle(
ur_native_handle_t
NativeContext, ///< [in] the native handle of the context.
ur_adapter_handle_t, uint32_t NumDevices, const ur_device_handle_t *Devices,
@@ -166,7 +168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter(
+ur_result_t urContextSetExtendedDeleter(
ur_context_handle_t Context, ///< [in] handle of the context.
ur_context_extended_deleter_t
Deleter, ///< [in] Function pointer to extended deleter.
@@ -180,6 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter(
"{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
+} // namespace ur::level_zero
ur_result_t ur_context_handle_t_::initialize() {
@@ -509,7 +512,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
// Create one event ZePool per MaxNumEventsPerPool events
if (*ZePool == nullptr) {
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
- ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
+ ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr, 0};
ZeStruct ZeEventPoolDesc;
ZeEventPoolDesc.count = MaxNumEventsPerPool;
ZeEventPoolDesc.flags = 0;
@@ -527,6 +530,8 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
counterBasedExt.flags =
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
}
+ logger::debug("ze_event_pool_desc_t counter based flags set to: {}",
+ counterBasedExt.flags);
ZeEventPoolDesc.pNext = &counterBasedExt;
}
@@ -576,8 +581,8 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) {
std::scoped_lock Lock(EventCacheMutex);
ur_device_handle_t Device = nullptr;
- if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) {
- Device = Legacy(Event->UrQueue)->Device;
+ if (!Event->IsMultiDevice && Event->UrQueue) {
+ Device = Event->UrQueue->Device;
}
auto Cache = getEventCache(Event->isHostVisible(),
@@ -598,10 +603,10 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) {
ze_device_handle_t ZeDevice = nullptr;
bool UsingImmediateCommandlists =
- !Legacy(Event->UrQueue) || Legacy(Event->UrQueue)->UsingImmCmdLists;
+ !Event->UrQueue || Event->UrQueue->UsingImmCmdLists;
- if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) {
- ZeDevice = Legacy(Event->UrQueue)->Device->ZeDevice;
+ if (!Event->IsMultiDevice && Event->UrQueue) {
+ ZeDevice = Event->UrQueue->Device->ZeDevice;
}
std::list *ZePoolCache = getZeEventPoolCache(
@@ -644,7 +649,7 @@ static const size_t CmdListsCleanupThreshold = [] {
// Retrieve an available command list to be used in a PI call.
ur_result_t ur_context_handle_t_::getAvailableCommandList(
- ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList,
+ ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList,
bool UseCopyEngine, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList, bool AllowBatching,
ze_command_queue_handle_t *ForcedCmdQueue) {
@@ -767,9 +772,11 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
CommandList =
Queue->CommandListMap
.emplace(ZeCommandList,
- ur_command_list_info_t(ZeFence, true, false,
- ZeCommandQueue, ZeQueueDesc,
- Queue->useCompletionBatching()))
+ ur_command_list_info_t(
+ ZeFence, true, false, ZeCommandQueue, ZeQueueDesc,
+ Queue->useCompletionBatching(), true,
+ ZeCommandListIt->second.InOrderList,
+ ZeCommandListIt->second.IsImmediate))
.first;
}
ZeCommandListCache.erase(ZeCommandListIt);
diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp
index a1212f0698..c2fbba633f 100644
--- a/source/adapters/level_zero/context.hpp
+++ b/source/adapters/level_zero/context.hpp
@@ -18,7 +18,7 @@
#include
#include
-#include
+#include
#include
#include
@@ -297,7 +297,7 @@ struct ur_context_handle_t_ : _ur_object {
// for executing on this device. Immediate commandlists are created only
// once for each SYCL Queue and after that they are reused.
ur_result_t getAvailableCommandList(
- ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList,
+ ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList,
bool UseCopyEngine, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList, bool AllowBatching = false,
ze_command_queue_handle_t *ForcedCmdQueue = nullptr);
diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp
index de2bee3789..e6cb650420 100644
--- a/source/adapters/level_zero/device.cpp
+++ b/source/adapters/level_zero/device.cpp
@@ -10,13 +10,59 @@
#include "device.hpp"
#include "adapter.hpp"
#include "logger/ur_logger.hpp"
+#include "ur_interface_loader.hpp"
#include "ur_level_zero.hpp"
#include "ur_util.hpp"
#include
#include
#include
-UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(
+// UR_L0_USE_COPY_ENGINE can be set to an integer value, or
+// a pair of integer values of the form "lower_index:upper_index".
+// Here, the indices point to copy engines in a list of all available copy
+// engines.
+// This functions returns this pair of indices.
+// If the user specifies only a single integer, a value of 0 indicates that
+// the copy engines will not be used at all. A value of 1 indicates that all
+// available copy engines can be used.
+const std::pair
+getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) {
+ const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE");
+ const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE");
+ static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr);
+ // If the environment variable is not set, no copy engines are used when
+ // immediate commandlists are being used. For standard commandlists all are
+ // used.
+ if (!EnvVar) {
+ if (Device->ImmCommandListUsed)
+ return std::pair(0, 0); // Only main copy engine will be used.
+ return std::pair(0, INT_MAX); // All copy engines will be used.
+ }
+ std::string CopyEngineRange = EnvVar;
+ // Environment variable can be a single integer or a pair of integers
+ // separated by ":"
+ auto pos = CopyEngineRange.find(":");
+ if (pos == std::string::npos) {
+ bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0);
+ if (UseCopyEngine)
+ return std::pair(0, INT_MAX); // All copy engines can be used.
+ return std::pair(-1, -1); // No copy engines will be used.
+ }
+ int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos));
+ int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1));
+ if ((LowerCopyEngineIndex > UpperCopyEngineIndex) ||
+ (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) {
+ logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, "
+ "default set.");
+ LowerCopyEngineIndex = 0;
+ UpperCopyEngineIndex = INT_MAX;
+ }
+ return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex);
+}
+
+namespace ur::level_zero {
+
+ur_result_t urDeviceGet(
ur_platform_handle_t Platform, ///< [in] handle of the platform instance
ur_device_type_t DeviceType, ///< [in] the type of the devices.
uint32_t NumEntries, ///< [in] the number of devices to be added to
@@ -143,7 +189,7 @@ uint64_t calculateGlobalMemSize(ur_device_handle_t Device) {
return Device->ZeGlobalMemSize.operator->()->value;
}
-UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
+ur_result_t urDeviceGetInfo(
ur_device_handle_t Device, ///< [in] handle of the device instance
ur_device_info_t ParamName, ///< [in] type of the info to retrieve
size_t propSize, ///< [in] the number of bytes pointed to by ParamValue.
@@ -1068,158 +1114,353 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
return UR_RESULT_SUCCESS;
}
-// UR_L0_USE_COPY_ENGINE can be set to an integer value, or
-// a pair of integer values of the form "lower_index:upper_index".
-// Here, the indices point to copy engines in a list of all available copy
-// engines.
-// This functions returns this pair of indices.
-// If the user specifies only a single integer, a value of 0 indicates that
-// the copy engines will not be used at all. A value of 1 indicates that all
-// available copy engines can be used.
-const std::pair
-getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) {
- const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE");
- const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE");
- static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr);
- // If the environment variable is not set, no copy engines are used when
- // immediate commandlists are being used. For standard commandlists all are
- // used.
- if (!EnvVar) {
- if (Device->ImmCommandListUsed)
- return std::pair(0, 0); // Only main copy engine will be used.
- return std::pair(0, INT_MAX); // All copy engines will be used.
- }
- std::string CopyEngineRange = EnvVar;
- // Environment variable can be a single integer or a pair of integers
- // separated by ":"
- auto pos = CopyEngineRange.find(":");
- if (pos == std::string::npos) {
- bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0);
- if (UseCopyEngine)
- return std::pair(0, INT_MAX); // All copy engines can be used.
- return std::pair(-1, -1); // No copy engines will be used.
- }
- int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos));
- int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1));
- if ((LowerCopyEngineIndex > UpperCopyEngineIndex) ||
- (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) {
- logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, "
- "default set.");
- LowerCopyEngineIndex = 0;
- UpperCopyEngineIndex = INT_MAX;
- }
- return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex);
-}
-
bool CopyEngineRequested(const ur_device_handle_t &Device) {
int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).first;
int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).second;
return ((LowerCopyQueueIndex != -1) || (UpperCopyQueueIndex != -1));
}
-// Whether immediate commandlists will be used for kernel launches and copies.
-// The default is standard commandlists. Setting 1 or 2 specifies use of
-// immediate commandlists. Note: when immediate commandlists are used then
-// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy.
-// (See env var UR_L0_DEVICE_SCOPE_EVENTS).
-
-// Get value of immediate commandlists env var setting or -1 if unset
-ur_device_handle_t_::ImmCmdlistMode
-ur_device_handle_t_::useImmediateCommandLists() {
- // If immediate commandlist setting is not explicitly set, then use the device
- // default.
- // TODO: confirm this is good once make_queue revert is added
- static const int ImmediateCommandlistsSetting = [] {
- const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS");
- const char *PiRet =
- std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS");
- const char *ImmediateCommandlistsSettingStr =
- UrRet ? UrRet : (PiRet ? PiRet : nullptr);
- if (!ImmediateCommandlistsSettingStr)
- return -1;
- return std::atoi(ImmediateCommandlistsSettingStr);
- }();
-
- if (ImmediateCommandlistsSetting == -1) {
- bool isDG2SupportedDriver =
- this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820);
- if ((isDG2SupportedDriver && isDG2()) || isPVC()) {
- return PerQueue;
- } else {
- return NotUsed;
+ur_result_t urDevicePartition(
+ ur_device_handle_t Device, ///< [in] handle of the device to partition.
+ const ur_device_partition_properties_t
+ *Properties, ///< [in] Device partition properties.
+ uint32_t NumDevices, ///< [in] the number of sub-devices.
+ ur_device_handle_t
+ *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle
+ ///< of devices. If NumDevices is less than the number of
+ ///< sub-devices available, then the function shall only
+ ///< retrieve that number of sub-devices.
+ uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of
+ ///< sub-devices the device can be partitioned into
+ ///< according to the partitioning property.
+) {
+ // Other partitioning ways are not supported by Level Zero
+ UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE);
+ if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) {
+ if ((Properties->pProperties->value.affinity_domain !=
+ UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE &&
+ Properties->pProperties->value.affinity_domain !=
+ UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) {
+ return UR_RESULT_ERROR_INVALID_VALUE;
}
+ } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) {
+ if (Properties->pProperties->value.affinity_domain != 0) {
+ return UR_RESULT_ERROR_INVALID_VALUE;
+ }
+ } else {
+ return UR_RESULT_ERROR_INVALID_VALUE;
}
- switch (ImmediateCommandlistsSetting) {
- case 0:
- return NotUsed;
- case 1:
- return PerQueue;
- case 2:
- return PerThreadPerQueue;
- default:
- return NotUsed;
- }
-}
-bool ur_device_handle_t_::useRelaxedAllocationLimits() {
- static const bool EnableRelaxedAllocationLimits = [] {
- auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS");
- const bool RetVal = UrRet ? std::stoi(*UrRet) : 0;
- return RetVal;
- }();
+ // Devices cache is normally created in piDevicesGet but still make
+ // sure that cache is populated.
+ //
+ auto Res = Device->Platform->populateDeviceCacheIfNeeded();
+ if (Res != UR_RESULT_SUCCESS) {
+ return Res;
+ }
- return EnableRelaxedAllocationLimits;
-}
+ auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) {
+ if (Device->SubDevices.size() == 0)
+ return 0;
-bool ur_device_handle_t_::useDriverInOrderLists() {
- // Use in-order lists implementation from L0 driver instead
- // of adapter's implementation.
+ // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain.
+ // However, if
+ // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that
+ // still expose CSlices in partitioning by affinity domain for compatibility
+ // reasons.
+ if (Properties->pProperties->type ==
+ UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN &&
+ !ExposeCSliceInAffinityPartitioning) {
+ if (Device->isSubDevice()) {
+ return 0;
+ }
+ }
+ if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) {
+ // Not a CSlice-based partitioning.
+ if (!Device->SubDevices[0]->isCCS()) {
+ return 0;
+ }
+ }
- static const bool UseDriverInOrderLists = [&] {
- const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS");
- bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar(
- 1, 3, L0_DRIVER_INORDER_MIN_VERSION);
- if (!UrRet)
- return CompatibleDriver;
- return std::atoi(UrRet) != 0;
+ return Device->SubDevices.size();
}();
- return UseDriverInOrderLists;
-}
-
-ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
- int SubSubDeviceIndex) {
- // Maintain various device properties cache.
- // Note that we just describe here how to compute the data.
- // The real initialization is upon first access.
+ // TODO: Consider support for partitioning to <= total sub-devices.
+ // Currently supported partitioning (by affinity domain/numa) would always
+ // partition to all sub-devices.
//
- auto ZeDevice = this->ZeDevice;
- ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) {
- ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties));
- };
-
- ZeDeviceComputeProperties.Compute =
- [ZeDevice](ze_device_compute_properties_t &Properties) {
- ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties));
- };
+ if (NumDevices != 0)
+ UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE);
- ZeDeviceIpVersionExt.Compute =
- [ZeDevice](ze_device_ip_version_ext_t &Properties) {
- ze_device_properties_t P;
- P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
- P.pNext = (void *)&Properties;
- ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P));
- };
+ for (uint32_t I = 0; I < NumDevices; I++) {
+ auto prop = Properties->pProperties[0];
+ if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) {
+ // In case the value is NEXT_PARTITIONABLE, we need to change it to the
+ // chosen domain. This will always be NUMA since that's the only domain
+ // supported by level zero.
+ prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA;
+ }
+ Device->SubDevices[I]->SubDeviceCreationProperty = prop;
- ZeDeviceImageProperties.Compute =
- [ZeDevice](ze_device_image_properties_t &Properties) {
- ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties));
- };
+ OutDevices[I] = Device->SubDevices[I];
+ // reusing the same pi_device needs to increment the reference count
+ ur::level_zero::urDeviceRetain(OutDevices[I]);
+ }
- ZeDeviceModuleProperties.Compute =
- [ZeDevice](ze_device_module_properties_t &Properties) {
- ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties));
- };
+ if (NumDevicesRet) {
+ *NumDevicesRet = EffectiveNumDevices;
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urDeviceSelectBinary(
+ ur_device_handle_t
+ Device, ///< [in] handle of the device to select binary for.
+ const ur_device_binary_t
+ *Binaries, ///< [in] the array of binaries to select from.
+ uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries.
+ ///< Must greater than or equal to zero otherwise
+ ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned.
+ uint32_t
+ *SelectedBinary ///< [out] the index of the selected binary in the input
+ ///< array of binaries. If a suitable binary was not
+ ///< found the function returns ${X}_INVALID_BINARY.
+) {
+ std::ignore = Device;
+ // TODO: this is a bare-bones implementation for choosing a device image
+ // that would be compatible with the targeted device. An AOT-compiled
+ // image is preferred over SPIR-V for known devices (i.e. Intel devices)
+ // The implementation makes no effort to differentiate between multiple images
+ // for the given device, and simply picks the first one compatible.
+ //
+ // Real implementation will use the same mechanism OpenCL ICD dispatcher
+ // uses. Something like:
+ // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT);
+ // return context->dispatch->piextDeviceSelectIR(
+ // ctx, images, num_images, selected_image);
+ // where context->dispatch is set to the dispatch table provided by PI
+ // plugin for platform/device the ctx was created for.
+
+ // Look for GEN binary, which we known can only be handled by Level-Zero now.
+ const char *BinaryTarget =
+ UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN;
+
+ uint32_t *SelectedBinaryInd = SelectedBinary;
+
+ // Find the appropriate device image, fallback to spirv if not found
+ constexpr uint32_t InvalidInd = (std::numeric_limits::max)();
+ uint32_t Spirv = InvalidInd;
+
+ for (uint32_t i = 0; i < NumBinaries; ++i) {
+ if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) {
+ *SelectedBinaryInd = i;
+ return UR_RESULT_SUCCESS;
+ }
+ if (strcmp(Binaries[i].pDeviceTargetSpec,
+ UR_DEVICE_BINARY_TARGET_SPIRV64) == 0)
+ Spirv = i;
+ }
+ // Points to a spirv image, if such indeed was found
+ if ((*SelectedBinaryInd = Spirv) != InvalidInd)
+ return UR_RESULT_SUCCESS;
+
+ // No image can be loaded for the given device
+ return UR_RESULT_ERROR_INVALID_BINARY;
+}
+
+ur_result_t urDeviceGetNativeHandle(
+ ur_device_handle_t Device, ///< [in] handle of the device.
+ ur_native_handle_t
+ *NativeDevice ///< [out] a pointer to the native handle of the device.
+) {
+ *NativeDevice = reinterpret_cast(Device->ZeDevice);
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urDeviceCreateWithNativeHandle(
+ ur_native_handle_t NativeDevice, ///< [in] the native handle of the device.
+ [[maybe_unused]] ur_adapter_handle_t
+ Adapter, ///< [in] handle of the platform instance
+ [[maybe_unused]] const ur_device_native_properties_t
+ *Properties, ///< [in][optional] pointer to native device properties
+ ///< struct.
+ ur_device_handle_t
+ *Device ///< [out] pointer to the handle of the device object created.
+) {
+ auto ZeDevice = ur_cast(NativeDevice);
+
+ // The SYCL spec requires that the set of devices must remain fixed for the
+ // duration of the application's execution. We assume that we found all of the
+ // Level Zero devices when we initialized the platforms/devices cache, so the
+ // "NativeHandle" must already be in the cache. If it is not, this must not be
+ // a valid Level Zero device.
+
+ ur_device_handle_t Dev = nullptr;
+ if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) {
+ for (const auto &p : *platforms) {
+ Dev = p->getDeviceFromNativeHandle(ZeDevice);
+ }
+ } else {
+ return GlobalAdapter->PlatformCache->get_error();
+ }
+
+ if (Dev == nullptr)
+ return UR_RESULT_ERROR_INVALID_VALUE;
+
+ *Device = Dev;
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urDeviceGetGlobalTimestamps(
+ ur_device_handle_t Device, ///< [in] handle of the device instance
+ uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's
+ ///< global timestamp that correlates with the
+ ///< Host's global timestamp value
+ uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global
+ ///< timestamp that correlates with the Device's
+ ///< global timestamp value
+) {
+ const uint64_t &ZeTimerResolution =
+ Device->ZeDeviceProperties->timerResolution;
+ const uint64_t TimestampMaxCount = Device->getTimestampMask();
+ uint64_t DeviceClockCount, Dummy;
+
+ ZE2UR_CALL(zeDeviceGetGlobalTimestamps,
+ (Device->ZeDevice,
+ HostTimestamp == nullptr ? &Dummy : HostTimestamp,
+ &DeviceClockCount));
+
+ if (DeviceTimestamp != nullptr) {
+ *DeviceTimestamp =
+ (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution;
+ }
+
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urDeviceRetain(ur_device_handle_t Device) {
+ // The root-device ref-count remains unchanged (always 1).
+ if (Device->isSubDevice()) {
+ Device->RefCount.increment();
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urDeviceRelease(ur_device_handle_t Device) {
+ // Root devices are destroyed during the piTearDown process.
+ if (Device->isSubDevice()) {
+ if (Device->RefCount.decrementAndTest()) {
+ delete Device;
+ }
+ }
+
+ return UR_RESULT_SUCCESS;
+}
+} // namespace ur::level_zero
+
+// Whether immediate commandlists will be used for kernel launches and copies.
+// The default is standard commandlists. Setting 1 or 2 specifies use of
+// immediate commandlists. Note: when immediate commandlists are used then
+// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy.
+// (See env var UR_L0_DEVICE_SCOPE_EVENTS).
+
+// Get value of immediate commandlists env var setting or -1 if unset
+ur_device_handle_t_::ImmCmdlistMode
+ur_device_handle_t_::useImmediateCommandLists() {
+ // If immediate commandlist setting is not explicitly set, then use the device
+ // default.
+ // TODO: confirm this is good once make_queue revert is added
+ static const int ImmediateCommandlistsSetting = [] {
+ const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS");
+ const char *PiRet =
+ std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS");
+ const char *ImmediateCommandlistsSettingStr =
+ UrRet ? UrRet : (PiRet ? PiRet : nullptr);
+ if (!ImmediateCommandlistsSettingStr)
+ return -1;
+ return std::atoi(ImmediateCommandlistsSettingStr);
+ }();
+
+ if (ImmediateCommandlistsSetting == -1) {
+ bool isDG2SupportedDriver =
+ this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820);
+ if ((isDG2SupportedDriver && isDG2()) || isPVC()) {
+ return PerQueue;
+ } else {
+ return NotUsed;
+ }
+ }
+ switch (ImmediateCommandlistsSetting) {
+ case 0:
+ return NotUsed;
+ case 1:
+ return PerQueue;
+ case 2:
+ return PerThreadPerQueue;
+ default:
+ return NotUsed;
+ }
+}
+
+bool ur_device_handle_t_::useRelaxedAllocationLimits() {
+ static const bool EnableRelaxedAllocationLimits = [] {
+ auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS");
+ const bool RetVal = UrRet ? std::stoi(*UrRet) : 0;
+ return RetVal;
+ }();
+
+ return EnableRelaxedAllocationLimits;
+}
+
+bool ur_device_handle_t_::useDriverInOrderLists() {
+ // Use in-order lists implementation from L0 driver instead
+ // of adapter's implementation.
+
+ static const bool UseDriverInOrderLists = [&] {
+ const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS");
+ bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar(
+ 1, 3, L0_DRIVER_INORDER_MIN_VERSION);
+ if (!UrRet)
+ return CompatibleDriver;
+ return std::atoi(UrRet) != 0;
+ }();
+
+ return UseDriverInOrderLists;
+}
+
+ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
+ int SubSubDeviceIndex) {
+ // Maintain various device properties cache.
+ // Note that we just describe here how to compute the data.
+ // The real initialization is upon first access.
+ //
+ auto ZeDevice = this->ZeDevice;
+ ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) {
+ ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties));
+ };
+
+ ZeDeviceComputeProperties.Compute =
+ [ZeDevice](ze_device_compute_properties_t &Properties) {
+ ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties));
+ };
+
+ ZeDeviceIpVersionExt.Compute =
+ [ZeDevice](ze_device_ip_version_ext_t &Properties) {
+ ze_device_properties_t P;
+ P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
+ P.pNext = (void *)&Properties;
+ ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P));
+ };
+
+ ZeDeviceImageProperties.Compute =
+ [ZeDevice](ze_device_image_properties_t &Properties) {
+ ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties));
+ };
+
+ ZeDeviceModuleProperties.Compute =
+ [ZeDevice](ze_device_module_properties_t &Properties) {
+ ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties));
+ };
ZeDeviceMemoryProperties.Compute =
[ZeDevice](
@@ -1314,7 +1555,7 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
return UR_RESULT_ERROR_UNKNOWN;
}
- if (CopyEngineRequested((ur_device_handle_t)this)) {
+ if (ur::level_zero::CopyEngineRequested((ur_device_handle_t)this)) {
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (((QueueGroupProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) &&
@@ -1355,26 +1596,6 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
return UR_RESULT_SUCCESS;
}
-ur_result_t urDeviceRetain(ur_device_handle_t Device) {
-
- // The root-device ref-count remains unchanged (always 1).
- if (Device->isSubDevice()) {
- Device->RefCount.increment();
- }
- return UR_RESULT_SUCCESS;
-}
-
-ur_result_t urDeviceRelease(ur_device_handle_t Device) {
- // Root devices are destroyed during the piTearDown process.
- if (Device->isSubDevice()) {
- if (Device->RefCount.decrementAndTest()) {
- delete Device;
- }
- }
-
- return UR_RESULT_SUCCESS;
-}
-
void ZeDriverVersionStringExtension::setZeDriverVersionString(
ur_platform_handle_t_ *Platform) {
// Check if Intel Driver Version String is available. If yes, save the API
@@ -1442,221 +1663,3 @@ void ZeUSMImportExtension::doZeUSMRelease(ze_driver_handle_t DriverHandle,
void *HostPtr) {
ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (DriverHandle, HostPtr));
}
-
-UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition(
- ur_device_handle_t Device, ///< [in] handle of the device to partition.
- const ur_device_partition_properties_t
- *Properties, ///< [in] Device partition properties.
- uint32_t NumDevices, ///< [in] the number of sub-devices.
- ur_device_handle_t
- *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle
- ///< of devices. If NumDevices is less than the number of
- ///< sub-devices available, then the function shall only
- ///< retrieve that number of sub-devices.
- uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of
- ///< sub-devices the device can be partitioned into
- ///< according to the partitioning property.
-) {
- // Other partitioning ways are not supported by Level Zero
- UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE);
- if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) {
- if ((Properties->pProperties->value.affinity_domain !=
- UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE &&
- Properties->pProperties->value.affinity_domain !=
- UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) {
- return UR_RESULT_ERROR_INVALID_VALUE;
- }
- } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) {
- if (Properties->pProperties->value.affinity_domain != 0) {
- return UR_RESULT_ERROR_INVALID_VALUE;
- }
- } else {
- return UR_RESULT_ERROR_INVALID_VALUE;
- }
-
- // Devices cache is normally created in piDevicesGet but still make
- // sure that cache is populated.
- //
- auto Res = Device->Platform->populateDeviceCacheIfNeeded();
- if (Res != UR_RESULT_SUCCESS) {
- return Res;
- }
-
- auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) {
- if (Device->SubDevices.size() == 0)
- return 0;
-
- // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain.
- // However, if
- // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that
- // still expose CSlices in partitioning by affinity domain for compatibility
- // reasons.
- if (Properties->pProperties->type ==
- UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN &&
- !ExposeCSliceInAffinityPartitioning) {
- if (Device->isSubDevice()) {
- return 0;
- }
- }
- if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) {
- // Not a CSlice-based partitioning.
- if (!Device->SubDevices[0]->isCCS()) {
- return 0;
- }
- }
-
- return Device->SubDevices.size();
- }();
-
- // TODO: Consider support for partitioning to <= total sub-devices.
- // Currently supported partitioning (by affinity domain/numa) would always
- // partition to all sub-devices.
- //
- if (NumDevices != 0)
- UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE);
-
- for (uint32_t I = 0; I < NumDevices; I++) {
- auto prop = Properties->pProperties[0];
- if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) {
- // In case the value is NEXT_PARTITIONABLE, we need to change it to the
- // chosen domain. This will always be NUMA since that's the only domain
- // supported by level zero.
- prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA;
- }
- Device->SubDevices[I]->SubDeviceCreationProperty = prop;
-
- OutDevices[I] = Device->SubDevices[I];
- // reusing the same pi_device needs to increment the reference count
- urDeviceRetain(OutDevices[I]);
- }
-
- if (NumDevicesRet) {
- *NumDevicesRet = EffectiveNumDevices;
- }
- return UR_RESULT_SUCCESS;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary(
- ur_device_handle_t
- Device, ///< [in] handle of the device to select binary for.
- const ur_device_binary_t
- *Binaries, ///< [in] the array of binaries to select from.
- uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries.
- ///< Must greater than or equal to zero otherwise
- ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned.
- uint32_t
- *SelectedBinary ///< [out] the index of the selected binary in the input
- ///< array of binaries. If a suitable binary was not
- ///< found the function returns ${X}_INVALID_BINARY.
-) {
- std::ignore = Device;
- // TODO: this is a bare-bones implementation for choosing a device image
- // that would be compatible with the targeted device. An AOT-compiled
- // image is preferred over SPIR-V for known devices (i.e. Intel devices)
- // The implementation makes no effort to differentiate between multiple images
- // for the given device, and simply picks the first one compatible.
- //
- // Real implementation will use the same mechanism OpenCL ICD dispatcher
- // uses. Something like:
- // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT);
- // return context->dispatch->piextDeviceSelectIR(
- // ctx, images, num_images, selected_image);
- // where context->dispatch is set to the dispatch table provided by PI
- // plugin for platform/device the ctx was created for.
-
- // Look for GEN binary, which we known can only be handled by Level-Zero now.
- const char *BinaryTarget =
- UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN;
-
- uint32_t *SelectedBinaryInd = SelectedBinary;
-
- // Find the appropriate device image, fallback to spirv if not found
- constexpr uint32_t InvalidInd = (std::numeric_limits<uint32_t>::max)();
- uint32_t Spirv = InvalidInd;
-
- for (uint32_t i = 0; i < NumBinaries; ++i) {
- if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) {
- *SelectedBinaryInd = i;
- return UR_RESULT_SUCCESS;
- }
- if (strcmp(Binaries[i].pDeviceTargetSpec,
- UR_DEVICE_BINARY_TARGET_SPIRV64) == 0)
- Spirv = i;
- }
- // Points to a spirv image, if such indeed was found
- if ((*SelectedBinaryInd = Spirv) != InvalidInd)
- return UR_RESULT_SUCCESS;
-
- // No image can be loaded for the given device
- return UR_RESULT_ERROR_INVALID_BINARY;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
- ur_device_handle_t Device, ///< [in] handle of the device.
- ur_native_handle_t
- *NativeDevice ///< [out] a pointer to the native handle of the device.
-) {
- *NativeDevice = reinterpret_cast<ur_native_handle_t>(Device->ZeDevice);
- return UR_RESULT_SUCCESS;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
- ur_native_handle_t NativeDevice, ///< [in] the native handle of the device.
- [[maybe_unused]] ur_adapter_handle_t
- Adapter, ///< [in] handle of the platform instance
- [[maybe_unused]] const ur_device_native_properties_t
- *Properties, ///< [in][optional] pointer to native device properties
- ///< struct.
- ur_device_handle_t
- *Device ///< [out] pointer to the handle of the device object created.
-) {
- auto ZeDevice = ur_cast<ze_device_handle_t>(NativeDevice);
-
- // The SYCL spec requires that the set of devices must remain fixed for the
- // duration of the application's execution. We assume that we found all of the
- // Level Zero devices when we initialized the platforms/devices cache, so the
- // "NativeHandle" must already be in the cache. If it is not, this must not be
- // a valid Level Zero device.
-
- ur_device_handle_t Dev = nullptr;
- if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) {
- for (const auto &p : *platforms) {
- Dev = p->getDeviceFromNativeHandle(ZeDevice);
- }
- } else {
- return GlobalAdapter->PlatformCache->get_error();
- }
-
- if (Dev == nullptr)
- return UR_RESULT_ERROR_INVALID_VALUE;
-
- *Device = Dev;
- return UR_RESULT_SUCCESS;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(
- ur_device_handle_t Device, ///< [in] handle of the device instance
- uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's
- ///< global timestamp that correlates with the
- ///< Host's global timestamp value
- uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global
- ///< timestamp that correlates with the Device's
- ///< global timestamp value
-) {
- const uint64_t &ZeTimerResolution =
- Device->ZeDeviceProperties->timerResolution;
- const uint64_t TimestampMaxCount = Device->getTimestampMask();
- uint64_t DeviceClockCount, Dummy;
-
- ZE2UR_CALL(zeDeviceGetGlobalTimestamps,
- (Device->ZeDevice,
- HostTimestamp == nullptr ? &Dummy : HostTimestamp,
- &DeviceClockCount));
-
- if (DeviceTimestamp != nullptr) {
- *DeviceTimestamp =
- (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution;
- }
-
- return UR_RESULT_SUCCESS;
-}
diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp
index 898edff779..a8b8098819 100644
--- a/source/adapters/level_zero/device.hpp
+++ b/source/adapters/level_zero/device.hpp
@@ -19,7 +19,7 @@
#include
#include
-#include
+#include
#include
#include
diff --git a/source/adapters/level_zero/enqueue_native.cpp b/source/adapters/level_zero/enqueue_native.cpp
index b67cccc4f1..7c3a1da988 100644
--- a/source/adapters/level_zero/enqueue_native.cpp
+++ b/source/adapters/level_zero/enqueue_native.cpp
@@ -8,13 +8,30 @@
//
//===----------------------------------------------------------------------===//
+#include
#include
+#include
-#include "queue.hpp"
+namespace ur::level_zero {
+
+ur_result_t urEnqueueNativeCommandExp(
+ ur_queue_handle_t hQueue,
+ ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data,
+ uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList,
+ const ur_exp_enqueue_native_command_properties_t *pProperties,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent) {
+ std::ignore = hQueue;
+ std::ignore = pfnNativeEnqueue;
+ std::ignore = data;
+ std::ignore = numMemsInMemList;
+ std::ignore = phMemList;
+ std::ignore = pProperties;
+ std::ignore = numEventsInWaitList;
+ std::ignore = phEventWaitList;
+ std::ignore = phEvent;
-ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp(
- ur_exp_enqueue_native_command_function_t, void *, uint32_t,
- const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *,
- uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
+
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp
index f4dee0d661..f58db37753 100644
--- a/source/adapters/level_zero/event.cpp
+++ b/source/adapters/level_zero/event.cpp
@@ -18,6 +18,7 @@
#include "common.hpp"
#include "event.hpp"
#include "logger/ur_logger.hpp"
+#include "ur_interface_loader.hpp"
#include "ur_level_zero.hpp"
void printZeEventList(const _ur_ze_event_list_t &UrZeEventList) {
@@ -46,21 +47,23 @@ static const bool UseMultipleCmdlistBarriers = [] {
}();
bool WaitListEmptyOrAllEventsFromSameQueue(
- ur_queue_handle_legacy_t Queue, uint32_t NumEventsInWaitList,
+ ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList) {
if (!NumEventsInWaitList)
return true;
for (uint32_t i = 0; i < NumEventsInWaitList; ++i) {
- if (Queue != Legacy(EventWaitList[i]->UrQueue))
+ if (Queue != EventWaitList[i]->UrQueue)
return false;
}
return true;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of
- ///< the queue object
+namespace ur::level_zero {
+
+ur_result_t urEnqueueEventsWait(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
@@ -72,7 +75,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
if (EventWaitList) {
bool UseCopyEngine = false;
@@ -152,9 +154,8 @@ static const bool InOrderBarrierBySignal = [] {
return (UrRet ? std::atoi(UrRet) : true);
}();
-ur_result_t
-ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
- ///< queue object
+ur_result_t urEnqueueEventsWaitWithBarrier(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
@@ -166,8 +167,6 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
-
// Lock automatically releases when this goes out of scope.
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
@@ -197,7 +196,9 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
//
if (Queue->isInOrderQueue() && InOrderBarrierBySignal &&
!Queue->isProfilingEnabled()) {
- if (EventWaitList.Length) {
+ // If we are using driver in order lists, then append wait on events
+ // is unnecessary and we can signal the event created.
+ if (EventWaitList.Length && !CmdList->second.IsInOrderList) {
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(CmdList->first, EventWaitList.Length,
EventWaitList.ZeEventList));
@@ -222,9 +223,8 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
return UR_RESULT_SUCCESS;
}
- ur_event_handle_t InternalEvent;
+ ur_event_handle_t ResultEvent = nullptr;
bool IsInternal = OutEvent == nullptr;
- ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
// For in-order queue and wait-list which is empty or has events from
// the same queue just use the last command event as the barrier event.
@@ -234,8 +234,11 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList,
EventWaitList) &&
Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) {
- UR_CALL(urEventRetain(Queue->LastCommandEvent));
- *Event = Queue->LastCommandEvent;
+ UR_CALL(ur::level_zero::urEventRetain(Queue->LastCommandEvent));
+ ResultEvent = Queue->LastCommandEvent;
+ if (OutEvent) {
+ *OutEvent = ResultEvent;
+ }
return UR_RESULT_SUCCESS;
}
@@ -265,16 +268,21 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
EventWaitList, OkToBatch));
// Insert the barrier into the command-list and execute.
- UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, *Event, IsInternal));
+ UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, ResultEvent,
+ IsInternal));
UR_CALL(Queue->executeCommandList(CmdList, false, OkToBatch));
// Because of the dependency between commands in the in-order queue we don't
// need to keep track of any active barriers if we have in-order queue.
if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue()) {
- auto UREvent = reinterpret_cast<ur_event_handle_t>(*Event);
+ auto UREvent = reinterpret_cast<ur_event_handle_t>(ResultEvent);
Queue->ActiveBarriers.add(UREvent);
}
+
+ if (OutEvent) {
+ *OutEvent = ResultEvent;
+ }
return UR_RESULT_SUCCESS;
}
@@ -304,8 +312,8 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
for (auto &QueueMap :
{Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID})
for (auto &QueueGroup : QueueMap) {
- bool UseCopyEngine = QueueGroup.second.Type !=
- ur_queue_handle_legacy_t_::queue_type::Compute;
+ bool UseCopyEngine =
+ QueueGroup.second.Type != ur_queue_handle_t_::queue_type::Compute;
if (Queue->UsingImmCmdLists) {
// If immediate command lists are being used, each will act as their own
// queue, so we must insert a barrier into each.
@@ -362,20 +370,20 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
// Insert a barrier with the events from each command-queue into the
// convergence command list. The resulting event signals the convergence of
// all barriers.
- UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList, *Event,
- IsInternal));
+ UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList,
+ ResultEvent, IsInternal));
} else {
// If there is only a single queue then insert a barrier and the single
// result event can be used as our active barrier and used as the return
// event. Take into account whether output event is discarded or not.
- UR_CALL(insertBarrierIntoCmdList(CmdLists[0], _ur_ze_event_list_t{}, *Event,
- IsInternal));
+ UR_CALL(insertBarrierIntoCmdList(CmdLists[0], _ur_ze_event_list_t{},
+ ResultEvent, IsInternal));
}
// Execute each command list so the barriers can be encountered.
for (ur_command_list_ptr_t &CmdList : CmdLists) {
- bool IsCopy = CmdList->second.isCopy(
- reinterpret_cast<ur_queue_handle_t>(Queue));
+ bool IsCopy =
+ CmdList->second.isCopy(reinterpret_cast<ur_queue_handle_t>(Queue));
const auto &CommandBatch =
(IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch;
// Only batch if the matching CmdList is already open.
@@ -385,12 +393,14 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
}
UR_CALL(Queue->ActiveBarriers.clear());
- auto UREvent = reinterpret_cast(*Event);
- Queue->ActiveBarriers.add(UREvent);
+ Queue->ActiveBarriers.add(ResultEvent);
+ if (OutEvent) {
+ *OutEvent = ResultEvent;
+ }
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(
+ur_result_t urEventGetInfo(
ur_event_handle_t Event, ///< [in] handle of the event object
ur_event_info_t PropName, ///< [in] the name of the event property to query
size_t PropValueSize, ///< [in] size in bytes of the event property value
@@ -419,7 +429,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(
// possible that this is trying to query some event's status that
// is part of the batch. This isn't strictly required, but it seems
// like a reasonable thing to do.
- auto UrQueue = Legacy(Event->UrQueue);
+ auto UrQueue = Event->UrQueue;
if (UrQueue) {
// Lock automatically releases when this goes out of scope.
std::unique_lock<ur_shared_mutex> Lock(UrQueue->Mutex, std::try_to_lock);
@@ -473,7 +483,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
+ur_result_t urEventGetProfilingInfo(
ur_event_handle_t Event, ///< [in] handle of the event object
ur_profiling_info_t
PropName, ///< [in] the name of the profiling property to query
@@ -491,9 +501,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
}
- ur_device_handle_t Device = Legacy(Event->UrQueue)
- ? Legacy(Event->UrQueue)->Device
- : Event->Context->Devices[0];
+ ur_device_handle_t Device =
+ Event->UrQueue ? Event->UrQueue->Device : Event->Context->Devices[0];
uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution;
const uint64_t TimestampMaxValue = Device->getTimestampMask();
@@ -517,10 +526,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
return ReturnValue(Event->RecordEventEndTimestamp);
// Otherwise we need to collect it from the queue.
- auto Entry = Legacy(Event->UrQueue)->EndTimeRecordings.find(Event);
+ auto Entry = Event->UrQueue->EndTimeRecordings.find(Event);
// Unexpected state if there is no end-time record.
- if (Entry == Legacy(Event->UrQueue)->EndTimeRecordings.end())
+ if (Entry == Event->UrQueue->EndTimeRecordings.end())
return UR_RESULT_ERROR_UNKNOWN;
auto &EndTimeRecording = Entry->second;
@@ -545,7 +554,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
// anymore, so we cache it on the event and evict the record from the
// queue.
Event->RecordEventEndTimestamp = ContextEndTime;
- Legacy(Event->UrQueue)->EndTimeRecordings.erase(Entry);
+ Event->UrQueue->EndTimeRecordings.erase(Entry);
return ReturnValue(ContextEndTime);
}
@@ -663,7 +672,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp(
+ur_result_t urEnqueueTimestampRecordingExp(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
bool Blocking, ///< [in] blocking or non-blocking enqueue
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t
@@ -677,7 +687,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp(
*OutEvent ///< [in,out] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
// Lock automatically releases when this goes out of scope.
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
@@ -701,12 +710,13 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp(
(*OutEvent)->WaitList = TmpWaitList;
uint64_t DeviceStartTimestamp = 0;
- UR_CALL(urDeviceGetGlobalTimestamps(Device, &DeviceStartTimestamp, nullptr));
+ UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps(
+ Device, &DeviceStartTimestamp, nullptr));
(*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp;
// Create a new entry in the queue's recordings.
Queue->EndTimeRecordings[*OutEvent] =
- ur_queue_handle_legacy_t_::end_time_recording{};
+ ur_queue_handle_t_::end_time_recording{};
ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp,
(CommandList->first,
@@ -720,64 +730,15 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
- ze_event_handle_t &ZeHostVisibleEvent) {
- auto UrQueue = Legacy(this->UrQueue);
-
- std::scoped_lock<ur_shared_mutex, ur_shared_mutex> Lock(UrQueue->Mutex,
- this->Mutex);
-
- if (!HostVisibleEvent) {
- this->IsCreatingHostProxyEvent = true;
- if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy)
- die("getOrCreateHostVisibleEvent: missing host-visible event");
-
- // Submit the command(s) signalling the proxy event to the queue.
- // We have to first submit a wait for the device-only event for which this
- // proxy is created.
- //
- // Get a new command list to be used on this call
-
- // We want to batch these commands to avoid extra submissions (costly)
- bool OkToBatch = true;
-
- ur_command_list_ptr_t CommandList{};
- UR_CALL(UrQueue->Context->getAvailableCommandList(
- UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch))
-
- // Create a "proxy" host-visible event.
- UR_CALL(createEventAndAssociateQueue(
- UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList,
- /* IsInternal */ false, /* IsMultiDevice */ false,
- /* HostVisible */ true));
-
- if (this->IsInnerBatchedEvent) {
- ZE2UR_CALL(zeCommandListAppendBarrier,
- (CommandList->first, ZeEvent, 0, nullptr));
- } else {
- ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
- (CommandList->first, 1, &ZeEvent));
- }
- ZE2UR_CALL(zeCommandListAppendSignalEvent,
- (CommandList->first, HostVisibleEvent->ZeEvent));
-
- UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch))
- this->IsCreatingHostProxyEvent = false;
- }
-
- ZeHostVisibleEvent = HostVisibleEvent->ZeEvent;
- return UR_RESULT_SUCCESS;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
- uint32_t NumEvents, ///< [in] number of events in the event list
- const ur_event_handle_t
- *EventWaitList ///< [in][range(0, numEvents)] pointer to a list of
- ///< events to wait for completion
+ur_result_t
+urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list
+ const ur_event_handle_t
+ *EventWaitList ///< [in][range(0, numEvents)] pointer to a list
+ ///< of events to wait for completion
) {
for (uint32_t I = 0; I < NumEvents; I++) {
auto e = EventWaitList[I];
- auto UrQueue = Legacy(e->UrQueue);
+ auto UrQueue = e->UrQueue;
if (UrQueue && UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) {
// Make sure to add all host-visible "proxy" event signals if needed.
// This ensures that all signalling commands are submitted below and
@@ -795,7 +756,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
// Submit dependent open command lists for execution, if any
for (uint32_t I = 0; I < NumEvents; I++) {
ur_event_handle_t_ *Event = ur_cast<ur_event_handle_t_ *>(EventWaitList[I]);
- auto UrQueue = Legacy(Event->UrQueue);
+ auto UrQueue = Event->UrQueue;
if (UrQueue) {
// Lock automatically releases when this goes out of scope.
std::scoped_lock<ur_shared_mutex> lock(UrQueue->Mutex);
@@ -803,7 +764,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
UR_CALL(UrQueue->executeAllOpenCommandLists());
}
}
- std::unordered_set<ur_queue_handle_legacy_t> Queues;
+ std::unordered_set<ur_queue_handle_t> Queues;
for (uint32_t I = 0; I < NumEvents; I++) {
{
ur_event_handle_t_ *Event =
@@ -830,13 +791,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
Event->Completed = true;
}
}
- if (auto Q = Legacy(Event->UrQueue)) {
+ if (auto Q = Event->UrQueue) {
if (Q->UsingImmCmdLists && Q->isInOrderQueue())
// Use information about waited event to cleanup completed events in
// the in-order queue.
CleanupEventsInImmCmdLists(
- Legacy(Event->UrQueue), false /* QueueLocked */,
- false /* QueueSynced */,
+ Event->UrQueue, false /* QueueLocked */, false /* QueueSynced */,
reinterpret_cast<ur_event_handle_t>(Event));
else {
// NOTE: we are cleaning up after the event here to free resources
@@ -861,8 +821,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(
- ur_event_handle_t Event ///< [in] handle of the event object
+ur_result_t
+urEventRetain(ur_event_handle_t Event ///< [in] handle of the event object
) {
Event->RefCountExternal++;
Event->RefCount.increment();
@@ -870,8 +830,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(
- ur_event_handle_t Event ///< [in] handle of the event object
+ur_result_t
+urEventRelease(ur_event_handle_t Event ///< [in] handle of the event object
) {
Event->RefCountExternal--;
UR_CALL(urEventReleaseInternal(Event));
@@ -879,7 +839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
+ur_result_t urEventGetNativeHandle(
ur_event_handle_t Event, ///< [in] handle of the event.
ur_native_handle_t
*NativeEvent ///< [out] a pointer to the native handle of the event.
@@ -892,7 +852,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
// Event can potentially be in an open command-list, make sure that
// it is submitted for execution to avoid potential deadlock if
// interop app is going to wait for it.
- auto Queue = Legacy(Event->UrQueue);
+ auto Queue = Event->UrQueue;
if (Queue) {
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
const auto &OpenCommandList = Queue->eventOpenCommandList(Event);
@@ -904,7 +864,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate(
+ur_result_t urExtEventCreate(
ur_context_handle_t Context, ///< [in] handle of the context object
ur_event_handle_t
*Event ///< [out] pointer to the handle of the event object created.
@@ -917,7 +877,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
+ur_result_t urEventCreateWithNativeHandle(
ur_native_handle_t NativeEvent, ///< [in] the native handle of the event.
ur_context_handle_t Context, ///< [in] handle of the context object
const ur_event_native_properties_t *Properties,
@@ -967,7 +927,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(
+ur_result_t urEventSetCallback(
ur_event_handle_t Event, ///< [in] handle of the event object
ur_execution_info_t ExecStatus, ///< [in] execution status of the event
ur_event_callback_t Notify, ///< [in] execution status of the event
@@ -983,6 +943,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
+} // namespace ur::level_zero
+
+ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
+ ze_event_handle_t &ZeHostVisibleEvent) {
+ auto UrQueue = this->UrQueue;
+
+ std::scoped_lock<ur_shared_mutex, ur_shared_mutex> Lock(UrQueue->Mutex,
+ this->Mutex);
+
+ if (!HostVisibleEvent) {
+ this->IsCreatingHostProxyEvent = true;
+ if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy)
+ die("getOrCreateHostVisibleEvent: missing host-visible event");
+
+ // Submit the command(s) signalling the proxy event to the queue.
+ // We have to first submit a wait for the device-only event for which this
+ // proxy is created.
+ //
+ // Get a new command list to be used on this call
+
+ // We want to batch these commands to avoid extra submissions (costly)
+ bool OkToBatch = true;
+
+ ur_command_list_ptr_t CommandList{};
+ UR_CALL(UrQueue->Context->getAvailableCommandList(
+ UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch))
+
+ // Create a "proxy" host-visible event.
+ UR_CALL(createEventAndAssociateQueue(
+ UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList,
+ /* IsInternal */ false, /* IsMultiDevice */ false,
+ /* HostVisible */ true));
+
+ if (this->IsInnerBatchedEvent) {
+ ZE2UR_CALL(zeCommandListAppendBarrier,
+ (CommandList->first, ZeEvent, 0, nullptr));
+ } else {
+ ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
+ (CommandList->first, 1, &ZeEvent));
+ }
+ ZE2UR_CALL(zeCommandListAppendSignalEvent,
+ (CommandList->first, HostVisibleEvent->ZeEvent));
+
+ UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch))
+ this->IsCreatingHostProxyEvent = false;
+ }
+
+ ZeHostVisibleEvent = HostVisibleEvent->ZeEvent;
+ return UR_RESULT_SUCCESS;
+}
+
ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
if (!Event->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;
@@ -1022,7 +1033,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) {
}
// Save pointer to the queue before deleting/resetting event.
- auto Queue = Legacy(Event->UrQueue);
+ auto Queue = Event->UrQueue;
// If the event was a timestamp recording, we try to evict its entry in the
// queue.
@@ -1099,7 +1110,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
ur_kernel_handle_t AssociatedKernel = nullptr;
// List of dependent events.
std::list<ur_event_handle_t> EventsToBeReleased;
- ur_queue_handle_legacy_t AssociatedQueue = nullptr;
+ ur_queue_handle_t AssociatedQueue = nullptr;
{
// If the Event is already locked, then continue with the cleanup, otherwise
// block on locking the event.
@@ -1113,7 +1124,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
if (Event->CleanedUp)
return UR_RESULT_SUCCESS;
- AssociatedQueue = Legacy(Event->UrQueue);
+ AssociatedQueue = Event->UrQueue;
// Remember the kernel associated with this event if there is one. We are
// going to release it later.
@@ -1158,7 +1169,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
// We've reset event data members above, now cleanup resources.
if (AssociatedKernel) {
ReleaseIndirectMem(AssociatedKernel);
- UR_CALL(urKernelRelease(AssociatedKernel));
+ UR_CALL(ur::level_zero::urKernelRelease(AssociatedKernel));
}
if (AssociatedQueue) {
@@ -1217,7 +1228,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
}
if (DepEventKernel) {
ReleaseIndirectMem(DepEventKernel);
- UR_CALL(urKernelRelease(DepEventKernel));
+ UR_CALL(ur::level_zero::urKernelRelease(DepEventKernel));
}
UR_CALL(urEventReleaseInternal(DepEvent));
}
@@ -1230,9 +1241,9 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
// The "HostVisible" argument specifies if event needs to be allocated from
// a host-visible pool.
//
-ur_result_t EventCreate(ur_context_handle_t Context,
- ur_queue_handle_legacy_t Queue, bool IsMultiDevice,
- bool HostVisible, ur_event_handle_t *RetEvent,
+ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
+ bool IsMultiDevice, bool HostVisible,
+ ur_event_handle_t *RetEvent,
bool CounterBasedEventEnabled,
bool ForceDisableProfiling) {
bool ProfilingEnabled =
@@ -1319,7 +1330,7 @@ ur_result_t ur_event_handle_t_::reset() {
ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
uint32_t EventListLength, const ur_event_handle_t *EventList,
- ur_queue_handle_legacy_t CurQueue, bool UseCopyEngine) {
+ ur_queue_handle_t CurQueue, bool UseCopyEngine) {
this->Length = 0;
this->ZeEventList = nullptr;
this->UrEventList = nullptr;
@@ -1435,7 +1446,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
}
}
- auto Queue = Legacy(EventList[I]->UrQueue);
+ auto Queue = EventList[I]->UrQueue;
auto CurQueueDevice = CurQueue->Device;
std::optional> QueueLock =
@@ -1508,8 +1519,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
std::shared_lock Lock(EventList[I]->Mutex);
- ur_device_handle_t QueueRootDevice;
- ur_device_handle_t CurrentQueueRootDevice;
+ ur_device_handle_t QueueRootDevice = nullptr;
+ ur_device_handle_t CurrentQueueRootDevice = nullptr;
if (Queue) {
QueueRootDevice = Queue->Device;
CurrentQueueRootDevice = CurQueueDevice;
@@ -1537,8 +1548,13 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(ZeCommandList, 1u, &EventList[I]->ZeEvent));
- if (!MultiDeviceEvent->CounterBasedEventsEnabled)
+ if (!MultiDeviceEvent->CounterBasedEventsEnabled) {
ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent));
+ } else {
+ ZE2UR_CALL(zeCommandListAppendSignalEvent,
+ (ZeCommandList, MultiDeviceZeEvent));
+ }
+ MultiDeviceEvent->Completed = true;
UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false,
/* OkToBatchCommand */ true));
@@ -1636,7 +1652,7 @@ ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList(
// Tells if this event is with profiling capabilities.
bool ur_event_handle_t_::isProfilingEnabled() const {
return !UrQueue || // tentatively assume user events are profiling enabled
- (Legacy(UrQueue)->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;
+ (UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;
}
// Tells if this event was created as a timestamp event, allowing profiling
diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp
index e99df2a272..7dd64acdaa 100644
--- a/source/adapters/level_zero/event.hpp
+++ b/source/adapters/level_zero/event.hpp
@@ -20,7 +20,7 @@
#include
#include
-#include
+#include
#include
#include
@@ -29,9 +29,9 @@
extern "C" {
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
-ur_result_t EventCreate(ur_context_handle_t Context,
- ur_queue_handle_legacy_t Queue, bool IsMultiDevice,
- bool HostVisible, ur_event_handle_t *RetEvent,
+ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
+ bool IsMultiDevice, bool HostVisible,
+ ur_event_handle_t *RetEvent,
bool CounterBasedEventEnabled = false,
bool ForceDisableProfiling = false);
} // extern "C"
@@ -89,7 +89,7 @@ struct _ur_ze_event_list_t {
// command-lists.
ur_result_t createAndRetainUrZeEventList(uint32_t EventListLength,
const ur_event_handle_t *EventList,
- ur_queue_handle_legacy_t CurQueue,
+ ur_queue_handle_t CurQueue,
bool UseCopyEngine);
// Add all the events in this object's UrEventList to the end
diff --git a/source/adapters/level_zero/helpers/memory_helpers.cpp b/source/adapters/level_zero/helpers/memory_helpers.cpp
new file mode 100644
index 0000000000..aea32795ab
--- /dev/null
+++ b/source/adapters/level_zero/helpers/memory_helpers.cpp
@@ -0,0 +1,33 @@
+//===--------- memory_helpers.cpp - Level Zero Adapter -------------------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "memory_helpers.hpp"
+#include "../common.hpp"
+
+ze_memory_type_t getMemoryType(ze_context_handle_t hContext, void *ptr) {
+ // TODO: use UMF once
+ // https://github.com/oneapi-src/unified-memory-framework/issues/687 is
+ // implemented
+ ZeStruct zeMemoryAllocationProperties;
+ ZE2UR_CALL_THROWS(zeMemGetAllocProperties,
+ (hContext, ptr, &zeMemoryAllocationProperties, nullptr));
+ return zeMemoryAllocationProperties.type;
+}
+
+bool maybeImportUSM(ze_driver_handle_t hTranslatedDriver,
+ ze_context_handle_t hContext, void *ptr, size_t size) {
+ if (ZeUSMImport.Enabled && ptr != nullptr &&
+ getMemoryType(hContext, ptr) == ZE_MEMORY_TYPE_UNKNOWN) {
+ // Promote the host ptr to USM host memory
+ ZeUSMImport.doZeUSMImport(hTranslatedDriver, ptr, size);
+ return true;
+ }
+ return false;
+}
diff --git a/source/adapters/level_zero/helpers/memory_helpers.hpp b/source/adapters/level_zero/helpers/memory_helpers.hpp
new file mode 100644
index 0000000000..ad50be992c
--- /dev/null
+++ b/source/adapters/level_zero/helpers/memory_helpers.hpp
@@ -0,0 +1,23 @@
+//===--------- memory_helpers.hpp - Level Zero Adapter -------------------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+
+#include
+#include
+
+// If USM Import feature is enabled and hostptr is supplied,
+// import the hostptr if not already imported into USM.
+// Data transfer rate is maximized when both source and destination
+// are USM pointers. Promotion of the host pointer to USM thus
+// optimizes data transfer performance.
+bool maybeImportUSM(ze_driver_handle_t hTranslatedDriver,
+ ze_context_handle_t hContext, void *ptr, size_t size);
+
+ze_memory_type_t getMemoryType(ze_context_handle_t hContext, void *ptr);
diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp
index f68b2d93be..a717597623 100644
--- a/source/adapters/level_zero/image.cpp
+++ b/source/adapters/level_zero/image.cpp
@@ -14,7 +14,9 @@
#include "event.hpp"
#include "logger/ur_logger.hpp"
#include "sampler.hpp"
+#include "ur_interface_loader.hpp"
#include "ur_level_zero.hpp"
+#include "ze_api.h"
typedef ze_result_t(ZE_APICALL *zeImageGetDeviceOffsetExp_pfn)(
ze_image_handle_t hImage, uint64_t *pDeviceOffset);
@@ -444,7 +446,8 @@ ur_result_t bindlessImagesCreateImpl(ur_context_handle_t hContext,
ze_image_handle_t ZeImage;
ze_memory_allocation_properties_t MemAllocProperties{
- ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES};
+ ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, nullptr,
+ ZE_MEMORY_TYPE_UNKNOWN, 0, 0};
ZE2UR_CALL(zeMemGetAllocProperties,
(hContext->ZeContext, reinterpret_cast(hImageMem),
&MemAllocProperties, nullptr));
@@ -631,11 +634,14 @@ getImageFormatTypeAndSize(const ur_image_format_t *ImageFormat) {
return {ZeImageFormatType, ZeImageFormatTypeSize};
}
-UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
- ur_context_handle_t hContext, ur_device_handle_t hDevice,
- const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool,
- size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem,
- size_t *pResultPitch) {
+namespace ur::level_zero {
+
+ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_usm_desc_t *pUSMDesc,
+ ur_usm_pool_handle_t pool, size_t widthInBytes,
+ size_t height, size_t elementSizeBytes,
+ void **ppMem, size_t *pResultPitch) {
std::shared_lock Lock(hContext->Mutex);
UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
@@ -668,13 +674,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
*pResultPitch = RowPitch;
size_t Size = height * RowPitch;
- UR_CALL(urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, Size, ppMem));
+ UR_CALL(ur::level_zero::urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool,
+ Size, ppMem));
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urBindlessImagesUnsampledImageHandleDestroyExp(
+ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_native_handle_t hImage) {
UR_ASSERT(hContext && hDevice && hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
@@ -691,17 +697,16 @@ urBindlessImagesUnsampledImageHandleDestroyExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urBindlessImagesSampledImageHandleDestroyExp(
+ur_result_t urBindlessImagesSampledImageHandleDestroyExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_native_handle_t hImage) {
// Sampled image is a combination of unsampled image and sampler.
// Sampler is released in urSamplerRelease.
- return urBindlessImagesUnsampledImageHandleDestroyExp(hContext, hDevice,
- hImage);
+ return ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp(
+ hContext, hDevice, hImage);
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
+ur_result_t urBindlessImagesImageAllocateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
ur_exp_image_mem_native_handle_t *phImageMem) {
@@ -730,16 +735,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp(
- ur_context_handle_t hContext, ur_device_handle_t hDevice,
- ur_exp_image_mem_native_handle_t hImageMem) {
+ur_result_t
+urBindlessImagesImageFreeExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hImageMem) {
std::ignore = hContext;
std::ignore = hDevice;
- UR_CALL(urMemRelease(reinterpret_cast(hImageMem)));
+ UR_CALL(ur::level_zero::urMemRelease(
+ reinterpret_cast(hImageMem)));
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp(
+ur_result_t urBindlessImagesUnsampledImageCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_mem_native_handle_t hImageMem,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
@@ -749,7 +756,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
+ur_result_t urBindlessImagesSampledImageCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_mem_native_handle_t hImageMem,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
@@ -759,8 +766,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp(
- [[maybe_unused]] const void *pSrc, [[maybe_unused]] void *pDst,
+ur_result_t urBindlessImagesImageCopyExp(
+ ur_queue_handle_t hQueue, [[maybe_unused]] const void *pSrc,
+ [[maybe_unused]] void *pDst,
[[maybe_unused]] const ur_image_desc_t *pSrcImageDesc,
[[maybe_unused]] const ur_image_desc_t *pDstImageDesc,
[[maybe_unused]] const ur_image_format_t *pSrcImageFormat,
@@ -770,7 +778,6 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp(
[[maybe_unused]] uint32_t numEventsInWaitList,
[[maybe_unused]] const ur_event_handle_t *phEventWaitList,
[[maybe_unused]] ur_event_handle_t *phEvent) {
- auto hQueue = this;
std::scoped_lock Lock(hQueue->Mutex);
UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
@@ -920,7 +927,7 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp(
+ur_result_t urBindlessImagesImageGetInfoExp(
ur_context_handle_t, ur_exp_image_mem_native_handle_t hImageMem,
ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) {
UR_ASSERT(hImageMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
@@ -970,7 +977,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp(
}
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp(
+ur_result_t urBindlessImagesMipmapGetLevelExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel,
ur_exp_image_mem_native_handle_t *phImageMem) {
@@ -984,13 +991,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp(
- ur_context_handle_t hContext, ur_device_handle_t hDevice,
- ur_exp_image_mem_native_handle_t hMem) {
- return urBindlessImagesImageFreeExp(hContext, hDevice, hMem);
+ur_result_t
+urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hMem) {
+ return ur::level_zero::urBindlessImagesImageFreeExp(hContext, hDevice, hMem);
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp(
+ur_result_t urBindlessImagesImportExternalMemoryExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
ur_exp_external_mem_type_t memHandleType,
ur_exp_external_mem_desc_t *pExternalMemDesc,
@@ -1050,7 +1058,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp(
+ur_result_t urBindlessImagesMapExternalArrayExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
ur_exp_external_mem_handle_t hExternalMem,
@@ -1085,7 +1093,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp(
+ur_result_t urBindlessImagesMapExternalLinearMemoryExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset,
uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **phRetMem) {
std::ignore = hContext;
@@ -1099,7 +1107,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp(
+ur_result_t urBindlessImagesReleaseExternalMemoryExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_external_mem_handle_t hExternalMem) {
@@ -1109,7 +1117,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp(
struct ur_ze_external_memory_data *externalMemoryData =
reinterpret_cast(hExternalMem);
- UR_CALL(urMemRelease(externalMemoryData->urMemoryHandle));
+ UR_CALL(ur::level_zero::urMemRelease(externalMemoryData->urMemoryHandle));
switch (externalMemoryData->type) {
case UR_ZE_EXTERNAL_OPAQUE_FD:
@@ -1129,7 +1137,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp(
+ur_result_t urBindlessImagesImportExternalSemaphoreExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_external_semaphore_type_t semHandleType,
ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc,
@@ -1144,7 +1152,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp(
+ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_external_semaphore_handle_t hExternalSemaphore) {
std::ignore = hContext;
@@ -1155,10 +1163,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp(
- ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue,
- uint64_t waitValue, uint32_t numEventsInWaitList,
+ur_result_t urBindlessImagesWaitExternalSemaphoreExp(
+ ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore,
+ bool hasValue, uint64_t waitValue, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+ std::ignore = hQueue;
std::ignore = hSemaphore;
std::ignore = hasValue;
std::ignore = waitValue;
@@ -1170,10 +1179,11 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp(
- ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue,
- uint64_t signalValue, uint32_t numEventsInWaitList,
+ur_result_t urBindlessImagesSignalExternalSemaphoreExp(
+ ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore,
+ bool hasValue, uint64_t signalValue, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+ std::ignore = hQueue;
std::ignore = hSemaphore;
std::ignore = hasValue;
std::ignore = signalValue;
@@ -1184,3 +1194,5 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp(
"{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
+
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/image.hpp b/source/adapters/level_zero/image.hpp
index 618258601d..43f37fa757 100644
--- a/source/adapters/level_zero/image.hpp
+++ b/source/adapters/level_zero/image.hpp
@@ -10,7 +10,7 @@
#pragma once
#include
-#include
+#include
#include
#include
diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp
index 3469620b71..9c638d53f6 100644
--- a/source/adapters/level_zero/kernel.cpp
+++ b/source/adapters/level_zero/kernel.cpp
@@ -11,11 +11,29 @@
#include "kernel.hpp"
#include "logger/ur_logger.hpp"
#include "ur_api.h"
-#include "ur_level_zero.hpp"
+#include "ur_interface_loader.hpp"
#include "helpers/kernel_helpers.hpp"
-UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
+ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel,
+ ze_kernel_handle_t *phZeKernel) {
+ if (hKernel->ZeKernelMap.empty()) {
+ *phZeKernel = hKernel->ZeKernel;
+ } else {
+ auto It = hKernel->ZeKernelMap.find(hDevice);
+ if (It == hKernel->ZeKernelMap.end()) {
+ /* kernel and queue don't match */
+ return UR_RESULT_ERROR_INVALID_QUEUE;
+ }
+ *phZeKernel = It->second;
+ }
+
+ return UR_RESULT_SUCCESS;
+}
+
+namespace ur::level_zero {
+
+ur_result_t urKernelGetSuggestedLocalWorkSize(
ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim,
[[maybe_unused]] const size_t *pGlobalWorkOffset,
const size_t *pGlobalWorkSize, size_t *pSuggestedLocalWorkSize) {
@@ -29,32 +47,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
std::copy(pGlobalWorkSize, pGlobalWorkSize + workDim, GlobalWorkSize3D);
ze_kernel_handle_t ZeKernel{};
- UR_CALL(getZeKernel(Legacy(hQueue)->Device->ZeDevice, hKernel, &ZeKernel));
+ UR_CALL(getZeKernel(hQueue->Device->ZeDevice, hKernel, &ZeKernel));
- UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue)->Device, ZeKernel,
- GlobalWorkSize3D, LocalWorkSize));
+ UR_CALL(getSuggestedLocalWorkSize(hQueue->Device, ZeKernel, GlobalWorkSize3D,
+ LocalWorkSize));
std::copy(LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize);
return UR_RESULT_SUCCESS;
}
-ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel,
- ze_kernel_handle_t *phZeKernel) {
- if (hKernel->ZeKernelMap.empty()) {
- *phZeKernel = hKernel->ZeKernel;
- } else {
- auto It = hKernel->ZeKernelMap.find(hDevice);
- if (It == hKernel->ZeKernelMap.end()) {
- /* kernel and queue don't match */
- return UR_RESULT_ERROR_INVALID_QUEUE;
- }
- *phZeKernel = It->second;
- }
-
- return UR_RESULT_SUCCESS;
-}
-
-ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch(
+ur_result_t urEnqueueKernelLaunch(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify
///< the global and work-group work-items
@@ -86,7 +89,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch(
UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
- auto Queue = this;
ze_kernel_handle_t ZeKernel{};
UR_CALL(getZeKernel(Queue->Device->ZeDevice, Kernel, &ZeKernel));
@@ -158,7 +160,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch(
// is in use. Once the event has been signalled, the code in
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
// reference count on the kernel, using the kernel saved in CommandData.
- UR_CALL(urKernelRetain(Kernel));
+ UR_CALL(ur::level_zero::urKernelRetain(Kernel));
// Add to list of kernels to be submitted
if (IndirectAccessTrackingEnabled)
@@ -204,7 +206,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp(
+ur_result_t urEnqueueCooperativeKernelLaunchExp(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify
///< the global and work-group work-items
@@ -236,7 +239,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp(
UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
- auto Queue = this;
auto ZeDevice = Queue->Device->ZeDevice;
ze_kernel_handle_t ZeKernel{};
@@ -422,7 +424,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp(
// is in use. Once the event has been signalled, the code in
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
// reference count on the kernel, using the kernel saved in CommandData.
- UR_CALL(urKernelRetain(Kernel));
+ UR_CALL(ur::level_zero::urKernelRetain(Kernel));
// Add to list of kernels to be submitted
if (IndirectAccessTrackingEnabled)
@@ -468,7 +470,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite(
+ur_result_t urEnqueueDeviceGlobalVariableWrite(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to.
ur_program_handle_t Program, ///< [in] handle of the program containing the
///< device global variable.
const char
@@ -489,14 +492,21 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite(
*Event ///< [in,out][optional] return an event object that identifies
///< this particular kernel execution instance.
) {
- auto Queue = this;
std::scoped_lock lock(Queue->Mutex);
+ ze_module_handle_t ZeModule{};
+ auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice);
+ if (It != Program->ZeModuleMap.end()) {
+ ZeModule = It->second;
+ } else {
+ ZeModule = Program->ZeModule;
+ }
+
// Find global variable pointer
size_t GlobalVarSize = 0;
void *GlobalVarPtr = nullptr;
ZE2UR_CALL(zeModuleGetGlobalPointer,
- (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr));
+ (ZeModule, Name, &GlobalVarSize, &GlobalVarPtr));
if (GlobalVarSize < Offset + Count) {
setErrorMessage("Write device global variable is out of range.",
UR_RESULT_ERROR_INVALID_VALUE,
@@ -522,29 +532,28 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite(
EventWaitList, Event, PreferCopyEngine);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead(
- ur_program_handle_t Program, ///< [in] handle of the program containing
- ///< the device global variable.
- const char *Name, ///< [in] the unique identifier for the device global
- ///< variable.
+ur_result_t urEnqueueDeviceGlobalVariableRead(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to.
+ ur_program_handle_t Program, ///< [in] handle of the program containing the
+ ///< device global variable.
+ const char
+ *Name, ///< [in] the unique identifier for the device global variable.
bool BlockingRead, ///< [in] indicates if this operation should block.
size_t Count, ///< [in] the number of bytes to copy.
- size_t Offset, ///< [in] the byte offset into the device global variable
- ///< to start copying.
- void *Dst, ///< [in] pointer to where the data must be copied to.
+ size_t Offset, ///< [in] the byte offset into the device global variable to
+ ///< start copying.
+ void *Dst, ///< [in] pointer to where the data must be copied to.
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list.
const ur_event_handle_t
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
- ///< pointer to a list of events that must be
- ///< complete before the kernel execution. If
- ///< nullptr, the numEventsInWaitList must be 0,
- ///< indicating that no wait event.
+ ///< pointer to a list of events that must be complete
+ ///< before the kernel execution. If nullptr, the
+ ///< numEventsInWaitList must be 0, indicating that no
+ ///< wait event.
ur_event_handle_t
- *Event ///< [in,out][optional] return an event object that
- ///< identifies this particular kernel execution instance.
+ *Event ///< [in,out][optional] return an event object that identifies
+ ///< this particular kernel execution instance.
) {
- auto Queue = this;
-
std::scoped_lock lock(Queue->Mutex);
// Find global variable pointer
@@ -577,7 +586,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead(
EventWaitList, Event, PreferCopyEngine);
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(
+ur_result_t urKernelCreate(
ur_program_handle_t Program, ///< [in] handle of the program instance
const char *KernelName, ///< [in] pointer to null-terminated string.
ur_kernel_handle_t
@@ -640,7 +649,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
+ur_result_t urKernelSetArgValue(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1]
size_t ArgSize, ///< [in] size of argument type
@@ -690,7 +699,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
return ze2urResult(ZeResult);
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal(
+ur_result_t urKernelSetArgLocal(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1]
size_t ArgSize, ///< [in] size of the local buffer to be allocated by the
@@ -700,12 +709,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal(
) {
std::ignore = Properties;
- UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, ArgSize, nullptr, nullptr));
+ UR_CALL(ur::level_zero::urKernelSetArgValue(Kernel, ArgIndex, ArgSize,
+ nullptr, nullptr));
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(
+ur_result_t urKernelGetInfo(
ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object
ur_kernel_info_t ParamName, ///< [in] name of the Kernel property to query
size_t PropSize, ///< [in] the size of the Kernel property value.
@@ -767,7 +777,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo(
+ur_result_t urKernelGetGroupInfo(
ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object
ur_device_handle_t Device, ///< [in] handle of the Device object
ur_kernel_group_info_t
@@ -848,7 +858,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo(
+ur_result_t urKernelGetSubGroupInfo(
ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object
ur_device_handle_t Device, ///< [in] handle of the Device object
ur_kernel_sub_group_info_t
@@ -879,7 +889,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(
+ur_result_t urKernelRetain(
ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to retain
) {
Kernel->RefCount.increment();
@@ -887,7 +897,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(
+ur_result_t urKernelRelease(
ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to release
) {
if (!Kernel->RefCount.decrementAndTest())
@@ -904,7 +914,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(
}
Kernel->ZeKernelMap.clear();
if (IndirectAccessTrackingEnabled) {
- UR_CALL(urContextRelease(KernelProgram->Context));
+ UR_CALL(ur::level_zero::urContextRelease(KernelProgram->Context));
}
// do a release on the program this kernel was part of without delete of the
// program handle
@@ -915,7 +925,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(
+ur_result_t urKernelSetArgPointer(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1]
const ur_kernel_arg_pointer_properties_t
@@ -927,12 +937,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(
std::ignore = Properties;
// KernelSetArgValue is expecting a pointer to the argument
- UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, sizeof(const void *), nullptr,
- &ArgValue));
+ UR_CALL(ur::level_zero::urKernelSetArgValue(
+ Kernel, ArgIndex, sizeof(const void *), nullptr, &ArgValue));
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo(
+ur_result_t urKernelSetExecInfo(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
ur_kernel_exec_info_t PropName, ///< [in] name of the execution attribute
size_t PropSize, ///< [in] size in byte the attribute value
@@ -978,7 +988,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler(
+ur_result_t urKernelSetArgSampler(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1]
const ur_kernel_arg_sampler_properties_t
@@ -996,7 +1006,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(
+ur_result_t urKernelSetArgMemObj(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1]
const ur_kernel_arg_mem_obj_properties_t
@@ -1038,7 +1048,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
+ur_result_t urKernelGetNativeHandle(
ur_kernel_handle_t Kernel, ///< [in] handle of the kernel.
ur_native_handle_t
*NativeKernel ///< [out] a pointer to the native handle of the kernel.
@@ -1049,7 +1059,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
+ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, size_t localWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
(void)localWorkSize;
@@ -1062,7 +1072,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle(
+ur_result_t urKernelCreateWithNativeHandle(
ur_native_handle_t NativeKernel, ///< [in] the native handle of the kernel.
ur_context_handle_t Context, ///< [in] handle of the context object
ur_program_handle_t Program,
@@ -1098,13 +1108,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle(
return UR_RESULT_SUCCESS;
}
+ur_result_t urKernelSetSpecializationConstants(
+ ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
+ uint32_t Count, ///< [in] the number of elements in the pSpecConstants array
+ const ur_specialization_constant_info_t
+ *SpecConstants ///< [in] array of specialization constant value
+ ///< descriptions
+) {
+ std::ignore = Kernel;
+ std::ignore = Count;
+ std::ignore = SpecConstants;
+ logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
+ "{} function not implemented!", __FUNCTION__);
+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+} // namespace ur::level_zero
+
ur_result_t ur_kernel_handle_t_::initialize() {
// Retain the program and context to show it's used by this kernel.
- UR_CALL(urProgramRetain(Program));
+ UR_CALL(ur::level_zero::urProgramRetain(Program));
if (IndirectAccessTrackingEnabled)
// TODO: do piContextRetain without the guard
- UR_CALL(urContextRetain(Program->Context));
+ UR_CALL(ur::level_zero::urContextRetain(Program->Context));
// Set up how to obtain kernel properties when needed.
ZeKernelProperties.Compute = [this](ze_kernel_properties_t &Properties) {
@@ -1123,36 +1150,3 @@ ur_result_t ur_kernel_handle_t_::initialize() {
return UR_RESULT_SUCCESS;
}
-
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants(
- ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object
- uint32_t Count, ///< [in] the number of elements in the pSpecConstants array
- const ur_specialization_constant_info_t
- *SpecConstants ///< [in] array of specialization constant value
- ///< descriptions
-) {
- std::ignore = Kernel;
- std::ignore = Count;
- std::ignore = SpecConstants;
- logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
- "{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunchCustomExp(
- ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize,
- const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList,
- const ur_exp_launch_property_t *launchPropList,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- std::ignore = hKernel;
- std::ignore = workDim;
- std::ignore = pGlobalWorkSize;
- std::ignore = pLocalWorkSize;
- std::ignore = numPropsInLaunchPropList;
- std::ignore = launchPropList;
- std::ignore = numEventsInWaitList;
- std::ignore = phEventWaitList;
- std::ignore = phEvent;
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp
index 585a10ef4f..69edf83a78 100644
--- a/source/adapters/level_zero/memory.cpp
+++ b/source/adapters/level_zero/memory.cpp
@@ -15,9 +15,11 @@
#include "context.hpp"
#include "event.hpp"
+#include "helpers/memory_helpers.hpp"
#include "image.hpp"
#include "logger/ur_logger.hpp"
#include "queue.hpp"
+#include "ur_interface_loader.hpp"
#include "ur_level_zero.hpp"
// Default to using compute engine for fill operation, but allow to
@@ -59,7 +61,7 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) {
// PI interfaces must have queue's and destination buffer's mutexes locked for
// exclusive use and source buffer's mutex locked for shared use on entry.
ur_result_t enqueueMemCopyHelper(ur_command_t CommandType,
- ur_queue_handle_legacy_t Queue, void *Dst,
+ ur_queue_handle_t Queue, void *Dst,
ur_bool_t BlockingWrite, size_t Size,
const void *Src, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList,
@@ -112,13 +114,12 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType,
// PI interfaces must have queue's and destination buffer's mutexes locked for
// exclusive use and source buffer's mutex locked for shared use on entry.
ur_result_t enqueueMemCopyRectHelper(
- ur_command_t CommandType, ur_queue_handle_legacy_t Queue,
- const void *SrcBuffer, void *DstBuffer, ur_rect_offset_t SrcOrigin,
- ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch,
- size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch,
- ur_bool_t Blocking, uint32_t NumEventsInWaitList,
- const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent,
- bool PreferCopyEngine) {
+ ur_command_t CommandType, ur_queue_handle_t Queue, const void *SrcBuffer,
+ void *DstBuffer, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin,
+ ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch,
+ size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking,
+ uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
+ ur_event_handle_t *OutEvent, bool PreferCopyEngine) {
bool UseCopyEngine = Queue->useCopyEngine(PreferCopyEngine);
_ur_ze_event_list_t TmpWaitList;
@@ -198,9 +199,9 @@ ur_result_t enqueueMemCopyRectHelper(
// PI interfaces must have queue's and buffer's mutexes locked on entry.
static ur_result_t enqueueMemFillHelper(ur_command_t CommandType,
- ur_queue_handle_legacy_t Queue,
- void *Ptr, const void *Pattern,
- size_t PatternSize, size_t Size,
+ ur_queue_handle_t Queue, void *Ptr,
+ const void *Pattern, size_t PatternSize,
+ size_t Size,
uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList,
ur_event_handle_t *OutEvent) {
@@ -315,7 +316,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr,
// indirect access, that is why explicitly retain context to be sure
// that it is released after all memory allocations in this context are
// released.
- UR_CALL(urContextRetain(UrContext));
+ UR_CALL(ur::level_zero::urContextRetain(UrContext));
}
ZeStruct ZeDesc;
@@ -337,7 +338,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr,
// PI interfaces must have queue's and destination image's mutexes locked for
// exclusive use and source image's mutex locked for shared use on entry.
static ur_result_t enqueueMemImageCommandHelper(
- ur_command_t CommandType, ur_queue_handle_legacy_t Queue,
+ ur_command_t CommandType, ur_queue_handle_t Queue,
const void *Src, // image or ptr
void *Dst, // image or ptr
ur_bool_t IsBlocking, ur_rect_offset_t *SrcOrigin,
@@ -474,7 +475,10 @@ static ur_result_t enqueueMemImageCommandHelper(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead(
+namespace ur::level_zero {
+
+ur_result_t urEnqueueMemBufferRead(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object
bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false)
size_t offset, ///< [in] offset in bytes in the buffer object
@@ -492,7 +496,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead(
*phEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
ur_mem_handle_t_ *Src = ur_cast(hBuffer);
std::shared_lock SrcLock(Src->Mutex, std::defer_lock);
@@ -508,7 +511,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead(
true /* PreferCopyEngine */);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite(
+ur_result_t urEnqueueMemBufferWrite(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object
bool
blockingWrite, ///< [in] indicates blocking (true), non-blocking (false)
@@ -528,7 +532,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite(
*phEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
ur_mem_handle_t_ *Buffer = ur_cast(hBuffer);
std::scoped_lock Lock(Queue->Mutex,
@@ -545,7 +548,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite(
true /* PreferCopyEngine */);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect(
+ur_result_t urEnqueueMemBufferReadRect(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object
bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false)
ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer
@@ -573,7 +577,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect(
*phEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
ur_mem_handle_t_ *Buffer = ur_cast(hBuffer);
std::shared_lock SrcLock(Buffer->Mutex, std::defer_lock);
@@ -590,7 +593,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect(
phEvent);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect(
+ur_result_t urEnqueueMemBufferWriteRect(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object
bool
blockingWrite, ///< [in] indicates blocking (true), non-blocking (false)
@@ -620,7 +624,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect(
*phEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
ur_mem_handle_t_ *Buffer = ur_cast(hBuffer);
std::scoped_lock Lock(Queue->Mutex,
@@ -637,7 +640,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect(
phEventWaitList, phEvent);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy(
+ur_result_t urEnqueueMemBufferCopy(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t BufferSrc, ///< [in] handle of the src buffer object
ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object
size_t SrcOffset, ///< [in] offset into hBufferSrc to begin copying from
@@ -655,7 +659,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
_ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc);
_ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst);
@@ -688,9 +691,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy(
OutEvent, PreferCopyEngine);
}
-ur_result_t
-ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the
- ///< queue object
+ur_result_t urEnqueueMemBufferCopyRect(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t BufferSrc, ///< [in] handle of the source buffer object
ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object
ur_rect_offset_t SrcOrigin, ///< [in] 3D offset in the source buffer
@@ -717,7 +719,6 @@ ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
_ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc);
_ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst);
@@ -748,11 +749,12 @@ ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the
NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill(
- ur_mem_handle_t Buffer, ///< [in] handle of the buffer object
- const void *Pattern, ///< [in] pointer to the fill pattern
- size_t PatternSize, ///< [in] size in bytes of the pattern
- size_t Offset, ///< [in] offset into the buffer
+ur_result_t urEnqueueMemBufferFill(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
+ ur_mem_handle_t Buffer, ///< [in] handle of the buffer object
+ const void *Pattern, ///< [in] pointer to the fill pattern
+ size_t PatternSize, ///< [in] size in bytes of the pattern
+ size_t Offset, ///< [in] offset into the buffer
size_t Size, ///< [in] fill size in bytes, must be a multiple of patternSize
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t
@@ -766,7 +768,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
std::scoped_lock Lock(Queue->Mutex,
Buffer->Mutex);
@@ -781,8 +782,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill(
Size, NumEventsInWaitList, EventWaitList, OutEvent);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead(
- ur_mem_handle_t Image, ///< [in] handle of the image object
+ur_result_t urEnqueueMemImageRead(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
+ ur_mem_handle_t Image, ///< [in] handle of the image object
bool BlockingRead, ///< [in] indicates blocking (true), non-blocking (false)
ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in
///< the 1D, 2D, or 3D image
@@ -803,7 +805,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
std::scoped_lock Lock(Queue->Mutex,
Image->Mutex);
return enqueueMemImageCommandHelper(
@@ -812,8 +813,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead(
EventWaitList, OutEvent);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite(
- ur_mem_handle_t Image, ///< [in] handle of the image object
+ur_result_t urEnqueueMemImageWrite(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
+ ur_mem_handle_t Image, ///< [in] handle of the image object
bool
BlockingWrite, ///< [in] indicates blocking (true), non-blocking (false)
ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in
@@ -835,7 +837,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
std::scoped_lock Lock(Queue->Mutex,
Image->Mutex);
return enqueueMemImageCommandHelper(
@@ -844,9 +845,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite(
EventWaitList, OutEvent);
}
-ur_result_t
-ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of
- ///< the queue object
+ur_result_t urEnqueueMemImageCopy(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t ImageSrc, ///< [in] handle of the src image object
ur_mem_handle_t ImageDst, ///< [in] handle of the dest image object
ur_rect_offset_t SrcOrigin, ///< [in] defines the (x,y,z) offset in pixels
@@ -867,7 +867,6 @@ ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock);
std::scoped_lock, ur_shared_mutex,
ur_shared_mutex>
@@ -885,8 +884,9 @@ ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of
NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap(
- ur_mem_handle_t Buf, ///< [in] handle of the buffer object
+ur_result_t urEnqueueMemBufferMap(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
+ ur_mem_handle_t Buf, ///< [in] handle of the buffer object
bool BlockingMap, ///< [in] indicates blocking (true), non-blocking (false)
ur_map_flags_t MapFlags, ///< [in] flags for read, write, readwrite mapping
size_t Offset, ///< [in] offset in bytes of the buffer region being mapped
@@ -905,7 +905,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap(
void **RetMap ///< [in,out] return mapped pointer. TODO: move it before
///< numEventsInWaitList?
) {
- auto Queue = this;
auto Buffer = ur_cast<_ur_buffer *>(Buf);
UR_ASSERT(!Buffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
@@ -964,10 +963,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap(
if (Buffer->OnHost) {
// Wait on incoming events before doing the copy
if (NumEventsInWaitList > 0)
- UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList));
+ UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList));
if (Queue->isInOrderQueue())
- UR_CALL(urQueueFinish(Queue));
+ UR_CALL(ur::level_zero::urQueueFinish(Queue));
// Lock automatically releases when this goes out of scope.
std::scoped_lock Guard(Buffer->Mutex);
@@ -1053,7 +1052,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap(
+ur_result_t urEnqueueMemUnmap(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_mem_handle_t Mem, ///< [in] handle of the memory (buffer or image) object
void *MappedPtr, ///< [in] mapped host address
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
@@ -1068,7 +1068,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
UR_ASSERT(!Mem->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
auto Buffer = ur_cast<_ur_buffer *>(Mem);
@@ -1120,10 +1119,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap(
if (Buffer->OnHost) {
// Wait on incoming events before doing the copy
if (NumEventsInWaitList > 0)
- UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList));
+ UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList));
if (Queue->isInOrderQueue())
- UR_CALL(urQueueFinish(Queue));
+ UR_CALL(ur::level_zero::urQueueFinish(Queue));
char *ZeHandleDst;
UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only,
@@ -1146,8 +1145,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap(
ur_command_list_ptr_t CommandList{};
UR_CALL(Queue->Context->getAvailableCommandList(
- reinterpret_cast(Queue), CommandList,
- UseCopyEngine, NumEventsInWaitList, EventWaitList));
+ reinterpret_cast(Queue), CommandList, UseCopyEngine,
+ NumEventsInWaitList, EventWaitList));
CommandList->second.append(reinterpret_cast(*Event));
(*Event)->RefCount.increment();
@@ -1180,8 +1179,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy(
- bool Blocking, ///< [in] blocking or non-blocking copy
+ur_result_t urEnqueueUSMMemcpy(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
+ bool Blocking, ///< [in] blocking or non-blocking copy
void *Dst, ///< [in] pointer to the destination USM memory object
const void *Src, ///< [in] pointer to the source USM memory object
size_t Size, ///< [in] size in bytes to be copied
@@ -1197,7 +1197,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
std::scoped_lock lock(Queue->Mutex);
// Device to Device copies are found to execute slower on copy engine
@@ -1219,7 +1218,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy(
NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine);
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch(
+ur_result_t urEnqueueUSMPrefetch(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
const void *Mem, ///< [in] pointer to the USM memory object
size_t Size, ///< [in] size in bytes to be fetched
ur_usm_migration_flags_t Flags, ///< [in] USM prefetch flags
@@ -1235,7 +1235,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
std::ignore = Flags;
// Lock automatically releases when this goes out of scope.
std::scoped_lock lock(Queue->Mutex);
@@ -1287,7 +1286,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise(
+ur_result_t urEnqueueUSMAdvise(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
const void *Mem, ///< [in] pointer to the USM memory object
size_t Size, ///< [in] size in bytes to be advised
ur_usm_advice_flags_t Advice, ///< [in] USM memory advice
@@ -1295,7 +1295,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular command instance.
) {
- auto Queue = this;
// Lock automatically releases when this goes out of scope.
std::scoped_lock lock(Queue->Mutex);
@@ -1345,8 +1344,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D(
- void *Mem, ///< [in] pointer to memory to be filled.
+ur_result_t urEnqueueUSMFill2D(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to.
+ void *Mem, ///< [in] pointer to memory to be filled.
size_t Pitch, ///< [in] the total width of the destination memory including
///< padding.
size_t PatternSize, ///< [in] the size in bytes of the pattern.
@@ -1364,6 +1364,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D(
*OutEvent ///< [in,out][optional] return an event object that identifies
///< this particular kernel execution instance.
) {
+ std::ignore = Queue;
std::ignore = Mem;
std::ignore = Pitch;
std::ignore = PatternSize;
@@ -1378,7 +1379,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D(
+ur_result_t urEnqueueUSMMemcpy2D(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to.
bool Blocking, ///< [in] indicates if this operation should block the host.
void *Dst, ///< [in] pointer to memory where data will be copied.
size_t DstPitch, ///< [in] the total width of the source memory including
@@ -1399,7 +1401,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D(
*Event ///< [in,out][optional] return an event object that identifies
///< this particular kernel execution instance.
) {
- auto Queue = this;
ur_rect_offset_t ZeroOffset{0, 0, 0};
ur_rect_region_t Region{Width, Height, 0};
@@ -1500,7 +1501,7 @@ static ur_result_t ur2zeImageDesc(const ur_image_format_t *ImageFormat,
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate(
+ur_result_t urMemImageCreate(
ur_context_handle_t Context, ///< [in] handle of the context object
ur_mem_flags_t Flags, ///< [in] allocation and usage information flags
const ur_image_format_t
@@ -1549,7 +1550,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle(
+ur_result_t urMemImageCreateWithNativeHandle(
ur_native_handle_t NativeMem, ///< [in] the native handle to the memory.
ur_context_handle_t Context, ///< [in] handle of the context object.
[[maybe_unused]] const ur_image_format_t
@@ -1577,7 +1578,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
+ur_result_t urMemBufferCreate(
ur_context_handle_t Context, ///< [in] handle of the context object
ur_mem_flags_t Flags, ///< [in] allocation and usage information flags
size_t Size, ///< [in] size in bytes of the memory object to be allocated
@@ -1599,30 +1600,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
Host = Properties->pHost;
}
- // If USM Import feature is enabled and hostptr is supplied,
- // import the hostptr if not already imported into USM.
- // Data transfer rate is maximized when both source and destination
- // are USM pointers. Promotion of the host pointer to USM thus
- // optimizes data transfer performance.
bool HostPtrImported = false;
- if (ZeUSMImport.Enabled && Host != nullptr &&
- (Flags & UR_MEM_FLAG_USE_HOST_POINTER) != 0) {
- // Query memory type of the host pointer
- ze_device_handle_t ZeDeviceHandle;
- ZeStruct ZeMemoryAllocationProperties;
- ZE2UR_CALL(zeMemGetAllocProperties,
- (Context->ZeContext, Host, &ZeMemoryAllocationProperties,
- &ZeDeviceHandle));
-
- // If not shared of any type, we can import the ptr
- if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) {
- // Promote the host ptr to USM host memory
- ze_driver_handle_t driverHandle =
- Context->getPlatform()->ZeDriverHandleExpTranslated;
- ZeUSMImport.doZeUSMImport(driverHandle, Host, Size);
- HostPtrImported = true;
- }
- }
+ if (Flags & UR_MEM_FLAG_USE_HOST_POINTER)
+ HostPtrImported =
+ maybeImportUSM(Context->getPlatform()->ZeDriverHandleExpTranslated,
+ Context->ZeContext, Host, Size);
_ur_buffer *Buffer = nullptr;
auto HostPtrOrNull = (Flags & UR_MEM_FLAG_USE_HOST_POINTER)
@@ -1671,14 +1653,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(
+ur_result_t urMemRetain(
ur_mem_handle_t Mem ///< [in] handle of the memory object to get access
) {
Mem->RefCount.increment();
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(
+ur_result_t urMemRelease(
ur_mem_handle_t Mem ///< [in] handle of the memory object to release
) {
if (!Mem->RefCount.decrementAndTest())
@@ -1704,7 +1686,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition(
+ur_result_t urMemBufferPartition(
ur_mem_handle_t
Buffer, ///< [in] handle of the buffer object to allocate from
ur_mem_flags_t Flags, ///< [in] allocation and usage information flags
@@ -1740,7 +1722,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle(
+ur_result_t urMemGetNativeHandle(
ur_mem_handle_t Mem, ///< [in] handle of the mem.
ur_device_handle_t, ///< [in] handle of the device.
ur_native_handle_t
@@ -1754,7 +1736,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle(
+ur_result_t urMemBufferCreateWithNativeHandle(
ur_native_handle_t NativeMem, ///< [in] the native handle to the memory.
ur_context_handle_t Context, ///< [in] handle of the context object.
const ur_mem_native_properties_t
@@ -1821,7 +1803,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle(
ContextsLock.lock();
// Retain context to be sure that it is released after all memory
// allocations in this context are released.
- UR_CALL(urContextRetain(Context));
+ UR_CALL(ur::level_zero::urContextRetain(Context));
Context->MemAllocs.emplace(std::piecewise_construct,
std::forward_as_tuple(Ptr),
@@ -1857,7 +1839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(
+ur_result_t urMemGetInfo(
ur_mem_handle_t Memory, ///< [in] handle to the memory object being queried.
ur_mem_info_t MemInfoType, ///< [in] type of the info to retrieve.
size_t PropSize, ///< [in] the number of bytes of memory pointed to by
@@ -1893,7 +1875,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(
+ur_result_t urMemImageGetInfo(
ur_mem_handle_t Memory, ///< [in] handle to the image object being queried.
ur_image_info_t ImgInfoType, ///< [in] type of image info to retrieve.
size_t PropSize, ///< [in] the number of bytes of memory pointer to by
@@ -1916,6 +1898,79 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
+ur_result_t urEnqueueUSMFill(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
+ void *Ptr, ///< [in] pointer to USM memory object
+ size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a
+ ///< power of 2 and less than or equal to width.
+ const void *Pattern, ///< [in] pointer with the bytes of the pattern to set.
+ size_t Size, ///< [in] size in bytes to be set. Must be a multiple of
+ ///< patternSize.
+ uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
+ const ur_event_handle_t *
+ EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+ ///< pointer to a list of events that must be complete
+ ///< before this command can be executed. If nullptr, the
+ ///< numEventsInWaitList must be 0, indicating that this
+ ///< command does not wait on any event to complete.
+ ur_event_handle_t *Event ///< [out][optional] return an event object that
+ ///< identifies this particular command instance.
+) {
+ std::scoped_lock Lock(Queue->Mutex);
+
+ return enqueueMemFillHelper(
+ // TODO: do we need a new command type for USM memset?
+ UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr,
+ Pattern, // It will be interpreted as an 8-bit value,
+ PatternSize, // which is indicated with this pattern_size==1
+ Size, NumEventsInWaitList, EventWaitList, Event);
+}
+
+/// Host Pipes
+ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue,
+ ur_program_handle_t hProgram,
+ const char *pipe_symbol, bool blocking,
+ void *pDst, size_t size,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent) {
+ std::ignore = hQueue;
+ std::ignore = hProgram;
+ std::ignore = pipe_symbol;
+ std::ignore = blocking;
+ std::ignore = pDst;
+ std::ignore = size;
+ std::ignore = numEventsInWaitList;
+ std::ignore = phEventWaitList;
+ std::ignore = phEvent;
+ logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
+ "{} function not implemented!", __FUNCTION__);
+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue,
+ ur_program_handle_t hProgram,
+ const char *pipe_symbol, bool blocking,
+ void *pSrc, size_t size,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent) {
+ std::ignore = hQueue;
+ std::ignore = hProgram;
+ std::ignore = pipe_symbol;
+ std::ignore = blocking;
+ std::ignore = pSrc;
+ std::ignore = size;
+ std::ignore = numEventsInWaitList;
+ std::ignore = phEventWaitList;
+ std::ignore = phEvent;
+ logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
+ "{} function not implemented!", __FUNCTION__);
+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+} // namespace ur::level_zero
+
// If indirect access tracking is enabled then performs reference counting,
// otherwise just calls zeMemAllocDevice.
static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr,
@@ -1935,7 +1990,7 @@ static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr,
// indirect access, that is why explicitly retain context to be sure
// that it is released after all memory allocations in this context are
// released.
- UR_CALL(urContextRetain(Context));
+ UR_CALL(ur::level_zero::urContextRetain(Context));
}
ze_device_mem_alloc_desc_t ZeDesc = {};
@@ -1995,8 +2050,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
ur_usm_desc_t USMDesc{};
USMDesc.align = getAlignment();
ur_usm_pool_handle_t Pool{};
- UR_CALL(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size,
- reinterpret_cast<void **>(&ZeHandle)));
+ UR_CALL(ur::level_zero::urUSMHostAlloc(
+ UrContext, &USMDesc, Pool, Size,
+ reinterpret_cast<void **>(&ZeHandle)));
} else {
HostAllocation.ReleaseAction = allocation_t::free_native;
 UR_CALL(ZeHostMemAllocHelper(reinterpret_cast<void **>(&ZeHandle),
@@ -2054,8 +2110,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
ur_usm_desc_t USMDesc{};
USMDesc.align = getAlignment();
ur_usm_pool_handle_t Pool{};
- UR_CALL(urUSMDeviceAlloc(UrContext, Device, &USMDesc, Pool, Size,
- reinterpret_cast<void **>(&ZeHandle)));
+ UR_CALL(ur::level_zero::urUSMDeviceAlloc(
+ UrContext, Device, &USMDesc, Pool, Size,
+ reinterpret_cast<void **>(&ZeHandle)));
} else {
Allocation.ReleaseAction = allocation_t::free_native;
 UR_CALL(ZeDeviceMemAllocHelper(reinterpret_cast<void **>(&ZeHandle),
@@ -2118,8 +2175,8 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
ur_usm_desc_t USMDesc{};
USMDesc.align = getAlignment();
ur_usm_pool_handle_t Pool{};
- UR_CALL(
- urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, &ZeHandleHost));
+ UR_CALL(ur::level_zero::urUSMHostAlloc(UrContext, &USMDesc, Pool,
+ Size, &ZeHandleHost));
} else {
HostAllocation.ReleaseAction = allocation_t::free_native;
UR_CALL(ZeHostMemAllocHelper(&ZeHandleHost, UrContext, Size));
@@ -2301,66 +2358,3 @@ size_t _ur_buffer::getAlignment() const {
Alignment = 1UL;
return Alignment;
}
-
-ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill(
- void *Ptr, ///< [in] pointer to USM memory object
- size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a
- ///< power of 2 and less than or equal to width.
- const void *Pattern, ///< [in] pointer with the bytes of the pattern to set.
- size_t Size, ///< [in] size in bytes to be set. Must be a multiple of
- ///< patternSize.
- uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
- const ur_event_handle_t *
- EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
- ///< pointer to a list of events that must be complete
- ///< before this command can be executed. If nullptr, the
- ///< numEventsInWaitList must be 0, indicating that this
- ///< command does not wait on any event to complete.
- ur_event_handle_t *Event ///< [out][optional] return an event object that
- ///< identifies this particular command instance.
-) {
- auto Queue = this;
- std::scoped_lock<ur_shared_mutex> Lock(Queue->Mutex);
-
- return enqueueMemFillHelper(
- // TODO: do we need a new command type for USM memset?
- UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr,
- Pattern, // It will be interpreted as an 8-bit value,
- PatternSize, // which is indicated with this pattern_size==1
- Size, NumEventsInWaitList, EventWaitList, Event);
-}
-
-/// Host Pipes
-ur_result_t ur_queue_handle_legacy_t_::enqueueReadHostPipe(
- ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking,
- void *pDst, size_t size, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- std::ignore = hProgram;
- std::ignore = pipe_symbol;
- std::ignore = blocking;
- std::ignore = pDst;
- std::ignore = size;
- std::ignore = numEventsInWaitList;
- std::ignore = phEventWaitList;
- std::ignore = phEvent;
- logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
- "{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t ur_queue_handle_legacy_t_::enqueueWriteHostPipe(
- ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking,
- void *pSrc, size_t size, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- std::ignore = hProgram;
- std::ignore = pipe_symbol;
- std::ignore = blocking;
- std::ignore = pSrc;
- std::ignore = size;
- std::ignore = numEventsInWaitList;
- std::ignore = phEventWaitList;
- std::ignore = phEvent;
- logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"),
- "{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp
index 43d548f16b..71d102e9dd 100644
--- a/source/adapters/level_zero/memory.hpp
+++ b/source/adapters/level_zero/memory.hpp
@@ -20,15 +20,12 @@
#include
#include
-#include
+#include
#include
#include
#include "ur_level_zero.hpp"
-struct ur_queue_handle_legacy_t_;
-using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *;
-
struct ur_device_handle_t_;
bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr);
@@ -48,7 +45,7 @@ const bool UseCopyEngineForD2DCopy = [] {
// PI interfaces must have queue's and destination buffer's mutexes locked for
// exclusive use and source buffer's mutex locked for shared use on entry.
ur_result_t enqueueMemCopyHelper(ur_command_t CommandType,
- ur_queue_handle_legacy_t Queue, void *Dst,
+ ur_queue_handle_t Queue, void *Dst,
ur_bool_t BlockingWrite, size_t Size,
const void *Src, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList,
@@ -56,13 +53,12 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType,
bool PreferCopyEngine);
ur_result_t enqueueMemCopyRectHelper(
- ur_command_t CommandType, ur_queue_handle_legacy_t Queue,
- const void *SrcBuffer, void *DstBuffer, ur_rect_offset_t SrcOrigin,
- ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch,
- size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch,
- ur_bool_t Blocking, uint32_t NumEventsInWaitList,
- const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent,
- bool PreferCopyEngine = false);
+ ur_command_t CommandType, ur_queue_handle_t Queue, const void *SrcBuffer,
+ void *DstBuffer, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin,
+ ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch,
+ size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking,
+ uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
+ ur_event_handle_t *OutEvent, bool PreferCopyEngine = false);
struct ur_mem_handle_t_ : _ur_object {
// Keeps the PI context of this memory handle.
diff --git a/source/adapters/level_zero/physical_mem.cpp b/source/adapters/level_zero/physical_mem.cpp
index d4d9792f24..e7bb498859 100644
--- a/source/adapters/level_zero/physical_mem.cpp
+++ b/source/adapters/level_zero/physical_mem.cpp
@@ -14,7 +14,9 @@
#include "device.hpp"
#include "ur_level_zero.hpp"
-UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate(
+namespace ur::level_zero {
+
+ur_result_t urPhysicalMemCreate(
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
[[maybe_unused]] const ur_physical_mem_properties_t *pProperties,
ur_physical_mem_handle_t *phPhysicalMem) {
@@ -35,14 +37,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) {
+ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) {
hPhysicalMem->RefCount.increment();
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
+ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
if (!hPhysicalMem->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;
@@ -52,3 +52,4 @@ urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
return UR_RESULT_SUCCESS;
}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp
index 68aebf97c7..721db3c359 100644
--- a/source/adapters/level_zero/platform.cpp
+++ b/source/adapters/level_zero/platform.cpp
@@ -12,7 +12,9 @@
#include "adapter.hpp"
#include "ur_level_zero.hpp"
-UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(
+namespace ur::level_zero {
+
+ur_result_t urPlatformGet(
ur_adapter_handle_t *, uint32_t,
uint32_t NumEntries, ///< [in] the number of platforms to be added to
///< phPlatforms. If phPlatforms is not NULL, then
@@ -47,7 +49,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo(
+ur_result_t urPlatformGetInfo(
ur_platform_handle_t Platform, ///< [in] handle of the platform
ur_platform_info_t ParamName, ///< [in] type of the info to retrieve
size_t Size, ///< [in] the number of bytes pointed to by pPlatformInfo.
@@ -101,7 +103,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion(
+ur_result_t urPlatformGetApiVersion(
ur_platform_handle_t Driver, ///< [in] handle of the platform
ur_api_version_t *Version ///< [out] api version
) {
@@ -110,7 +112,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle(
+ur_result_t urPlatformGetNativeHandle(
ur_platform_handle_t Platform, ///< [in] handle of the platform.
ur_native_handle_t *NativePlatform ///< [out] a pointer to the native
///< handle of the platform.
@@ -120,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle(
+ur_result_t urPlatformCreateWithNativeHandle(
ur_native_handle_t
NativePlatform, ///< [in] the native handle of the platform.
ur_adapter_handle_t,
@@ -135,12 +137,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle(
uint32_t NumPlatforms = 0;
ur_adapter_handle_t AdapterHandle = GlobalAdapter;
- UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms));
+ UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, 0, nullptr,
+ &NumPlatforms));
if (NumPlatforms) {
 std::vector<ur_platform_handle_t> Platforms(NumPlatforms);
- UR_CALL(urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(),
- nullptr));
+ UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumPlatforms,
+ Platforms.data(), nullptr));
// The SYCL spec requires that the set of platforms must remain fixed for
// the duration of the application's execution. We assume that we found all
@@ -158,6 +161,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle(
return UR_RESULT_ERROR_INVALID_VALUE;
}
+// Returns plugin specific backend option.
+// Current support is only for optimization options.
+// Return '-ze-opt-disable' for frontend_option = -O0.
+// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3.
+// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for
+// frontend_option=-ftarget-compile-fast.
+ur_result_t urPlatformGetBackendOption(
+ ur_platform_handle_t Platform, ///< [in] handle of the platform instance.
+ const char *FrontendOption, ///< [in] string containing the frontend option.
+ const char *
+ *PlatformOption ///< [out] returns the correct platform specific
+ ///< compiler option based on the frontend option.
+) {
+ std::ignore = Platform;
+ using namespace std::literals;
+ if (FrontendOption == nullptr) {
+ return UR_RESULT_SUCCESS;
+ }
+ if (FrontendOption == ""sv) {
+ *PlatformOption = "";
+ return UR_RESULT_SUCCESS;
+ }
+ if (FrontendOption == "-O0"sv) {
+ *PlatformOption = "-ze-opt-disable";
+ return UR_RESULT_SUCCESS;
+ }
+ if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv ||
+ FrontendOption == "-O3"sv) {
+ *PlatformOption = "-ze-opt-level=2";
+ return UR_RESULT_SUCCESS;
+ }
+ if (FrontendOption == "-ftarget-compile-fast"sv) {
+ *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'";
+ return UR_RESULT_SUCCESS;
+ }
+ return UR_RESULT_ERROR_INVALID_VALUE;
+}
+
+} // namespace ur::level_zero
+
ur_result_t ur_platform_handle_t_::initialize() {
ZE2UR_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion));
ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." +
@@ -513,41 +556,3 @@ ur_device_handle_t ur_platform_handle_t_::getDeviceById(DeviceId id) {
}
return nullptr;
}
-
-// Returns plugin specific backend option.
-// Current support is only for optimization options.
-// Return '-ze-opt-disable' for frontend_option = -O0.
-// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3.
-// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for
-// frontend_option=-ftarget-compile-fast.
-UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption(
- ur_platform_handle_t Platform, ///< [in] handle of the platform instance.
- const char *FrontendOption, ///< [in] string containing the frontend option.
- const char *
- *PlatformOption ///< [out] returns the correct platform specific
- ///< compiler option based on the frontend option.
-) {
- std::ignore = Platform;
- using namespace std::literals;
- if (FrontendOption == nullptr) {
- return UR_RESULT_SUCCESS;
- }
- if (FrontendOption == ""sv) {
- *PlatformOption = "";
- return UR_RESULT_SUCCESS;
- }
- if (FrontendOption == "-O0"sv) {
- *PlatformOption = "-ze-opt-disable";
- return UR_RESULT_SUCCESS;
- }
- if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv ||
- FrontendOption == "-O3"sv) {
- *PlatformOption = "-ze-opt-level=2";
- return UR_RESULT_SUCCESS;
- }
- if (FrontendOption == "-ftarget-compile-fast"sv) {
- *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'";
- return UR_RESULT_SUCCESS;
- }
- return UR_RESULT_ERROR_INVALID_VALUE;
-}
diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp
index a6d34ccb23..02aef2d058 100644
--- a/source/adapters/level_zero/program.cpp
+++ b/source/adapters/level_zero/program.cpp
@@ -11,6 +11,7 @@
#include "program.hpp"
#include "device.hpp"
#include "logger/ur_logger.hpp"
+#include "ur_interface_loader.hpp"
#ifdef UR_ADAPTER_LEVEL_ZERO_V2
#include "v2/context.hpp"
@@ -54,7 +55,9 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule,
}
} // extern "C"
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
+namespace ur::level_zero {
+
+ur_result_t urProgramCreateWithIL(
ur_context_handle_t Context, ///< [in] handle of the context instance
const void *IL, ///< [in] pointer to IL binary.
size_t Length, ///< [in] length of `pIL` in bytes.
@@ -79,7 +82,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
+ur_result_t urProgramCreateWithBinary(
ur_context_handle_t Context, ///< [in] handle of the context instance
ur_device_handle_t
Device, ///< [in] handle to device associated with binary.
@@ -115,17 +118,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(
+ur_result_t urProgramBuild(
ur_context_handle_t Context, ///< [in] handle of the context instance.
ur_program_handle_t Program, ///< [in] Handle of the program to build.
const char *Options ///< [in][optional] pointer to build options
///< null-terminated string.
) {
 std::vector<ur_device_handle_t> Devices = Context->getDevices();
- return urProgramBuildExp(Program, Devices.size(), Devices.data(), Options);
+ return ur::level_zero::urProgramBuildExp(Program, Devices.size(),
+ Devices.data(), Options);
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
+ur_result_t urProgramBuildExp(
ur_program_handle_t hProgram, ///< [in] Handle of the program to build.
uint32_t numDevices, ///< [in] number of devices
ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to
@@ -228,7 +232,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
return Result;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(
+ur_result_t urProgramCompileExp(
ur_program_handle_t
hProgram, ///< [in][out] handle of the program to compile.
uint32_t numDevices, ///< [in] number of devices
@@ -239,10 +243,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(
) {
std::ignore = numDevices;
std::ignore = phDevices;
- return urProgramCompile(hProgram->Context, hProgram, pOptions);
+ return ur::level_zero::urProgramCompile(hProgram->Context, hProgram,
+ pOptions);
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(
+ur_result_t urProgramCompile(
ur_context_handle_t Context, ///< [in] handle of the context instance.
ur_program_handle_t
Program, ///< [in][out] handle of the program to compile.
@@ -281,7 +286,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramLink(
+ur_result_t urProgramLink(
ur_context_handle_t Context, ///< [in] handle of the context instance.
uint32_t Count, ///< [in] number of program handles in `phPrograms`.
const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to
@@ -292,11 +297,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink(
*Program ///< [out] pointer to handle of program object created.
) {
 std::vector<ur_device_handle_t> Devices = Context->getDevices();
- return urProgramLinkExp(Context, Devices.size(), Devices.data(), Count,
- Programs, Options, Program);
+ return ur::level_zero::urProgramLinkExp(Context, Devices.size(),
+ Devices.data(), Count, Programs,
+ Options, Program);
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
+ur_result_t urProgramLinkExp(
ur_context_handle_t hContext, ///< [in] handle of the context instance.
uint32_t numDevices, ///< [in] number of devices
ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to
@@ -482,14 +488,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
return UrResult;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(
+ur_result_t urProgramRetain(
ur_program_handle_t Program ///< [in] handle for the Program to retain
) {
Program->RefCount.increment();
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(
+ur_result_t urProgramRelease(
ur_program_handle_t Program ///< [in] handle for the Program to release
) {
if (!Program->RefCount.decrementAndTest())
@@ -526,7 +532,7 @@ static bool is_in_separated_string(const std::string &str, char delimiter,
return false;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer(
+ur_result_t urProgramGetFunctionPointer(
ur_device_handle_t
Device, ///< [in] handle of the device to retrieve pointer for.
ur_program_handle_t
@@ -566,12 +572,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer(
if (ZeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) {
size_t Size;
*FunctionPointerRet = 0;
- UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr,
- &Size));
+ UR_CALL(ur::level_zero::urProgramGetInfo(
+ Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size));
std::string ClResult(Size, ' ');
- UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES,
- ClResult.size(), &ClResult[0], nullptr));
+ UR_CALL(ur::level_zero::urProgramGetInfo(
+ Program, UR_PROGRAM_INFO_KERNEL_NAMES, ClResult.size(), &ClResult[0],
+ nullptr));
// Get rid of the null terminator and search for kernel_name
// If function can be found return error code to indicate it
@@ -591,7 +598,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer(
return ze2urResult(ZeResult);
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer(
+ur_result_t urProgramGetGlobalVariablePointer(
ur_device_handle_t
Device, ///< [in] handle of the device to retrieve the pointer for.
ur_program_handle_t
@@ -626,7 +633,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer(
return ze2urResult(ZeResult);
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(
+ur_result_t urProgramGetInfo(
ur_program_handle_t Program, ///< [in] handle of the Program object
ur_program_info_t PropName, ///< [in] name of the Program property to query
size_t PropSize, ///< [in] the size of the Program property.
@@ -818,7 +825,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo(
+ur_result_t urProgramGetBuildInfo(
ur_program_handle_t Program, ///< [in] handle of the Program object
ur_device_handle_t Device, ///< [in] handle of the Device object
ur_program_build_info_t
@@ -898,7 +905,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant(
+ur_result_t urProgramSetSpecializationConstant(
ur_program_handle_t Program, ///< [in] handle of the Program object
uint32_t SpecId, ///< [in] specification constant Id
size_t SpecSize, ///< [in] size of the specialization constant value
@@ -913,7 +920,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
+ur_result_t urProgramGetNativeHandle(
ur_program_handle_t Program, ///< [in] handle of the program.
ur_native_handle_t *NativeProgram ///< [out] a pointer to the native
///< handle of the program.
@@ -934,7 +941,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle(
+ur_result_t urProgramCreateWithNativeHandle(
ur_native_handle_t
NativeProgram, ///< [in] the native handle of the program.
ur_context_handle_t Context, ///< [in] handle of the context instance
@@ -966,6 +973,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle(
return UR_RESULT_SUCCESS;
}
+ur_result_t urProgramSetSpecializationConstants(
+ ur_program_handle_t Program, ///< [in] handle of the Program object
+ uint32_t Count, ///< [in] the number of elements in the pSpecConstants array
+ const ur_specialization_constant_info_t
+ *SpecConstants ///< [in][range(0, count)] array of specialization
+ ///< constant value descriptions
+) {
+ std::scoped_lock<ur_shared_mutex> Guard(Program->Mutex);
+
+ // Remember the value of this specialization constant until the program is
+ // built. Note that we only save the pointer to the buffer that contains the
+ // value. The caller is responsible for maintaining storage for this buffer.
+ //
+ // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by
+ // SpecID.
+ for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) {
+ uint32_t SpecId = SpecConstants[SpecIt].id;
+ Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue;
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+} // namespace ur::level_zero
+
ur_program_handle_t_::~ur_program_handle_t_() {
if (!resourcesReleased) {
ur_release_program_resources(true);
@@ -1000,25 +1031,3 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) {
resourcesReleased = true;
}
}
-
-UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants(
- ur_program_handle_t Program, ///< [in] handle of the Program object
- uint32_t Count, ///< [in] the number of elements in the pSpecConstants array
- const ur_specialization_constant_info_t
- *SpecConstants ///< [in][range(0, count)] array of specialization
- ///< constant value descriptions
-) {
- std::scoped_lock<ur_shared_mutex> Guard(Program->Mutex);
-
- // Remember the value of this specialization constant until the program is
- // built. Note that we only save the pointer to the buffer that contains the
- // value. The caller is responsible for maintaining storage for this buffer.
- //
- // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by
- // SpecID.
- for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) {
- uint32_t SpecId = SpecConstants[SpecIt].id;
- Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue;
- }
- return UR_RESULT_SUCCESS;
-}
diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp
index 2845120113..978547df10 100644
--- a/source/adapters/level_zero/queue.cpp
+++ b/source/adapters/level_zero/queue.cpp
@@ -19,7 +19,7 @@
#include "common.hpp"
#include "event.hpp"
#include "queue.hpp"
-#include "ur_api.h"
+#include "ur_interface_loader.hpp"
#include "ur_level_zero.hpp"
#include "ur_util.hpp"
#include "ze_api.h"
@@ -99,7 +99,7 @@ bool ur_completion_batch::checkComplete() {
return st == COMPLETED;
}
-ur_result_t ur_completion_batch::seal(ur_queue_handle_legacy_t queue,
+ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue,
ze_command_list_handle_t cmdlist) {
assert(st == ACCUMULATING);
@@ -187,7 +187,7 @@ ur_completion_batches::ur_completion_batches() {
}
ur_result_t ur_completion_batches::tryCleanup(
- ur_queue_handle_legacy_t queue, ze_command_list_handle_t cmdlist,
+ ur_queue_handle_t queue, ze_command_list_handle_t cmdlist,
 std::vector<ze_event_handle_t> &events,
 std::vector<ur_event_handle_t> &EventListToCleanup) {
cleanup(events, EventListToCleanup);
@@ -229,7 +229,7 @@ void ur_completion_batches::forceReset() {
/// the call, in case of in-order queue it allows to cleanup all preceding
/// events.
/// @return PI_SUCCESS if successful, PI error code otherwise.
-ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue,
+ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue,
bool QueueLocked, bool QueueSynced,
ur_event_handle_t CompletedEvent) {
// Handle only immediate command lists here.
@@ -303,7 +303,7 @@ ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue,
/// @param Queue Queue where we look for signalled command lists and cleanup
/// events.
/// @return PI_SUCCESS if successful, PI error code otherwise.
-ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) {
+ur_result_t resetCommandLists(ur_queue_handle_t Queue) {
// Handle immediate command lists here, they don't need to be reset and we
// only need to cleanup events.
if (Queue->UsingImmCmdLists) {
@@ -342,7 +342,10 @@ ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) {
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::queueGetInfo(
+namespace ur::level_zero {
+
+ur_result_t urQueueGetInfo(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue object
ur_queue_info_t ParamName, ///< [in] name of the queue property to query
size_t ParamValueSize, ///< [in] size in bytes of the queue property value
///< provided
@@ -350,8 +353,6 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetInfo(
size_t *ParamValueSizeRet ///< [out] size in bytes returned in queue
///< property value
) {
- auto Queue = this;
-
 std::shared_lock<ur_shared_mutex> Lock(Queue->Mutex);
UrReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet);
// TODO: consider support for queue properties and size
@@ -467,7 +468,7 @@ static bool doEagerInit = [] {
return EagerInit ? std::atoi(EagerInit) != 0 : false;
}();
-UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
+ur_result_t urQueueCreate(
ur_context_handle_t Context, ///< [in] handle of the context object
ur_device_handle_t Device, ///< [in] handle of the device object
const ur_queue_properties_t
@@ -502,7 +503,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
// Create placeholder queues in the compute queue group.
// Actual L0 queues will be created at first use.
 std::vector<ze_command_queue_handle_t> ZeComputeCommandQueues(
- Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::Compute]
+ Device->QueueGroup[ur_queue_handle_t_::queue_type::Compute]
.ZeProperties.numQueues,
nullptr);
@@ -512,21 +513,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
size_t NumCopyGroups = 0;
if (Device->hasMainCopyEngine()) {
NumCopyGroups +=
- Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::MainCopy]
+ Device->QueueGroup[ur_queue_handle_t_::queue_type::MainCopy]
.ZeProperties.numQueues;
}
if (Device->hasLinkCopyEngine()) {
NumCopyGroups +=
- Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::LinkCopy]
+ Device->QueueGroup[ur_queue_handle_t_::queue_type::LinkCopy]
.ZeProperties.numQueues;
}
 std::vector<ze_command_queue_handle_t> ZeCopyCommandQueues(NumCopyGroups,
nullptr);
try {
- *Queue = new ur_queue_handle_legacy_t_(ZeComputeCommandQueues,
- ZeCopyCommandQueues, Context, Device,
- true, Flags, ForceComputeIndex);
+ *Queue =
+ new ur_queue_handle_t_(ZeComputeCommandQueues, ZeCopyCommandQueues,
+ Context, Device, true, Flags, ForceComputeIndex);
} catch (const std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
@@ -535,7 +536,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
// Do eager initialization of Level Zero handles on request.
if (doEagerInit) {
- ur_queue_handle_legacy_t Q = Legacy(*Queue);
+ auto Q = *Queue;
// Creates said number of command-lists.
auto warmupQueueGroup = [Q](bool UseCopyEngine,
uint32_t RepeatCount) -> ur_result_t {
@@ -576,9 +577,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::queueRetain() {
- auto Queue = this;
-
+ur_result_t urQueueRetain(
+ ur_queue_handle_t Queue ///< [in] handle of the queue object to get access
+) {
{
 std::scoped_lock<ur_shared_mutex> Lock(Queue->Mutex);
Queue->RefCountExternal++;
@@ -587,9 +588,9 @@ ur_result_t ur_queue_handle_legacy_t_::queueRetain() {
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::queueRelease() {
- auto Queue = this;
-
+ur_result_t urQueueRelease(
+ ur_queue_handle_t Queue ///< [in] handle of the queue object to release
+) {
std::vector EventListToCleanup;
{
 std::scoped_lock<ur_shared_mutex> Lock(Queue->Mutex);
@@ -690,13 +691,12 @@ ur_result_t ur_queue_handle_legacy_t_::queueRelease() {
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle(
+ur_result_t urQueueGetNativeHandle(
+ ur_queue_handle_t Queue, ///< [in] handle of the queue.
ur_queue_native_desc_t *Desc,
ur_native_handle_t
*NativeQueue ///< [out] a pointer to the native handle of the queue.
) {
- auto Queue = this;
-
// Lock automatically releases when this goes out of scope.
 std::shared_lock<ur_shared_mutex> lock(Queue->Mutex);
@@ -728,24 +728,7 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle(
return UR_RESULT_SUCCESS;
}
-void ur_queue_handle_legacy_t_::ur_queue_group_t::setImmCmdList(
- ur_queue_handle_legacy_t queue, ze_command_list_handle_t ZeCommandList) {
- // An immediate command list was given to us but we don't have the queue
- // descriptor information. Create a dummy and note that it is not recycleable.
- ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
-
- ImmCmdLists = std::vector<ur_command_list_ptr_t>(
- 1,
- Queue->CommandListMap
- .insert(std::pair{
- ZeCommandList,
- ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc,
- queue->useCompletionBatching(), false,
- false, true)})
- .first);
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
+ur_result_t urQueueCreateWithNativeHandle(
ur_native_handle_t NativeQueue, ///< [in] the native handle of the queue.
ur_context_handle_t Context, ///< [in] handle of the context object
 ur_device_handle_t Device, ///< [in] handle of the device object
@@ -785,12 +768,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
uint32_t NumEntries = 1;
ur_platform_handle_t Platform{};
ur_adapter_handle_t AdapterHandle = GlobalAdapter;
- UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr));
+ UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumEntries,
+ &Platform, nullptr));
ur_device_handle_t UrDevice = Device;
if (UrDevice == nullptr) {
- UR_CALL(urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, NumEntries, &UrDevice,
- nullptr));
+ UR_CALL(ur::level_zero::urDeviceGet(Platform, UR_DEVICE_TYPE_GPU,
+ NumEntries, &UrDevice, nullptr));
}
// The NativeHandleDesc has value if if the native handle is an immediate
@@ -800,7 +784,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
 std::vector<ze_command_queue_handle_t> CopyQueues;
try {
- ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_(
+ ur_queue_handle_t_ *Queue = new ur_queue_handle_t_(
ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags);
*RetQueue = reinterpret_cast(Queue);
} catch (const std::bad_alloc &) {
@@ -808,9 +792,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
- auto &InitialGroup =
- Legacy(*RetQueue)->ComputeQueueGroupsByTID.begin()->second;
- InitialGroup.setImmCmdList(Legacy(*RetQueue),
+ auto &InitialGroup = (*RetQueue)->ComputeQueueGroupsByTID.begin()->second;
+ InitialGroup.setImmCmdList(*RetQueue,
ur_cast(NativeQueue));
} else {
auto ZeQueue = ur_cast(NativeQueue);
@@ -823,7 +806,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
std::vector ZeroCopyQueues;
try {
- ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_(
+ ur_queue_handle_t_ *Queue = new ur_queue_handle_t_(
ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags);
*RetQueue = reinterpret_cast(Queue);
} catch (const std::bad_alloc &) {
@@ -832,13 +815,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
return UR_RESULT_ERROR_UNKNOWN;
}
}
- Legacy(*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1);
+ (*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1);
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::queueFinish() {
- auto Queue = this;
+ur_result_t urQueueFinish(
+ ur_queue_handle_t Queue ///< [in] handle of the queue to be finished.
+) {
if (Queue->UsingImmCmdLists) {
// Lock automatically releases when this goes out of scope.
std::scoped_lock Lock(Queue->Mutex);
@@ -903,12 +887,38 @@ ur_result_t ur_queue_handle_legacy_t_::queueFinish() {
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::queueFlush() {
- auto Queue = this;
+ur_result_t urQueueFlush(
+ ur_queue_handle_t Queue ///< [in] handle of the queue to be flushed.
+) {
std::scoped_lock Lock(Queue->Mutex);
return Queue->executeAllOpenCommandLists();
}
+ur_result_t urEnqueueKernelLaunchCustomExp(
+ ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+ const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
+ uint32_t numPropsInLaunchPropList,
+ const ur_exp_launch_property_t *launchPropList,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent) {
+ std::ignore = hQueue;
+ std::ignore = hKernel;
+ std::ignore = workDim;
+ std::ignore = pGlobalWorkSize;
+ std::ignore = pLocalWorkSize;
+ std::ignore = numPropsInLaunchPropList;
+ std::ignore = launchPropList;
+ std::ignore = numEventsInWaitList;
+ std::ignore = phEventWaitList;
+ std::ignore = phEvent;
+
+ logger::error("[UR][L0] {} function not implemented!",
+               __FUNCTION__);
+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+} // namespace ur::level_zero
+
// Configuration of the command-list batching.
struct zeCommandListBatchConfig {
// Default value of 0. This specifies to use dynamic batch size adjustment.
@@ -1063,7 +1073,7 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] {
return ZeCommandListBatchConfig(IsCopy{true});
}();
-ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_(
+ur_queue_handle_t_::ur_queue_handle_t_(
std::vector &ComputeQueues,
std::vector &CopyQueues,
ur_context_handle_t Context, ur_device_handle_t Device,
@@ -1089,8 +1099,8 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_(
// First, see if the queue's device allows for round-robin or it is
// fixed to one particular compute CCS (it is so for sub-sub-devices).
auto &ComputeQueueGroupInfo = Device->QueueGroup[queue_type::Compute];
- ur_queue_group_t ComputeQueueGroup{
- reinterpret_cast(this), queue_type::Compute};
+ ur_queue_group_t ComputeQueueGroup{reinterpret_cast(this),
+ queue_type::Compute};
ComputeQueueGroup.ZeQueues = ComputeQueues;
// Create space to hold immediate commandlists corresponding to the
// ZeQueues
@@ -1136,8 +1146,8 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_(
ComputeQueueGroupsByTID.set(ComputeQueueGroup);
// Copy group initialization.
- ur_queue_group_t CopyQueueGroup{
- reinterpret_cast(this), queue_type::MainCopy};
+ ur_queue_group_t CopyQueueGroup{reinterpret_cast(this),
+ queue_type::MainCopy};
const auto &Range = getRangeOfAllowedCopyEngines((ur_device_handle_t)Device);
if (Range.first < 0 || Range.second < 0) {
// We are asked not to use copy engines, just do nothing.
@@ -1182,7 +1192,7 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_(
Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound;
}
-void ur_queue_handle_legacy_t_::adjustBatchSizeForFullBatch(bool IsCopy) {
+void ur_queue_handle_t_::adjustBatchSizeForFullBatch(bool IsCopy) {
auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch;
auto &ZeCommandListBatchConfig =
IsCopy ? ZeCommandListBatchCopyConfig : ZeCommandListBatchComputeConfig;
@@ -1209,7 +1219,7 @@ void ur_queue_handle_legacy_t_::adjustBatchSizeForFullBatch(bool IsCopy) {
}
}
-void ur_queue_handle_legacy_t_::adjustBatchSizeForPartialBatch(bool IsCopy) {
+void ur_queue_handle_t_::adjustBatchSizeForPartialBatch(bool IsCopy) {
auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch;
auto &ZeCommandListBatchConfig =
IsCopy ? ZeCommandListBatchCopyConfig : ZeCommandListBatchComputeConfig;
@@ -1235,14 +1245,15 @@ void ur_queue_handle_legacy_t_::adjustBatchSizeForPartialBatch(bool IsCopy) {
}
}
-ur_result_t ur_queue_handle_legacy_t_::executeCommandList(
- ur_command_list_ptr_t CommandList, bool IsBlocking, bool OKToBatchCommand) {
+ur_result_t
+ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList,
+ bool IsBlocking, bool OKToBatchCommand) {
// Do nothing if command list is already closed.
if (CommandList->second.IsClosed)
return UR_RESULT_SUCCESS;
- bool UseCopyEngine = CommandList->second.isCopy(
- reinterpret_cast(this));
+ bool UseCopyEngine =
+ CommandList->second.isCopy(reinterpret_cast(this));
// If the current LastCommandEvent is the nullptr, then it means
// either that no command has ever been issued to the queue
@@ -1349,7 +1360,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList(
//
ur_event_handle_t HostVisibleEvent;
auto Res = createEventAndAssociateQueue(
- reinterpret_cast(this), &HostVisibleEvent,
+ reinterpret_cast(this), &HostVisibleEvent,
UR_EXT_COMMAND_TYPE_USER, CommandList,
/* IsInternal */ false, /* IsMultiDevice */ true,
/* HostVisible */ true);
@@ -1473,12 +1484,12 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList(
return UR_RESULT_SUCCESS;
}
-bool ur_queue_handle_legacy_t_::doReuseDiscardedEvents() {
+bool ur_queue_handle_t_::doReuseDiscardedEvents() {
return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents();
}
-ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent(
- ur_command_list_ptr_t CommandList) {
+ur_result_t
+ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
if (LastCommandEvent && LastCommandEvent->IsDiscarded) {
ZE2UR_CALL(zeCommandListAppendBarrier,
(CommandList->first, nullptr, 1, &(LastCommandEvent->ZeEvent)));
@@ -1511,8 +1522,7 @@ ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent(
return UR_RESULT_SUCCESS;
}
-ur_result_t
-ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) {
+ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) {
if (!Event->IsMultiDevice) {
auto EventCachesMap = Event->isHostVisible() ? &EventCachesDeviceMap[0]
: &EventCachesDeviceMap[1];
@@ -1528,19 +1538,19 @@ ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) {
return UR_RESULT_SUCCESS;
}
-void ur_queue_handle_legacy_t_::active_barriers::add(ur_event_handle_t &Event) {
+void ur_queue_handle_t_::active_barriers::add(ur_event_handle_t &Event) {
Event->RefCount.increment();
Events.push_back(Event);
}
-ur_result_t ur_queue_handle_legacy_t_::active_barriers::clear() {
+ur_result_t ur_queue_handle_t_::active_barriers::clear() {
for (const auto &Event : Events)
UR_CALL(urEventReleaseInternal(Event));
Events.clear();
return UR_RESULT_SUCCESS;
}
-void ur_queue_handle_legacy_t_::clearEndTimeRecordings() {
+void ur_queue_handle_t_::clearEndTimeRecordings() {
uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution;
const uint64_t TimestampMaxValue = Device->getTimestampMask();
@@ -1567,7 +1577,7 @@ void ur_queue_handle_legacy_t_::clearEndTimeRecordings() {
EndTimeRecordings.clear();
}
-ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) {
+ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
if (!Queue->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;
@@ -1606,33 +1616,33 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) {
return UR_RESULT_SUCCESS;
}
-bool ur_queue_handle_legacy_t_::isBatchingAllowed(bool IsCopy) const {
+bool ur_queue_handle_t_::isBatchingAllowed(bool IsCopy) const {
auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch;
return (CommandBatch.QueueBatchSize > 0 &&
((UrL0Serialize & UrL0SerializeBlock) == 0));
}
-bool ur_queue_handle_legacy_t_::isDiscardEvents() const {
+bool ur_queue_handle_t_::isDiscardEvents() const {
return ((this->Properties & UR_QUEUE_FLAG_DISCARD_EVENTS) != 0);
}
-bool ur_queue_handle_legacy_t_::isPriorityLow() const {
+bool ur_queue_handle_t_::isPriorityLow() const {
return ((this->Properties & UR_QUEUE_FLAG_PRIORITY_LOW) != 0);
}
-bool ur_queue_handle_legacy_t_::isPriorityHigh() const {
+bool ur_queue_handle_t_::isPriorityHigh() const {
return ((this->Properties & UR_QUEUE_FLAG_PRIORITY_HIGH) != 0);
}
-bool ur_queue_handle_legacy_t_::isBatchedSubmission() const {
+bool ur_queue_handle_t_::isBatchedSubmission() const {
return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_BATCHED) != 0);
}
-bool ur_queue_handle_legacy_t_::isImmediateSubmission() const {
+bool ur_queue_handle_t_::isImmediateSubmission() const {
return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) != 0);
}
-bool ur_queue_handle_legacy_t_::isInOrderQueue() const {
+bool ur_queue_handle_t_::isInOrderQueue() const {
// If out-of-order queue property is not set, then this is a in-order queue.
return ((this->Properties & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) ==
0);
@@ -1662,11 +1672,11 @@ ur_result_t CleanupEventListFromResetCmdList(
// TODO: Event release in immediate commandlist mode is driven by the SYCL
// runtime. Need to investigate whether relase can be done earlier, at sync
// points such as this, to reduce total number of active Events.
-ur_result_t ur_queue_handle_legacy_t_::synchronize() {
+ur_result_t ur_queue_handle_t_::synchronize() {
if (!Healthy)
return UR_RESULT_SUCCESS;
- auto syncImmCmdList = [](ur_queue_handle_legacy_t_ *Queue,
+ auto syncImmCmdList = [](ur_queue_handle_t_ *Queue,
ur_command_list_ptr_t ImmCmdList) {
if (ImmCmdList == Queue->CommandListMap.end())
return UR_RESULT_SUCCESS;
@@ -1757,9 +1767,8 @@ ur_result_t ur_queue_handle_legacy_t_::synchronize() {
return UR_RESULT_SUCCESS;
}
-ur_event_handle_t
-ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice,
- bool HostVisible) {
+ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
+ bool HostVisible) {
std::list *Cache;
if (!IsMultiDevice) {
@@ -1791,7 +1800,7 @@ ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice,
// at the end of a command list batch. This will only be true if the event does
// not have dependencies or the dependencies are not for events which exist in
// this batch.
-bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
+bool eventCanBeBatched(ur_queue_handle_t Queue, bool UseCopyEngine,
uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList) {
auto &CommandBatch =
@@ -1821,7 +1830,7 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
// dependencies, then this command can be enqueued without a signal event set in
// a command list batch. The signal event will be appended at the end of the
// batch to be signalled at the end of the command list.
-ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
+ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine,
ze_event_handle_t *ZeEvent, ur_event_handle_t *Event,
uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList,
@@ -1852,7 +1861,7 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
// visible pool.
// \param HostVisible tells if the event must be created in the
// host-visible pool. If not set then this function will decide.
-ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue,
+ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
ur_event_handle_t *Event,
ur_command_t CommandType,
ur_command_list_ptr_t CommandList,
@@ -1908,12 +1917,12 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue,
// event will not be waited/released by SYCL RT, so it must be destroyed by
// EventRelease in resetCommandList.
if (!IsInternal)
- UR_CALL(urEventRetain(*Event));
+ UR_CALL(ur::level_zero::urEventRetain(*Event));
return UR_RESULT_SUCCESS;
}
-void ur_queue_handle_legacy_t_::CaptureIndirectAccesses() {
+void ur_queue_handle_t_::CaptureIndirectAccesses() {
for (auto &Kernel : KernelsToBeSubmitted) {
if (!Kernel->hasIndirectAccess())
continue;
@@ -1937,8 +1946,7 @@ void ur_queue_handle_legacy_t_::CaptureIndirectAccesses() {
KernelsToBeSubmitted.clear();
}
-ur_result_t
-ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded(
+ur_result_t ur_queue_handle_t_::signalEventFromCmdListIfLastEventDiscarded(
ur_command_list_ptr_t CommandList) {
// We signal new event at the end of command list only if we have queue with
// discard_events property and the last command event is discarded.
@@ -1952,7 +1960,7 @@ ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded(
// from the host.
ur_event_handle_t Event;
UR_CALL(createEventAndAssociateQueue(
- reinterpret_cast(this), &Event,
+ reinterpret_cast(this), &Event,
UR_EXT_COMMAND_TYPE_USER, CommandList,
/* IsInternal */ false, /* IsMultiDevice */ true,
/* HostVisible */ false));
@@ -1964,7 +1972,7 @@ ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::executeOpenCommandList(bool IsCopy) {
+ur_result_t ur_queue_handle_t_::executeOpenCommandList(bool IsCopy) {
auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch;
// If there are any commands still in the open command list for this
// queue, then close and execute that command list now.
@@ -1978,7 +1986,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeOpenCommandList(bool IsCopy) {
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::resetCommandList(
+ur_result_t ur_queue_handle_t_::resetCommandList(
ur_command_list_ptr_t CommandList, bool MakeAvailable,
std::vector &EventListToCleanup, bool CheckStatus) {
bool UseCopyEngine = CommandList->second.isCopy(this);
@@ -2080,7 +2088,7 @@ ur_result_t ur_queue_handle_legacy_t_::resetCommandList(
return UR_RESULT_SUCCESS;
}
-bool ur_command_list_info_t::isCopy(ur_queue_handle_legacy_t Queue) const {
+bool ur_command_list_info_t::isCopy(ur_queue_handle_t Queue) const {
return ZeQueueDesc.ordinal !=
(uint32_t)Queue->Device
->QueueGroup
@@ -2096,7 +2104,7 @@ void ur_command_list_info_t::append(ur_event_handle_t Event) {
}
ur_command_list_ptr_t
-ur_queue_handle_legacy_t_::eventOpenCommandList(ur_event_handle_t Event) {
+ur_queue_handle_t_::eventOpenCommandList(ur_event_handle_t Event) {
using IsCopy = bool;
if (UsingImmCmdLists) {
@@ -2121,15 +2129,32 @@ ur_queue_handle_legacy_t_::eventOpenCommandList(ur_event_handle_t Event) {
return CommandListMap.end();
}
-ur_queue_handle_legacy_t_::ur_queue_group_t &
-ur_queue_handle_legacy_t_::getQueueGroup(bool UseCopyEngine) {
+void ur_queue_handle_t_::ur_queue_group_t::setImmCmdList(
+ ur_queue_handle_t queue, ze_command_list_handle_t ZeCommandList) {
+ // An immediate command list was given to us but we don't have the queue
+ // descriptor information. Create a dummy and note that it is not recyclable.
+ ZeStruct ZeQueueDesc;
+
+ ImmCmdLists = std::vector(
+ 1,
+ Queue->CommandListMap
+ .insert(std::pair{
+ ZeCommandList,
+ ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc,
+ queue->useCompletionBatching(), false,
+ false, true)})
+ .first);
+}
+
+ur_queue_handle_t_::ur_queue_group_t &
+ur_queue_handle_t_::getQueueGroup(bool UseCopyEngine) {
auto &Map = (UseCopyEngine ? CopyQueueGroupsByTID : ComputeQueueGroupsByTID);
return Map.get();
}
// Return the index of the next queue to use based on a
// round robin strategy and the queue group ordinal.
-uint32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getQueueIndex(
+uint32_t ur_queue_handle_t_::ur_queue_group_t::getQueueIndex(
uint32_t *QueueGroupOrdinal, uint32_t *QueueIndex, bool QueryOnly) {
auto CurrentIndex = NextIndex;
@@ -2163,8 +2188,7 @@ uint32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getQueueIndex(
// This function will return one of possibly multiple available native
// queues and the value of the queue group ordinal.
ze_command_queue_handle_t &
-ur_queue_handle_legacy_t_::ur_queue_group_t::getZeQueue(
- uint32_t *QueueGroupOrdinal) {
+ur_queue_handle_t_::ur_queue_group_t::getZeQueue(uint32_t *QueueGroupOrdinal) {
// QueueIndex is the proper L0 index.
// Index is the plugins concept of index, with main and link copy engines in
@@ -2209,7 +2233,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getZeQueue(
return ZeQueue;
}
-int32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getCmdQueueOrdinal(
+int32_t ur_queue_handle_t_::ur_queue_group_t::getCmdQueueOrdinal(
ze_command_queue_handle_t CmdQueue) {
// Find out the right queue group ordinal (first queue might be "main" or
// "link")
@@ -2221,7 +2245,7 @@ int32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getCmdQueueOrdinal(
return Queue->Device->QueueGroup[QueueType].ZeOrdinal;
}
-bool ur_queue_handle_legacy_t_::useCompletionBatching() {
+bool ur_queue_handle_t_::useCompletionBatching() {
static bool enabled = getenv_tobool(
"UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_EVENT_COMPLETIONS", false);
return enabled && !isInOrderQueue() && UsingImmCmdLists;
@@ -2231,7 +2255,7 @@ bool ur_queue_handle_legacy_t_::useCompletionBatching() {
// fence tracking its completion. This command list & fence are added to the
// map of command lists in this queue with ZeFenceInUse = false.
// The caller must hold a lock of the queue already.
-ur_result_t ur_queue_handle_legacy_t_::createCommandList(
+ur_result_t ur_queue_handle_t_::createCommandList(
bool UseCopyEngine, ur_command_list_ptr_t &CommandList,
ze_command_queue_handle_t *ForcedCmdQueue) {
@@ -2274,8 +2298,8 @@ ur_result_t ur_queue_handle_legacy_t_::createCommandList(
}
ur_result_t
-ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList,
- bool UseCopyEngine) {
+ur_queue_handle_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList,
+ bool UseCopyEngine) {
// Early exit if there are no active barriers.
if (ActiveBarriers.empty())
return UR_RESULT_SUCCESS;
@@ -2284,7 +2308,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList,
_ur_ze_event_list_t ActiveBarriersWaitList;
UR_CALL(ActiveBarriersWaitList.createAndRetainUrZeEventList(
ActiveBarriers.vector().size(), ActiveBarriers.vector().data(),
- reinterpret_cast(this), UseCopyEngine));
+ reinterpret_cast(this), UseCopyEngine));
// We can now replace active barriers with the ones in the wait list.
UR_CALL(ActiveBarriers.clear());
@@ -2300,7 +2324,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList,
ur_event_handle_t Event = nullptr;
if (auto Res = createEventAndAssociateQueue(
- reinterpret_cast(this), &Event,
+ reinterpret_cast(this), &Event,
UR_EXT_COMMAND_TYPE_USER, CmdList,
/* IsInternal */ true, /* IsMultiDevice */ true))
return Res;
@@ -2316,7 +2340,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList,
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::insertStartBarrierIfDiscardEventsMode(
+ur_result_t ur_queue_handle_t_::insertStartBarrierIfDiscardEventsMode(
ur_command_list_ptr_t &CmdList) {
// If current command list is different from the last command list then insert
// a barrier waiting for the last command event.
@@ -2342,7 +2366,7 @@ static const bool UseCopyEngineForInOrderQueue = [] {
(std::stoi(CopyEngineForInOrderQueue) != 0));
}();
-bool ur_queue_handle_legacy_t_::useCopyEngine(bool PreferCopyEngine) const {
+bool ur_queue_handle_t_::useCopyEngine(bool PreferCopyEngine) const {
auto InitialCopyGroup = CopyQueueGroupsByTID.begin()->second;
return PreferCopyEngine && InitialCopyGroup.ZeQueues.size() > 0 &&
(!isInOrderQueue() || UseCopyEngineForInOrderQueue);
@@ -2350,8 +2374,7 @@ bool ur_queue_handle_legacy_t_::useCopyEngine(bool PreferCopyEngine) const {
// This function will return one of po6ssibly multiple available
// immediate commandlists associated with this Queue.
-ur_command_list_ptr_t &
-ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() {
+ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() {
uint32_t QueueIndex, QueueOrdinal;
auto Index = getQueueIndex(&QueueOrdinal, &QueueIndex);
@@ -2363,6 +2386,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() {
ZeCommandQueueDesc.ordinal = QueueOrdinal;
ZeCommandQueueDesc.index = QueueIndex;
ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
+ bool isInOrderList = false;
const char *Priority = "Normal";
if (Queue->isPriorityLow()) {
ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW;
@@ -2378,6 +2402,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() {
}
if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) {
+ isInOrderList = true;
ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER;
}
@@ -2426,7 +2451,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() {
ZeCommandList,
ur_command_list_info_t(
nullptr, true, false, nullptr, ZeCommandQueueDesc,
- Queue->useCompletionBatching(), true, false, true)})
+ Queue->useCompletionBatching(), true, isInOrderList, true)})
.first;
return ImmCmdLists[Index];
@@ -2455,7 +2480,7 @@ static const size_t ImmCmdListsEventCleanupThreshold = [] {
return Threshold;
}();
-size_t ur_queue_handle_legacy_t_::getImmdCmmdListsEventCleanupThreshold() {
+size_t ur_queue_handle_t_::getImmdCmmdListsEventCleanupThreshold() {
return useCompletionBatching() ? CompletionEventsPerBatch
: ImmCmdListsEventCleanupThreshold;
}
diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp
index 97ddcf014c..699d7ec960 100644
--- a/source/adapters/level_zero/queue.hpp
+++ b/source/adapters/level_zero/queue.hpp
@@ -20,19 +20,15 @@
#include
#include
-#include
+#include
#include
#include
#include "common.hpp"
#include "device.hpp"
-#include "queue_api.hpp"
-
-struct ur_queue_handle_legacy_t_;
-using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *;
extern "C" {
-ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue);
+ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue);
} // extern "C"
struct ur_completion_batch;
@@ -74,8 +70,7 @@ struct ur_completion_batch {
// Seals the event batch and appends a barrier to the command list.
// Adding any further events after this, but before reset, is undefined.
- ur_result_t seal(ur_queue_handle_legacy_t queue,
- ze_command_list_handle_t cmdlist);
+ ur_result_t seal(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist);
// Resets a complete batch back to an empty state. Cleanups internal state
// but keeps allocated resources for reuse.
@@ -117,7 +112,7 @@ struct ur_completion_batches {
// returned to indicate that there are no batches available.
// This is safe, but will increase how many events are associated
// with the active batch.
- ur_result_t tryCleanup(ur_queue_handle_legacy_t queue,
+ ur_result_t tryCleanup(ur_queue_handle_t queue,
ze_command_list_handle_t cmdlist,
std::vector &EventList,
std::vector &EventListToCleanup);
@@ -154,10 +149,10 @@ struct ur_completion_batches {
ur_completion_batch_it active;
};
-ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue);
+ur_result_t resetCommandLists(ur_queue_handle_t Queue);
ur_result_t
-CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue,
- bool QueueLocked = false, bool QueueSynced = false,
+CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked = false,
+ bool QueueSynced = false,
ur_event_handle_t CompletedEvent = nullptr);
// Structure describing the specific use of a command-list in a queue.
@@ -208,7 +203,7 @@ struct ur_command_list_info_t {
bool IsImmediate;
// Helper functions to tell if this is a copy command-list.
- bool isCopy(ur_queue_handle_legacy_t Queue) const;
+ bool isCopy(ur_queue_handle_t Queue) const;
// An optional event completion batching mechanism for out-of-order immediate
// command lists.
@@ -230,209 +225,23 @@ using ur_command_list_map_t =
// The iterator pointing to a specific command-list in use.
using ur_command_list_ptr_t = ur_command_list_map_t::iterator;
-struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ {
- ur_queue_handle_legacy_t_(
- std::vector &ComputeQueues,
- std::vector &CopyQueues,
- ur_context_handle_t Context, ur_device_handle_t Device,
- bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0,
- int ForceComputeIndex = -1);
-
- ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize,
- void *pPropValue, size_t *pPropSizeRet) override;
- ur_result_t queueRetain() override;
- ur_result_t queueRelease() override;
- ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc,
- ur_native_handle_t *phNativeQueue) override;
- ur_result_t queueFinish() override;
- ur_result_t queueFlush() override;
- ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim,
- const size_t *pGlobalWorkOffset,
- const size_t *pGlobalWorkSize,
- const size_t *pLocalWorkSize,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t
- enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead,
- size_t offset, size_t size, void *pDst,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite,
- size_t offset, size_t size,
- const void *pSrc,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferReadRect(
- ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin,
- ur_rect_offset_t hostOrigin, ur_rect_region_t region,
- size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch,
- size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferWriteRect(
- ur_mem_handle_t hBuffer, bool blockingWrite,
- ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
- ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
- size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferCopy(ur_mem_handle_t hBufferSrc,
- ur_mem_handle_t hBufferDst, size_t srcOffset,
- size_t dstOffset, size_t size,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferCopyRect(
- ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst,
- ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin,
- ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch,
- size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferFill(ur_mem_handle_t hBuffer,
- const void *pPattern, size_t patternSize,
- size_t offset, size_t size,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemImageRead(ur_mem_handle_t hImage, bool blockingRead,
- ur_rect_offset_t origin,
- ur_rect_region_t region, size_t rowPitch,
- size_t slicePitch, void *pDst,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemImageWrite(ur_mem_handle_t hImage, bool blockingWrite,
- ur_rect_offset_t origin,
- ur_rect_region_t region, size_t rowPitch,
- size_t slicePitch, void *pSrc,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t
- enqueueMemImageCopy(ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst,
- ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin,
- ur_rect_region_t region, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueMemBufferMap(ur_mem_handle_t hBuffer, bool blockingMap,
- ur_map_flags_t mapFlags, size_t offset,
- size_t size, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent,
- void **ppRetMap) override;
- ur_result_t enqueueMemUnmap(ur_mem_handle_t hMem, void *pMappedPtr,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueUSMFill(void *pMem, size_t patternSize,
- const void *pPattern, size_t size,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc,
- size_t size, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, size_t,
- size_t, uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) override;
- ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, size_t,
- size_t, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) override;
- ur_result_t enqueueUSMPrefetch(const void *pMem, size_t size,
- ur_usm_migration_flags_t flags,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueUSMAdvise(const void *pMem, size_t size,
- ur_usm_advice_flags_t advice,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueDeviceGlobalVariableWrite(
- ur_program_handle_t hProgram, const char *name, bool blockingWrite,
- size_t count, size_t offset, const void *pSrc,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueDeviceGlobalVariableRead(
- ur_program_handle_t hProgram, const char *name, bool blockingRead,
- size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueReadHostPipe(ur_program_handle_t hProgram,
- const char *pipe_symbol, bool blocking,
- void *pDst, size_t size,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueWriteHostPipe(ur_program_handle_t hProgram,
- const char *pipe_symbol, bool blocking,
- void *pSrc, size_t size,
- uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t bindlessImagesImageCopyExp(
- const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc,
- const ur_image_desc_t *pDstImageDesc,
- const ur_image_format_t *pSrcImageFormat,
- const ur_image_format_t *pDstImageFormat,
- ur_exp_image_copy_region_t *pCopyRegion,
- ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t bindlessImagesWaitExternalSemaphoreExp(
- ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue,
- uint64_t waitValue, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t bindlessImagesSignalExternalSemaphoreExp(
- ur_exp_external_semaphore_handle_t hSemaphore, bool hasSignalValue,
- uint64_t signalValue, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueCooperativeKernelLaunchExp(
- ur_kernel_handle_t hKernel, uint32_t workDim,
- const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
- const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t
- enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t enqueueKernelLaunchCustomExp(
- ur_kernel_handle_t hKernel, uint32_t workDim,
- const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
- uint32_t numPropsInLaunchPropList,
- const ur_exp_launch_property_t *launchPropList,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) override;
- ur_result_t
- enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *,
- uint32_t, const ur_mem_handle_t *,
- const ur_exp_enqueue_native_command_properties_t *,
- uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) override;
+struct ur_queue_handle_t_ : _ur_object {
+ ur_queue_handle_t_(std::vector &ComputeQueues,
+ std::vector &CopyQueues,
+ ur_context_handle_t Context, ur_device_handle_t Device,
+ bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0,
+ int ForceComputeIndex = -1);
using queue_type = ur_device_handle_t_::queue_group_info_t::type;
// PI queue is in general a one to many mapping to L0 native queues.
struct ur_queue_group_t {
- ur_queue_handle_legacy_t Queue;
+ ur_queue_handle_t Queue;
ur_queue_group_t() = delete;
// The Queue argument captures the enclosing PI queue.
// The Type argument specifies the type of this queue group.
// The actual ZeQueues are populated at PI queue construction.
- ur_queue_group_t(ur_queue_handle_legacy_t Queue, queue_type Type)
+ ur_queue_group_t(ur_queue_handle_t Queue, queue_type Type)
: Queue(Queue), Type(Type) {}
// The type of the queue group.
@@ -462,8 +271,7 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ {
ze_command_queue_handle_t &getZeQueue(uint32_t *QueueGroupOrdinal);
// This function sets an immediate commandlist from the interop interface.
- void setImmCmdList(ur_queue_handle_legacy_t queue,
- ze_command_list_handle_t);
+ void setImmCmdList(ur_queue_handle_t queue, ze_command_list_handle_t);
// This function returns the next immediate commandlist to use.
ur_command_list_ptr_t &getImmCmdList();
@@ -530,15 +338,15 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ {
pi_queue_group_by_tid_t CopyQueueGroupsByTID;
// Keeps the PI context to which this queue belongs.
- // This field is only set at ur_queue_handle_legacy_t_ creation time, and
+ // This field is only set at ur_queue_handle_t_ creation time, and
// cannot change. Therefore it can be accessed without holding a lock on this
- // ur_queue_handle_legacy_t_.
+ // ur_queue_handle_t_.
const ur_context_handle_t Context;
// Keeps the PI device to which this queue belongs.
- // This field is only set at ur_queue_handle_legacy_t_ creation time, and
+ // This field is only set at ur_queue_handle_t_ creation time, and
// cannot change. Therefore it can be accessed without holding a lock on this
- // ur_queue_handle_legacy_t_.
+ // ur_queue_handle_t_.
const ur_device_handle_t Device;
// A queue may use either standard or immediate commandlists. At queue
@@ -881,21 +689,10 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ {
// Threshold for cleaning up the EventList for immediate command lists.
size_t getImmdCmmdListsEventCleanupThreshold();
-};
-
-template QueueT GetQueue(ur_queue_handle_t Queue) {
- if (!Queue)
- return nullptr;
- auto *Q = dynamic_cast(Queue);
- if (!Q) {
- throw UR_RESULT_ERROR_INVALID_QUEUE;
- }
- return Q;
-}
-static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) {
- return GetQueue(Queue);
-}
+ // Pointer to the unified handle.
+ ur_queue_handle_t_ *UnifiedHandle;
+};
// This helper function creates a ur_event_handle_t and associate a
// ur_queue_handle_t. Note that the caller of this function must have acquired
@@ -910,18 +707,16 @@ static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) {
// multiple devices.
// \param ForceHostVisible tells if the event must be created in
// the host-visible pool
-ur_result_t
-createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue,
- ur_event_handle_t *Event, ur_command_t CommandType,
- ur_command_list_ptr_t CommandList, bool IsInternal,
- bool IsMultiDevice,
- std::optional HostVisible = std::nullopt);
+ur_result_t createEventAndAssociateQueue(
+ ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
+ ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
+ std::optional HostVisible = std::nullopt);
// This helper function checks to see if an event for a command can be included
// at the end of a command list batch. This will only be true if the event does
// not have dependencies or the dependencies are not for events which exist in
// this batch.
-bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
+bool eventCanBeBatched(ur_queue_handle_t Queue, bool UseCopyEngine,
uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList);
@@ -930,7 +725,7 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
// dependencies, then this command can be enqueued without a signal event set in
// a command list batch. The signal event will be appended at the end of the
// batch to be signalled at the end of the command list.
-ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
+ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine,
ze_event_handle_t *ZeEvent, ur_event_handle_t *Event,
uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList,
diff --git a/source/adapters/level_zero/queue_api.cpp b/source/adapters/level_zero/queue_api.cpp
deleted file mode 100644
index 188f7c3102..0000000000
--- a/source/adapters/level_zero/queue_api.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- *
- * Copyright (C) 2024 Intel Corporation
- *
- * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
- * Exceptions. See LICENSE.TXT
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * @file queue_api.cpp
- *
- */
-
-#include "queue_api.hpp"
-
-ur_queue_handle_t_::~ur_queue_handle_t_() {}
-
-UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue,
- ur_queue_info_t propName,
- size_t propSize,
- void *pPropValue,
- size_t *pPropSizeRet) {
- return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) {
- return hQueue->queueRetain();
-}
-UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) {
- return hQueue->queueRelease();
-}
-UR_APIEXPORT ur_result_t UR_APICALL
-urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc,
- ur_native_handle_t *phNativeQueue) {
- return hQueue->queueGetNativeHandle(pDesc, phNativeQueue);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) {
- return hQueue->queueFinish();
-}
-UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) {
- return hQueue->queueFlush();
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
- ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
- const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
- const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueKernelLaunch(
- hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
- numEventsInWaitList, phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
- ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList,
- phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
- ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
- size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst,
- numEventsInWaitList, phEventWaitList,
- phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
- size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemBufferWrite(hBuffer, blockingWrite, offset, size,
- pSrc, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
- ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
- ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
- size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemBufferReadRect(
- hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch,
- bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
- ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
- ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
- size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemBufferWriteRect(
- hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch,
- bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
- ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset,
- dstOffset, size, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
- ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin,
- ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
- size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemBufferCopyRect(
- hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch,
- srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern,
- size_t patternSize, size_t offset, size_t size,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset,
- size, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
- ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead,
- ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
- size_t slicePitch, void *pDst, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemImageRead(
- hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst,
- numEventsInWaitList, phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
- ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite,
- ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
- size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemImageWrite(
- hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc,
- numEventsInWaitList, phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
- ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc,
- ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin,
- ur_rect_offset_t dstOrigin, ur_rect_region_t region,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin,
- region, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
- ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap,
- ur_map_flags_t mapFlags, size_t offset, size_t size,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent, void **ppRetMap) {
- return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset,
- size, numEventsInWaitList, phEventWaitList,
- phEvent, ppRetMap);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
- ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
- ur_queue_handle_t hQueue, void *pMem, size_t patternSize,
- const void *pPattern, size_t size, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size,
- numEventsInWaitList, phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
- ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc,
- size_t size, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size,
- numEventsInWaitList, phEventWaitList,
- phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
- ur_queue_handle_t hQueue, const void *pMem, size_t size,
- ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL
-urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
- ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) {
- return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D(
- ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize,
- const void *pPattern, size_t width, size_t height,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width,
- height, numEventsInWaitList, phEventWaitList,
- phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D(
- ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch,
- const void *pSrc, size_t srcPitch, size_t width, size_t height,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch,
- width, height, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
- ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
- bool blockingWrite, size_t count, size_t offset, const void *pSrc,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueDeviceGlobalVariableWrite(
- hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead(
- ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
- bool blockingRead, size_t count, size_t offset, void *pDst,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueDeviceGlobalVariableRead(
- hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
- ur_queue_handle_t hQueue, ur_program_handle_t hProgram,
- const char *pipe_symbol, bool blocking, void *pDst, size_t size,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst,
- size, numEventsInWaitList, phEventWaitList,
- phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe(
- ur_queue_handle_t hQueue, ur_program_handle_t hProgram,
- const char *pipe_symbol, bool blocking, void *pSrc, size_t size,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc,
- size, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
- ur_queue_handle_t hQueue, const void *pSrc, void *pDst,
- const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc,
- const ur_image_format_t *pSrcImageFormat,
- const ur_image_format_t *pDstImageFormat,
- ur_exp_image_copy_region_t *pCopyRegion,
- ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->bindlessImagesImageCopyExp(
- pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat,
- pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp(
- ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore,
- bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->bindlessImagesWaitExternalSemaphoreExp(
- hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList,
- phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp(
- ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore,
- bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->bindlessImagesSignalExternalSemaphoreExp(
- hSemaphore, hasSignalValue, signalValue, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
- ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
- const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
- const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueCooperativeKernelLaunchExp(
- hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
- numEventsInWaitList, phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp(
- ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList,
- const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
- return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
- ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
- const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
- uint32_t numPropsInLaunchPropList,
- const ur_exp_launch_property_t *launchPropList,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueKernelLaunchCustomExp(
- hKernel, workDim, pGlobalWorkSize, pLocalWorkSize,
- numPropsInLaunchPropList, launchPropList, numEventsInWaitList,
- phEventWaitList, phEvent);
-}
-UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp(
- ur_queue_handle_t hQueue,
- ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data,
- uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList,
- const ur_exp_enqueue_native_command_properties_t *pProperties,
- uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
- ur_event_handle_t *phEvent) {
- return hQueue->enqueueNativeCommandExp(
- pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties,
- numEventsInWaitList, phEventWaitList, phEvent);
-}
diff --git a/source/adapters/level_zero/queue_api.hpp b/source/adapters/level_zero/queue_api.hpp
deleted file mode 100644
index bc01596d2b..0000000000
--- a/source/adapters/level_zero/queue_api.hpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- *
- * Copyright (C) 2024 Intel Corporation
- *
- * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
- * Exceptions. See LICENSE.TXT
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * @file queue_api.hpp
- *
- */
-
-#pragma once
-
-#include
-
-struct ur_queue_handle_t_ {
- virtual ~ur_queue_handle_t_();
- virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *,
- size_t *) = 0;
- virtual ur_result_t queueRetain() = 0;
- virtual ur_result_t queueRelease() = 0;
- virtual ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *,
- ur_native_handle_t *) = 0;
- virtual ur_result_t queueFinish() = 0;
- virtual ur_result_t queueFlush() = 0;
- virtual ur_result_t enqueueKernelLaunch(ur_kernel_handle_t, uint32_t,
- const size_t *, const size_t *,
- const size_t *, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueEventsWait(uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueEventsWaitWithBarrier(uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemBufferRead(ur_mem_handle_t, bool, size_t,
- size_t, void *, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemBufferWrite(ur_mem_handle_t, bool, size_t,
- size_t, const void *, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t
- enqueueMemBufferReadRect(ur_mem_handle_t, bool, ur_rect_offset_t,
- ur_rect_offset_t, ur_rect_region_t, size_t, size_t,
- size_t, size_t, void *, uint32_t,
- const ur_event_handle_t *, ur_event_handle_t *) = 0;
- virtual ur_result_t
- enqueueMemBufferWriteRect(ur_mem_handle_t, bool, ur_rect_offset_t,
- ur_rect_offset_t, ur_rect_region_t, size_t, size_t,
- size_t, size_t, void *, uint32_t,
- const ur_event_handle_t *, ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemBufferCopy(ur_mem_handle_t, ur_mem_handle_t,
- size_t, size_t, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t
- enqueueMemBufferCopyRect(ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t,
- ur_rect_offset_t, ur_rect_region_t, size_t, size_t,
- size_t, size_t, uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemBufferFill(ur_mem_handle_t, const void *,
- size_t, size_t, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemImageRead(ur_mem_handle_t, bool,
- ur_rect_offset_t, ur_rect_region_t,
- size_t, size_t, void *, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemImageWrite(ur_mem_handle_t, bool,
- ur_rect_offset_t, ur_rect_region_t,
- size_t, size_t, void *, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemImageCopy(ur_mem_handle_t, ur_mem_handle_t,
- ur_rect_offset_t, ur_rect_offset_t,
- ur_rect_region_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueMemBufferMap(ur_mem_handle_t, bool, ur_map_flags_t,
- size_t, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *, void **) = 0;
- virtual ur_result_t enqueueMemUnmap(ur_mem_handle_t, void *, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueUSMFill(void *, size_t, const void *, size_t,
- uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueUSMMemcpy(bool, void *, const void *, size_t,
- uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueUSMPrefetch(const void *, size_t,
- ur_usm_migration_flags_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueUSMAdvise(const void *, size_t,
- ur_usm_advice_flags_t,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *,
- size_t, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *,
- size_t, size_t, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueDeviceGlobalVariableWrite(
- ur_program_handle_t, const char *, bool, size_t, size_t, const void *,
- uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueDeviceGlobalVariableRead(
- ur_program_handle_t, const char *, bool, size_t, size_t, void *, uint32_t,
- const ur_event_handle_t *, ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueReadHostPipe(ur_program_handle_t, const char *,
- bool, void *, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueWriteHostPipe(ur_program_handle_t, const char *,
- bool, void *, size_t, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t bindlessImagesImageCopyExp(
- const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *,
- const ur_image_format_t *, const ur_image_format_t *,
- ur_exp_image_copy_region_t *, ur_exp_image_copy_flags_t, uint32_t,
- const ur_event_handle_t *, ur_event_handle_t *) = 0;
- virtual ur_result_t bindlessImagesWaitExternalSemaphoreExp(
- ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t,
- const ur_event_handle_t *, ur_event_handle_t *) = 0;
- virtual ur_result_t bindlessImagesSignalExternalSemaphoreExp(
- ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t,
- const ur_event_handle_t *, ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueCooperativeKernelLaunchExp(
- ur_kernel_handle_t, uint32_t, const size_t *, const size_t *,
- const size_t *, uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueTimestampRecordingExp(bool, uint32_t,
- const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t enqueueKernelLaunchCustomExp(
- ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, uint32_t,
- const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
- virtual ur_result_t
- enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *,
- uint32_t, const ur_mem_handle_t *,
- const ur_exp_enqueue_native_command_properties_t *,
- uint32_t, const ur_event_handle_t *,
- ur_event_handle_t *) = 0;
-};
diff --git a/source/adapters/level_zero/sampler.cpp b/source/adapters/level_zero/sampler.cpp
index 54ca1b6672..d48e6aeede 100644
--- a/source/adapters/level_zero/sampler.cpp
+++ b/source/adapters/level_zero/sampler.cpp
@@ -12,7 +12,9 @@
#include "logger/ur_logger.hpp"
#include "ur_level_zero.hpp"
-UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate(
+namespace ur::level_zero {
+
+ur_result_t urSamplerCreate(
ur_context_handle_t Context, ///< [in] handle of the context object
const ur_sampler_desc_t
*Props, ///< [in] specifies a list of sampler property names and their
@@ -109,17 +111,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(
- ur_sampler_handle_t
- Sampler ///< [in] handle of the sampler object to get access
+ur_result_t
+urSamplerRetain(ur_sampler_handle_t
+ Sampler ///< [in] handle of the sampler object to get access
) {
Sampler->RefCount.increment();
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(
- ur_sampler_handle_t
- Sampler ///< [in] handle of the sampler object to release
+ur_result_t
+urSamplerRelease(ur_sampler_handle_t
+ Sampler ///< [in] handle of the sampler object to release
) {
if (!Sampler->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;
@@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo(
+ur_result_t urSamplerGetInfo(
ur_sampler_handle_t Sampler, ///< [in] handle of the sampler object
ur_sampler_info_t PropName, ///< [in] name of the sampler property to query
size_t PropValueSize, ///< [in] size in bytes of the sampler property value
@@ -152,7 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle(
+ur_result_t urSamplerGetNativeHandle(
ur_sampler_handle_t Sampler, ///< [in] handle of the sampler.
ur_native_handle_t *NativeSampler ///< [out] a pointer to the native
///< handle of the sampler.
@@ -164,7 +166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle(
+ur_result_t urSamplerCreateWithNativeHandle(
ur_native_handle_t
NativeSampler, ///< [in] the native handle of the sampler.
ur_context_handle_t Context, ///< [in] handle of the context object
@@ -182,3 +184,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle(
"{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp
index 8941f756ea..9bdd672818 100644
--- a/source/adapters/level_zero/ur_interface_loader.cpp
+++ b/source/adapters/level_zero/ur_interface_loader.cpp
@@ -1,19 +1,19 @@
-//===--------- ur_interface_loader.cpp - Level Zero Adapter----------------===//
+//===--------- ur_interface_loader.cpp - Level Zero Adapter ------------===//
//
-// Copyright (C) 2023 Intel Corporation
+// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-
#include
#include
-namespace {
+#include "ur_interface_loader.hpp"
-ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) {
+static ur_result_t validateProcInputs(ur_api_version_t version,
+ void *pDdiTable) {
if (nullptr == pDdiTable) {
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
}
@@ -24,475 +24,592 @@ ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) {
}
return UR_RESULT_SUCCESS;
}
-} // namespace
-#if defined(__cplusplus)
+#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO
+namespace ur::level_zero {
+#elif defined(__cplusplus)
extern "C" {
#endif
-UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_global_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable(
+ ur_api_version_t version, ur_global_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnAdapterGet = urAdapterGet;
- pDdiTable->pfnAdapterRelease = urAdapterRelease;
- pDdiTable->pfnAdapterRetain = urAdapterRetain;
- pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError;
- pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo;
- return retVal;
+ pDdiTable->pfnAdapterGet = ur::level_zero::urAdapterGet;
+ pDdiTable->pfnAdapterRelease = ur::level_zero::urAdapterRelease;
+ pDdiTable->pfnAdapterRetain = ur::level_zero::urAdapterRetain;
+ pDdiTable->pfnAdapterGetLastError = ur::level_zero::urAdapterGetLastError;
+ pDdiTable->pfnAdapterGetInfo = ur::level_zero::urAdapterGetInfo;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_context_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable(
+ ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnCreate = urContextCreate;
- pDdiTable->pfnRetain = urContextRetain;
- pDdiTable->pfnRelease = urContextRelease;
- pDdiTable->pfnGetInfo = urContextGetInfo;
- pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle;
- pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter;
+ pDdiTable->pfnUnsampledImageHandleDestroyExp =
+ ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp;
+ pDdiTable->pfnSampledImageHandleDestroyExp =
+ ur::level_zero::urBindlessImagesSampledImageHandleDestroyExp;
+ pDdiTable->pfnImageAllocateExp =
+ ur::level_zero::urBindlessImagesImageAllocateExp;
+ pDdiTable->pfnImageFreeExp = ur::level_zero::urBindlessImagesImageFreeExp;
+ pDdiTable->pfnUnsampledImageCreateExp =
+ ur::level_zero::urBindlessImagesUnsampledImageCreateExp;
+ pDdiTable->pfnSampledImageCreateExp =
+ ur::level_zero::urBindlessImagesSampledImageCreateExp;
+ pDdiTable->pfnImageCopyExp = ur::level_zero::urBindlessImagesImageCopyExp;
+ pDdiTable->pfnImageGetInfoExp =
+ ur::level_zero::urBindlessImagesImageGetInfoExp;
+ pDdiTable->pfnMipmapGetLevelExp =
+ ur::level_zero::urBindlessImagesMipmapGetLevelExp;
+ pDdiTable->pfnMipmapFreeExp = ur::level_zero::urBindlessImagesMipmapFreeExp;
+ pDdiTable->pfnImportExternalMemoryExp =
+ ur::level_zero::urBindlessImagesImportExternalMemoryExp;
+ pDdiTable->pfnMapExternalArrayExp =
+ ur::level_zero::urBindlessImagesMapExternalArrayExp;
+ pDdiTable->pfnMapExternalLinearMemoryExp =
+ ur::level_zero::urBindlessImagesMapExternalLinearMemoryExp;
+ pDdiTable->pfnReleaseExternalMemoryExp =
+ ur::level_zero::urBindlessImagesReleaseExternalMemoryExp;
+ pDdiTable->pfnImportExternalSemaphoreExp =
+ ur::level_zero::urBindlessImagesImportExternalSemaphoreExp;
+ pDdiTable->pfnReleaseExternalSemaphoreExp =
+ ur::level_zero::urBindlessImagesReleaseExternalSemaphoreExp;
+ pDdiTable->pfnWaitExternalSemaphoreExp =
+ ur::level_zero::urBindlessImagesWaitExternalSemaphoreExp;
+ pDdiTable->pfnSignalExternalSemaphoreExp =
+ ur::level_zero::urBindlessImagesSignalExternalSemaphoreExp;
- return retVal;
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_enqueue_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
+ ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch;
- pDdiTable->pfnEventsWait = urEnqueueEventsWait;
- pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier;
- pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead;
- pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite;
- pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect;
- pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect;
- pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy;
- pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect;
- pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill;
- pDdiTable->pfnMemImageRead = urEnqueueMemImageRead;
- pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite;
- pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy;
- pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap;
- pDdiTable->pfnMemUnmap = urEnqueueMemUnmap;
- pDdiTable->pfnUSMFill = urEnqueueUSMFill;
- pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy;
- pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch;
- pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise;
- pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D;
- pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D;
- pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite;
- pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead;
-
- return retVal;
+ pDdiTable->pfnCreateExp = ur::level_zero::urCommandBufferCreateExp;
+ pDdiTable->pfnRetainExp = ur::level_zero::urCommandBufferRetainExp;
+ pDdiTable->pfnReleaseExp = ur::level_zero::urCommandBufferReleaseExp;
+ pDdiTable->pfnFinalizeExp = ur::level_zero::urCommandBufferFinalizeExp;
+ pDdiTable->pfnAppendKernelLaunchExp =
+ ur::level_zero::urCommandBufferAppendKernelLaunchExp;
+ pDdiTable->pfnAppendUSMMemcpyExp =
+ ur::level_zero::urCommandBufferAppendUSMMemcpyExp;
+ pDdiTable->pfnAppendUSMFillExp =
+ ur::level_zero::urCommandBufferAppendUSMFillExp;
+ pDdiTable->pfnAppendMemBufferCopyExp =
+ ur::level_zero::urCommandBufferAppendMemBufferCopyExp;
+ pDdiTable->pfnAppendMemBufferWriteExp =
+ ur::level_zero::urCommandBufferAppendMemBufferWriteExp;
+ pDdiTable->pfnAppendMemBufferReadExp =
+ ur::level_zero::urCommandBufferAppendMemBufferReadExp;
+ pDdiTable->pfnAppendMemBufferCopyRectExp =
+ ur::level_zero::urCommandBufferAppendMemBufferCopyRectExp;
+ pDdiTable->pfnAppendMemBufferWriteRectExp =
+ ur::level_zero::urCommandBufferAppendMemBufferWriteRectExp;
+ pDdiTable->pfnAppendMemBufferReadRectExp =
+ ur::level_zero::urCommandBufferAppendMemBufferReadRectExp;
+ pDdiTable->pfnAppendMemBufferFillExp =
+ ur::level_zero::urCommandBufferAppendMemBufferFillExp;
+ pDdiTable->pfnAppendUSMPrefetchExp =
+ ur::level_zero::urCommandBufferAppendUSMPrefetchExp;
+ pDdiTable->pfnAppendUSMAdviseExp =
+ ur::level_zero::urCommandBufferAppendUSMAdviseExp;
+ pDdiTable->pfnEnqueueExp = ur::level_zero::urCommandBufferEnqueueExp;
+ pDdiTable->pfnRetainCommandExp =
+ ur::level_zero::urCommandBufferRetainCommandExp;
+ pDdiTable->pfnReleaseCommandExp =
+ ur::level_zero::urCommandBufferReleaseCommandExp;
+ pDdiTable->pfnUpdateKernelLaunchExp =
+ ur::level_zero::urCommandBufferUpdateKernelLaunchExp;
+ pDdiTable->pfnGetInfoExp = ur::level_zero::urCommandBufferGetInfoExp;
+ pDdiTable->pfnCommandGetInfoExp =
+ ur::level_zero::urCommandBufferCommandGetInfoExp;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_event_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable(
+ ur_api_version_t version, ur_context_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnGetInfo = urEventGetInfo;
- pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo;
- pDdiTable->pfnWait = urEventWait;
- pDdiTable->pfnRetain = urEventRetain;
- pDdiTable->pfnRelease = urEventRelease;
- pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle;
- pDdiTable->pfnSetCallback = urEventSetCallback;
-
- return retVal;
+
+ pDdiTable->pfnCreate = ur::level_zero::urContextCreate;
+ pDdiTable->pfnRetain = ur::level_zero::urContextRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urContextRelease;
+ pDdiTable->pfnGetInfo = ur::level_zero::urContextGetInfo;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urContextGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urContextCreateWithNativeHandle;
+ pDdiTable->pfnSetExtendedDeleter =
+ ur::level_zero::urContextSetExtendedDeleter;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_kernel_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
+ ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnCreate = urKernelCreate;
- pDdiTable->pfnGetInfo = urKernelGetInfo;
- pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo;
- pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo;
- pDdiTable->pfnRetain = urKernelRetain;
- pDdiTable->pfnRelease = urKernelRelease;
- pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle;
- pDdiTable->pfnSetArgValue = urKernelSetArgValue;
- pDdiTable->pfnSetArgLocal = urKernelSetArgLocal;
- pDdiTable->pfnSetArgPointer = urKernelSetArgPointer;
- pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
- pDdiTable->pfnSetArgSampler = urKernelSetArgSampler;
- pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj;
- pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
- pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize;
- return retVal;
+
+ pDdiTable->pfnKernelLaunch = ur::level_zero::urEnqueueKernelLaunch;
+ pDdiTable->pfnEventsWait = ur::level_zero::urEnqueueEventsWait;
+ pDdiTable->pfnEventsWaitWithBarrier =
+ ur::level_zero::urEnqueueEventsWaitWithBarrier;
+ pDdiTable->pfnMemBufferRead = ur::level_zero::urEnqueueMemBufferRead;
+ pDdiTable->pfnMemBufferWrite = ur::level_zero::urEnqueueMemBufferWrite;
+ pDdiTable->pfnMemBufferReadRect = ur::level_zero::urEnqueueMemBufferReadRect;
+ pDdiTable->pfnMemBufferWriteRect =
+ ur::level_zero::urEnqueueMemBufferWriteRect;
+ pDdiTable->pfnMemBufferCopy = ur::level_zero::urEnqueueMemBufferCopy;
+ pDdiTable->pfnMemBufferCopyRect = ur::level_zero::urEnqueueMemBufferCopyRect;
+ pDdiTable->pfnMemBufferFill = ur::level_zero::urEnqueueMemBufferFill;
+ pDdiTable->pfnMemImageRead = ur::level_zero::urEnqueueMemImageRead;
+ pDdiTable->pfnMemImageWrite = ur::level_zero::urEnqueueMemImageWrite;
+ pDdiTable->pfnMemImageCopy = ur::level_zero::urEnqueueMemImageCopy;
+ pDdiTable->pfnMemBufferMap = ur::level_zero::urEnqueueMemBufferMap;
+ pDdiTable->pfnMemUnmap = ur::level_zero::urEnqueueMemUnmap;
+ pDdiTable->pfnUSMFill = ur::level_zero::urEnqueueUSMFill;
+ pDdiTable->pfnUSMMemcpy = ur::level_zero::urEnqueueUSMMemcpy;
+ pDdiTable->pfnUSMPrefetch = ur::level_zero::urEnqueueUSMPrefetch;
+ pDdiTable->pfnUSMAdvise = ur::level_zero::urEnqueueUSMAdvise;
+ pDdiTable->pfnUSMFill2D = ur::level_zero::urEnqueueUSMFill2D;
+ pDdiTable->pfnUSMMemcpy2D = ur::level_zero::urEnqueueUSMMemcpy2D;
+ pDdiTable->pfnDeviceGlobalVariableWrite =
+ ur::level_zero::urEnqueueDeviceGlobalVariableWrite;
+ pDdiTable->pfnDeviceGlobalVariableRead =
+ ur::level_zero::urEnqueueDeviceGlobalVariableRead;
+ pDdiTable->pfnReadHostPipe = ur::level_zero::urEnqueueReadHostPipe;
+ pDdiTable->pfnWriteHostPipe = ur::level_zero::urEnqueueWriteHostPipe;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_mem_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
+ ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnImageCreate = urMemImageCreate;
- pDdiTable->pfnBufferCreate = urMemBufferCreate;
- pDdiTable->pfnRetain = urMemRetain;
- pDdiTable->pfnRelease = urMemRelease;
- pDdiTable->pfnBufferPartition = urMemBufferPartition;
- pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle;
- pDdiTable->pfnBufferCreateWithNativeHandle =
- urMemBufferCreateWithNativeHandle;
- pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle;
- pDdiTable->pfnGetInfo = urMemGetInfo;
- pDdiTable->pfnImageGetInfo = urMemImageGetInfo;
- return retVal;
+ pDdiTable->pfnKernelLaunchCustomExp =
+ ur::level_zero::urEnqueueKernelLaunchCustomExp;
+ pDdiTable->pfnCooperativeKernelLaunchExp =
+ ur::level_zero::urEnqueueCooperativeKernelLaunchExp;
+ pDdiTable->pfnTimestampRecordingExp =
+ ur::level_zero::urEnqueueTimestampRecordingExp;
+ pDdiTable->pfnNativeCommandExp = ur::level_zero::urEnqueueNativeCommandExp;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_platform_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable(
+ ur_api_version_t version, ur_event_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnGet = urPlatformGet;
- pDdiTable->pfnGetInfo = urPlatformGetInfo;
- pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle;
- pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion;
- pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption;
-
- return retVal;
-}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_program_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
+ pDdiTable->pfnGetInfo = ur::level_zero::urEventGetInfo;
+ pDdiTable->pfnGetProfilingInfo = ur::level_zero::urEventGetProfilingInfo;
+ pDdiTable->pfnWait = ur::level_zero::urEventWait;
+ pDdiTable->pfnRetain = ur::level_zero::urEventRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urEventRelease;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urEventGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urEventCreateWithNativeHandle;
+ pDdiTable->pfnSetCallback = ur::level_zero::urEventSetCallback;
+
+ return result;
+}
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
+ ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnCreateWithIL = urProgramCreateWithIL;
- pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary;
- pDdiTable->pfnBuild = urProgramBuild;
- pDdiTable->pfnCompile = urProgramCompile;
- pDdiTable->pfnLink = urProgramLink;
- pDdiTable->pfnRetain = urProgramRetain;
- pDdiTable->pfnRelease = urProgramRelease;
- pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer;
- pDdiTable->pfnGetGlobalVariablePointer = urProgramGetGlobalVariablePointer;
- pDdiTable->pfnGetInfo = urProgramGetInfo;
- pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo;
+
+ pDdiTable->pfnCreate = ur::level_zero::urKernelCreate;
+ pDdiTable->pfnGetInfo = ur::level_zero::urKernelGetInfo;
+ pDdiTable->pfnGetGroupInfo = ur::level_zero::urKernelGetGroupInfo;
+ pDdiTable->pfnGetSubGroupInfo = ur::level_zero::urKernelGetSubGroupInfo;
+ pDdiTable->pfnRetain = ur::level_zero::urKernelRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urKernelRelease;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urKernelGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urKernelCreateWithNativeHandle;
+ pDdiTable->pfnGetSuggestedLocalWorkSize =
+ ur::level_zero::urKernelGetSuggestedLocalWorkSize;
+ pDdiTable->pfnSetArgValue = ur::level_zero::urKernelSetArgValue;
+ pDdiTable->pfnSetArgLocal = ur::level_zero::urKernelSetArgLocal;
+ pDdiTable->pfnSetArgPointer = ur::level_zero::urKernelSetArgPointer;
+ pDdiTable->pfnSetExecInfo = ur::level_zero::urKernelSetExecInfo;
+ pDdiTable->pfnSetArgSampler = ur::level_zero::urKernelSetArgSampler;
+ pDdiTable->pfnSetArgMemObj = ur::level_zero::urKernelSetArgMemObj;
pDdiTable->pfnSetSpecializationConstants =
- urProgramSetSpecializationConstants;
- pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle;
+ ur::level_zero::urKernelSetSpecializationConstants;
- return retVal;
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_queue_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
+ ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnGetInfo = urQueueGetInfo;
- pDdiTable->pfnCreate = urQueueCreate;
- pDdiTable->pfnRetain = urQueueRetain;
- pDdiTable->pfnRelease = urQueueRelease;
- pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle;
- pDdiTable->pfnFinish = urQueueFinish;
- pDdiTable->pfnFlush = urQueueFlush;
+ pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
+ ur::level_zero::urKernelSuggestMaxCooperativeGroupCountExp;
- return retVal;
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_sampler_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL
+urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnCreate = urSamplerCreate;
- pDdiTable->pfnRetain = urSamplerRetain;
- pDdiTable->pfnRelease = urSamplerRelease;
- pDdiTable->pfnGetInfo = urSamplerGetInfo;
- pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle;
-
- return retVal;
+
+ pDdiTable->pfnImageCreate = ur::level_zero::urMemImageCreate;
+ pDdiTable->pfnBufferCreate = ur::level_zero::urMemBufferCreate;
+ pDdiTable->pfnRetain = ur::level_zero::urMemRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urMemRelease;
+ pDdiTable->pfnBufferPartition = ur::level_zero::urMemBufferPartition;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urMemGetNativeHandle;
+ pDdiTable->pfnBufferCreateWithNativeHandle =
+ ur::level_zero::urMemBufferCreateWithNativeHandle;
+ pDdiTable->pfnImageCreateWithNativeHandle =
+ ur::level_zero::urMemImageCreateWithNativeHandle;
+ pDdiTable->pfnGetInfo = ur::level_zero::urMemGetInfo;
+ pDdiTable->pfnImageGetInfo = ur::level_zero::urMemImageGetInfo;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_usm_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable(
+ ur_api_version_t version, ur_physical_mem_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnHostAlloc = urUSMHostAlloc;
- pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc;
- pDdiTable->pfnSharedAlloc = urUSMSharedAlloc;
- pDdiTable->pfnFree = urUSMFree;
- pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo;
- pDdiTable->pfnPoolCreate = urUSMPoolCreate;
- pDdiTable->pfnPoolRetain = urUSMPoolRetain;
- pDdiTable->pfnPoolRelease = urUSMPoolRelease;
- pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo;
-
- return retVal;
+ pDdiTable->pfnCreate = ur::level_zero::urPhysicalMemCreate;
+ pDdiTable->pfnRetain = ur::level_zero::urPhysicalMemRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urPhysicalMemRelease;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_device_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable(
+ ur_api_version_t version, ur_platform_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnGet = urDeviceGet;
- pDdiTable->pfnGetInfo = urDeviceGetInfo;
- pDdiTable->pfnRetain = urDeviceRetain;
- pDdiTable->pfnRelease = urDeviceRelease;
- pDdiTable->pfnPartition = urDevicePartition;
- pDdiTable->pfnSelectBinary = urDeviceSelectBinary;
- pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle;
- pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle;
- pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps;
-
- return retVal;
+
+ pDdiTable->pfnGet = ur::level_zero::urPlatformGet;
+ pDdiTable->pfnGetInfo = ur::level_zero::urPlatformGetInfo;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urPlatformGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urPlatformCreateWithNativeHandle;
+ pDdiTable->pfnGetApiVersion = ur::level_zero::urPlatformGetApiVersion;
+ pDdiTable->pfnGetBackendOption = ur::level_zero::urPlatformGetBackendOption;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_command_buffer_exp_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable(
+ ur_api_version_t version, ur_program_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnCreateExp = urCommandBufferCreateExp;
- pDdiTable->pfnRetainExp = urCommandBufferRetainExp;
- pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp;
- pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp;
- pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp;
- pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp;
- pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp;
- pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp;
- pDdiTable->pfnAppendMemBufferCopyRectExp =
- urCommandBufferAppendMemBufferCopyRectExp;
- pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp;
- pDdiTable->pfnAppendMemBufferReadRectExp =
- urCommandBufferAppendMemBufferReadRectExp;
- pDdiTable->pfnAppendMemBufferWriteExp =
- urCommandBufferAppendMemBufferWriteExp;
- pDdiTable->pfnAppendMemBufferWriteRectExp =
- urCommandBufferAppendMemBufferWriteRectExp;
- pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp;
- pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp;
- pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp;
- pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp;
- pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp;
- pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp;
- pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp;
- pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp;
- pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp;
-
- return retVal;
+
+ pDdiTable->pfnCreateWithIL = ur::level_zero::urProgramCreateWithIL;
+ pDdiTable->pfnCreateWithBinary = ur::level_zero::urProgramCreateWithBinary;
+ pDdiTable->pfnBuild = ur::level_zero::urProgramBuild;
+ pDdiTable->pfnCompile = ur::level_zero::urProgramCompile;
+ pDdiTable->pfnLink = ur::level_zero::urProgramLink;
+ pDdiTable->pfnRetain = ur::level_zero::urProgramRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urProgramRelease;
+ pDdiTable->pfnGetFunctionPointer =
+ ur::level_zero::urProgramGetFunctionPointer;
+ pDdiTable->pfnGetGlobalVariablePointer =
+ ur::level_zero::urProgramGetGlobalVariablePointer;
+ pDdiTable->pfnGetInfo = ur::level_zero::urProgramGetInfo;
+ pDdiTable->pfnGetBuildInfo = ur::level_zero::urProgramGetBuildInfo;
+ pDdiTable->pfnSetSpecializationConstants =
+ ur::level_zero::urProgramSetSpecializationConstants;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urProgramGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urProgramCreateWithNativeHandle;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable(
- ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
+ ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp;
- pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp;
- pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp;
- return retVal;
+ pDdiTable->pfnBuildExp = ur::level_zero::urProgramBuildExp;
+ pDdiTable->pfnCompileExp = ur::level_zero::urProgramCompileExp;
+ pDdiTable->pfnLinkExp = ur::level_zero::urProgramLinkExp;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable(
- ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) {
+UR_APIEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable(
+ ur_api_version_t version, ur_queue_dditable_t *pDdiTable) {
auto result = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != result) {
return result;
}
- pDdiTable->pfnUnsampledImageHandleDestroyExp =
- urBindlessImagesUnsampledImageHandleDestroyExp;
- pDdiTable->pfnSampledImageHandleDestroyExp =
- urBindlessImagesSampledImageHandleDestroyExp;
- pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp;
- pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp;
- pDdiTable->pfnUnsampledImageCreateExp =
- urBindlessImagesUnsampledImageCreateExp;
- pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp;
- pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp;
- pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp;
- pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp;
- pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp;
- pDdiTable->pfnImportExternalMemoryExp =
- urBindlessImagesImportExternalMemoryExp;
- pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp;
- pDdiTable->pfnMapExternalLinearMemoryExp =
- urBindlessImagesMapExternalLinearMemoryExp;
- pDdiTable->pfnReleaseExternalMemoryExp =
- urBindlessImagesReleaseExternalMemoryExp;
- pDdiTable->pfnImportExternalSemaphoreExp =
- urBindlessImagesImportExternalSemaphoreExp;
- pDdiTable->pfnReleaseExternalSemaphoreExp =
- urBindlessImagesReleaseExternalSemaphoreExp;
- pDdiTable->pfnWaitExternalSemaphoreExp =
- urBindlessImagesWaitExternalSemaphoreExp;
- pDdiTable->pfnSignalExternalSemaphoreExp =
- urBindlessImagesSignalExternalSemaphoreExp;
- return UR_RESULT_SUCCESS;
+
+ pDdiTable->pfnGetInfo = ur::level_zero::urQueueGetInfo;
+ pDdiTable->pfnCreate = ur::level_zero::urQueueCreate;
+ pDdiTable->pfnRetain = ur::level_zero::urQueueRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urQueueRelease;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urQueueGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urQueueCreateWithNativeHandle;
+ pDdiTable->pfnFinish = ur::level_zero::urQueueFinish;
+ pDdiTable->pfnFlush = ur::level_zero::urQueueFlush;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable(
- ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) {
+UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable(
+ ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) {
auto result = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != result) {
return result;
}
- pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp;
- pDdiTable->pfnImportExp = urUSMImportExp;
- pDdiTable->pfnReleaseExp = urUSMReleaseExp;
- return UR_RESULT_SUCCESS;
+
+ pDdiTable->pfnCreate = ur::level_zero::urSamplerCreate;
+ pDdiTable->pfnRetain = ur::level_zero::urSamplerRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urSamplerRelease;
+ pDdiTable->pfnGetInfo = ur::level_zero::urSamplerGetInfo;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urSamplerGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urSamplerCreateWithNativeHandle;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_virtual_mem_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL
+urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnFree = urVirtualMemFree;
- pDdiTable->pfnGetInfo = urVirtualMemGetInfo;
- pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo;
- pDdiTable->pfnMap = urVirtualMemMap;
- pDdiTable->pfnReserve = urVirtualMemReserve;
- pDdiTable->pfnSetAccess = urVirtualMemSetAccess;
- pDdiTable->pfnUnmap = urVirtualMemUnmap;
-
- return retVal;
+ pDdiTable->pfnHostAlloc = ur::level_zero::urUSMHostAlloc;
+ pDdiTable->pfnDeviceAlloc = ur::level_zero::urUSMDeviceAlloc;
+ pDdiTable->pfnSharedAlloc = ur::level_zero::urUSMSharedAlloc;
+ pDdiTable->pfnFree = ur::level_zero::urUSMFree;
+ pDdiTable->pfnGetMemAllocInfo = ur::level_zero::urUSMGetMemAllocInfo;
+ pDdiTable->pfnPoolCreate = ur::level_zero::urUSMPoolCreate;
+ pDdiTable->pfnPoolRetain = ur::level_zero::urUSMPoolRetain;
+ pDdiTable->pfnPoolRelease = ur::level_zero::urUSMPoolRelease;
+ pDdiTable->pfnPoolGetInfo = ur::level_zero::urUSMPoolGetInfo;
+
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable(
- ur_api_version_t version, ///< [in] API version requested
- ur_physical_mem_dditable_t
- *pDdiTable ///< [in,out] pointer to table of DDI function pointers
-) {
- auto retVal = validateProcInputs(version, pDdiTable);
- if (UR_RESULT_SUCCESS != retVal) {
- return retVal;
+UR_APIEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable(
+ ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) {
+ auto result = validateProcInputs(version, pDdiTable);
+ if (UR_RESULT_SUCCESS != result) {
+ return result;
}
- pDdiTable->pfnCreate = urPhysicalMemCreate;
- pDdiTable->pfnRelease = urPhysicalMemRelease;
- pDdiTable->pfnRetain = urPhysicalMemRetain;
+ pDdiTable->pfnPitchedAllocExp = ur::level_zero::urUSMPitchedAllocExp;
+ pDdiTable->pfnImportExp = ur::level_zero::urUSMImportExp;
+ pDdiTable->pfnReleaseExp = ur::level_zero::urUSMReleaseExp;
- return retVal;
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
- ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) {
+UR_APIEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable(
+ ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) {
auto result = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != result) {
return result;
}
- pDdiTable->pfnCooperativeKernelLaunchExp =
- urEnqueueCooperativeKernelLaunchExp;
- pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp;
- pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp;
+ pDdiTable->pfnEnablePeerAccessExp =
+ ur::level_zero::urUsmP2PEnablePeerAccessExp;
+ pDdiTable->pfnDisablePeerAccessExp =
+ ur::level_zero::urUsmP2PDisablePeerAccessExp;
+ pDdiTable->pfnPeerAccessGetInfoExp =
+ ur::level_zero::urUsmP2PPeerAccessGetInfoExp;
- return UR_RESULT_SUCCESS;
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable(
- ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) {
+UR_APIEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable(
+ ur_api_version_t version, ur_virtual_mem_dditable_t *pDdiTable) {
auto result = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != result) {
return result;
}
- pDdiTable->pfnSuggestMaxCooperativeGroupCountExp =
- urKernelSuggestMaxCooperativeGroupCountExp;
+ pDdiTable->pfnGranularityGetInfo =
+ ur::level_zero::urVirtualMemGranularityGetInfo;
+ pDdiTable->pfnReserve = ur::level_zero::urVirtualMemReserve;
+ pDdiTable->pfnFree = ur::level_zero::urVirtualMemFree;
+ pDdiTable->pfnMap = ur::level_zero::urVirtualMemMap;
+ pDdiTable->pfnUnmap = ur::level_zero::urVirtualMemUnmap;
+ pDdiTable->pfnSetAccess = ur::level_zero::urVirtualMemSetAccess;
+ pDdiTable->pfnGetInfo = ur::level_zero::urVirtualMemGetInfo;
- return UR_RESULT_SUCCESS;
+ return result;
}
-UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable(
- ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) {
+UR_APIEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable(
+ ur_api_version_t version, ur_device_dditable_t *pDdiTable) {
auto result = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != result) {
return result;
}
- pDdiTable->pfnBuildExp = urProgramBuildExp;
- pDdiTable->pfnCompileExp = urProgramCompileExp;
- pDdiTable->pfnLinkExp = urProgramLinkExp;
-
- return UR_RESULT_SUCCESS;
+ pDdiTable->pfnGet = ur::level_zero::urDeviceGet;
+ pDdiTable->pfnGetInfo = ur::level_zero::urDeviceGetInfo;
+ pDdiTable->pfnRetain = ur::level_zero::urDeviceRetain;
+ pDdiTable->pfnRelease = ur::level_zero::urDeviceRelease;
+ pDdiTable->pfnPartition = ur::level_zero::urDevicePartition;
+ pDdiTable->pfnSelectBinary = ur::level_zero::urDeviceSelectBinary;
+ pDdiTable->pfnGetNativeHandle = ur::level_zero::urDeviceGetNativeHandle;
+ pDdiTable->pfnCreateWithNativeHandle =
+ ur::level_zero::urDeviceCreateWithNativeHandle;
+ pDdiTable->pfnGetGlobalTimestamps =
+ ur::level_zero::urDeviceGetGlobalTimestamps;
+
+ return result;
}
-#if defined(__cplusplus)
+
+#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO
+} // namespace ur::level_zero
+#elif defined(__cplusplus)
} // extern "C"
#endif
+
+#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO
+namespace ur::level_zero {
+ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) {
+ if (ddi == nullptr) {
+ return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+ }
+
+ ur_result_t result;
+
+ result = ur::level_zero::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Global);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetBindlessImagesExpProcAddrTable(
+ UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetCommandBufferExpProcAddrTable(
+ UR_API_VERSION_CURRENT, &ddi->CommandBufferExp);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetContextProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Context);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Enqueue);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->EnqueueExp);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetEventProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Event);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Kernel);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->KernelExp);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result =
+ ur::level_zero::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->PhysicalMem);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Platform);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Program);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->ProgramExp);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Queue);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Sampler);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result =
+ ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->USMExp);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->UsmP2PExp);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->VirtualMem);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+ result = ur::level_zero::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT,
+ &ddi->Device);
+ if (result != UR_RESULT_SUCCESS)
+ return result;
+
+ return result;
+}
+} // namespace ur::level_zero
+#endif
diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp
new file mode 100644
index 0000000000..f95625dd5b
--- /dev/null
+++ b/source/adapters/level_zero/ur_interface_loader.hpp
@@ -0,0 +1,706 @@
+//===--------- ur_interface_loader.hpp - Level Zero Adapter ------------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <ur_api.h>
+#include <ur_ddi.h>
+
+namespace ur::level_zero {
+ur_result_t urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
+ uint32_t *pNumAdapters);
+ur_result_t urAdapterRelease(ur_adapter_handle_t hAdapter);
+ur_result_t urAdapterRetain(ur_adapter_handle_t hAdapter);
+ur_result_t urAdapterGetLastError(ur_adapter_handle_t hAdapter,
+ const char **ppMessage, int32_t *pError);
+ur_result_t urAdapterGetInfo(ur_adapter_handle_t hAdapter,
+ ur_adapter_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urPlatformGet(ur_adapter_handle_t *phAdapters, uint32_t NumAdapters,
+ uint32_t NumEntries,
+ ur_platform_handle_t *phPlatforms,
+ uint32_t *pNumPlatforms);
+ur_result_t urPlatformGetInfo(ur_platform_handle_t hPlatform,
+ ur_platform_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urPlatformGetApiVersion(ur_platform_handle_t hPlatform,
+ ur_api_version_t *pVersion);
+ur_result_t urPlatformGetNativeHandle(ur_platform_handle_t hPlatform,
+ ur_native_handle_t *phNativePlatform);
+ur_result_t urPlatformCreateWithNativeHandle(
+ ur_native_handle_t hNativePlatform, ur_adapter_handle_t hAdapter,
+ const ur_platform_native_properties_t *pProperties,
+ ur_platform_handle_t *phPlatform);
+ur_result_t urPlatformGetBackendOption(ur_platform_handle_t hPlatform,
+ const char *pFrontendOption,
+ const char **ppPlatformOption);
+ur_result_t urDeviceGet(ur_platform_handle_t hPlatform,
+ ur_device_type_t DeviceType, uint32_t NumEntries,
+ ur_device_handle_t *phDevices, uint32_t *pNumDevices);
+ur_result_t urDeviceGetInfo(ur_device_handle_t hDevice,
+ ur_device_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urDeviceRetain(ur_device_handle_t hDevice);
+ur_result_t urDeviceRelease(ur_device_handle_t hDevice);
+ur_result_t
+urDevicePartition(ur_device_handle_t hDevice,
+ const ur_device_partition_properties_t *pProperties,
+ uint32_t NumDevices, ur_device_handle_t *phSubDevices,
+ uint32_t *pNumDevicesRet);
+ur_result_t urDeviceSelectBinary(ur_device_handle_t hDevice,
+ const ur_device_binary_t *pBinaries,
+ uint32_t NumBinaries,
+ uint32_t *pSelectedBinary);
+ur_result_t urDeviceGetNativeHandle(ur_device_handle_t hDevice,
+ ur_native_handle_t *phNativeDevice);
+ur_result_t
+urDeviceCreateWithNativeHandle(ur_native_handle_t hNativeDevice,
+ ur_adapter_handle_t hAdapter,
+ const ur_device_native_properties_t *pProperties,
+ ur_device_handle_t *phDevice);
+ur_result_t urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
+ uint64_t *pDeviceTimestamp,
+ uint64_t *pHostTimestamp);
+ur_result_t urContextCreate(uint32_t DeviceCount,
+ const ur_device_handle_t *phDevices,
+ const ur_context_properties_t *pProperties,
+ ur_context_handle_t *phContext);
+ur_result_t urContextRetain(ur_context_handle_t hContext);
+ur_result_t urContextRelease(ur_context_handle_t hContext);
+ur_result_t urContextGetInfo(ur_context_handle_t hContext,
+ ur_context_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext,
+ ur_native_handle_t *phNativeContext);
+ur_result_t urContextCreateWithNativeHandle(
+ ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter,
+ uint32_t numDevices, const ur_device_handle_t *phDevices,
+ const ur_context_native_properties_t *pProperties,
+ ur_context_handle_t *phContext);
+ur_result_t
+urContextSetExtendedDeleter(ur_context_handle_t hContext,
+ ur_context_extended_deleter_t pfnDeleter,
+ void *pUserData);
+ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags,
+ const ur_image_format_t *pImageFormat,
+ const ur_image_desc_t *pImageDesc, void *pHost,
+ ur_mem_handle_t *phMem);
+ur_result_t urMemBufferCreate(ur_context_handle_t hContext,
+ ur_mem_flags_t flags, size_t size,
+ const ur_buffer_properties_t *pProperties,
+ ur_mem_handle_t *phBuffer);
+ur_result_t urMemRetain(ur_mem_handle_t hMem);
+ur_result_t urMemRelease(ur_mem_handle_t hMem);
+ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags,
+ ur_buffer_create_type_t bufferCreateType,
+ const ur_buffer_region_t *pRegion,
+ ur_mem_handle_t *phMem);
+ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem,
+ ur_device_handle_t hDevice,
+ ur_native_handle_t *phNativeMem);
+ur_result_t urMemBufferCreateWithNativeHandle(
+ ur_native_handle_t hNativeMem, ur_context_handle_t hContext,
+ const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem);
+ur_result_t urMemImageCreateWithNativeHandle(
+ ur_native_handle_t hNativeMem, ur_context_handle_t hContext,
+ const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+ const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem);
+ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urSamplerCreate(ur_context_handle_t hContext,
+ const ur_sampler_desc_t *pDesc,
+ ur_sampler_handle_t *phSampler);
+ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler);
+ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler);
+ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler,
+ ur_sampler_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler,
+ ur_native_handle_t *phNativeSampler);
+ur_result_t urSamplerCreateWithNativeHandle(
+ ur_native_handle_t hNativeSampler, ur_context_handle_t hContext,
+ const ur_sampler_native_properties_t *pProperties,
+ ur_sampler_handle_t *phSampler);
+ur_result_t urUSMHostAlloc(ur_context_handle_t hContext,
+ const ur_usm_desc_t *pUSMDesc,
+ ur_usm_pool_handle_t pool, size_t size,
+ void **ppMem);
+ur_result_t urUSMDeviceAlloc(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_usm_desc_t *pUSMDesc,
+ ur_usm_pool_handle_t pool, size_t size,
+ void **ppMem);
+ur_result_t urUSMSharedAlloc(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_usm_desc_t *pUSMDesc,
+ ur_usm_pool_handle_t pool, size_t size,
+ void **ppMem);
+ur_result_t urUSMFree(ur_context_handle_t hContext, void *pMem);
+ur_result_t urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
+ ur_usm_alloc_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urUSMPoolCreate(ur_context_handle_t hContext,
+ ur_usm_pool_desc_t *pPoolDesc,
+ ur_usm_pool_handle_t *ppPool);
+ur_result_t urUSMPoolRetain(ur_usm_pool_handle_t pPool);
+ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t pPool);
+ur_result_t urUSMPoolGetInfo(ur_usm_pool_handle_t hPool,
+ ur_usm_pool_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urVirtualMemGranularityGetInfo(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_virtual_mem_granularity_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urVirtualMemReserve(ur_context_handle_t hContext,
+ const void *pStart, size_t size,
+ void **ppStart);
+ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart,
+ size_t size);
+ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart,
+ size_t size, ur_physical_mem_handle_t hPhysicalMem,
+ size_t offset, ur_virtual_mem_access_flags_t flags);
+ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart,
+ size_t size);
+ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext,
+ const void *pStart, size_t size,
+ ur_virtual_mem_access_flags_t flags);
+ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext,
+ const void *pStart, size_t size,
+ ur_virtual_mem_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice, size_t size,
+ const ur_physical_mem_properties_t *pProperties,
+ ur_physical_mem_handle_t *phPhysicalMem);
+ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem);
+ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem);
+ur_result_t urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL,
+ size_t length,
+ const ur_program_properties_t *pProperties,
+ ur_program_handle_t *phProgram);
+ur_result_t urProgramCreateWithBinary(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
+ const uint8_t *pBinary, const ur_program_properties_t *pProperties,
+ ur_program_handle_t *phProgram);
+ur_result_t urProgramBuild(ur_context_handle_t hContext,
+ ur_program_handle_t hProgram, const char *pOptions);
+ur_result_t urProgramCompile(ur_context_handle_t hContext,
+ ur_program_handle_t hProgram,
+ const char *pOptions);
+ur_result_t urProgramLink(ur_context_handle_t hContext, uint32_t count,
+ const ur_program_handle_t *phPrograms,
+ const char *pOptions, ur_program_handle_t *phProgram);
+ur_result_t urProgramRetain(ur_program_handle_t hProgram);
+ur_result_t urProgramRelease(ur_program_handle_t hProgram);
+ur_result_t urProgramGetFunctionPointer(ur_device_handle_t hDevice,
+ ur_program_handle_t hProgram,
+ const char *pFunctionName,
+ void **ppFunctionPointer);
+ur_result_t urProgramGetGlobalVariablePointer(
+ ur_device_handle_t hDevice, ur_program_handle_t hProgram,
+ const char *pGlobalVariableName, size_t *pGlobalVariableSizeRet,
+ void **ppGlobalVariablePointerRet);
+ur_result_t urProgramGetInfo(ur_program_handle_t hProgram,
+ ur_program_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urProgramGetBuildInfo(ur_program_handle_t hProgram,
+ ur_device_handle_t hDevice,
+ ur_program_build_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urProgramSetSpecializationConstants(
+ ur_program_handle_t hProgram, uint32_t count,
+ const ur_specialization_constant_info_t *pSpecConstants);
+ur_result_t urProgramGetNativeHandle(ur_program_handle_t hProgram,
+ ur_native_handle_t *phNativeProgram);
+ur_result_t urProgramCreateWithNativeHandle(
+ ur_native_handle_t hNativeProgram, ur_context_handle_t hContext,
+ const ur_program_native_properties_t *pProperties,
+ ur_program_handle_t *phProgram);
+ur_result_t urKernelCreate(ur_program_handle_t hProgram,
+ const char *pKernelName,
+ ur_kernel_handle_t *phKernel);
+ur_result_t urKernelSetArgValue(
+ ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
+ const ur_kernel_arg_value_properties_t *pProperties, const void *pArgValue);
+ur_result_t
+urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex,
+ size_t argSize,
+ const ur_kernel_arg_local_properties_t *pProperties);
+ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel,
+ ur_kernel_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t hKernel,
+ ur_device_handle_t hDevice,
+ ur_kernel_group_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel,
+ ur_device_handle_t hDevice,
+ ur_kernel_sub_group_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urKernelRetain(ur_kernel_handle_t hKernel);
+ur_result_t urKernelRelease(ur_kernel_handle_t hKernel);
+ur_result_t
+urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex,
+ const ur_kernel_arg_pointer_properties_t *pProperties,
+ const void *pArgValue);
+ur_result_t
+urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName,
+ size_t propSize,
+ const ur_kernel_exec_info_properties_t *pProperties,
+ const void *pPropValue);
+ur_result_t
+urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex,
+ const ur_kernel_arg_sampler_properties_t *pProperties,
+ ur_sampler_handle_t hArgValue);
+ur_result_t
+urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
+ const ur_kernel_arg_mem_obj_properties_t *pProperties,
+ ur_mem_handle_t hArgValue);
+ur_result_t urKernelSetSpecializationConstants(
+ ur_kernel_handle_t hKernel, uint32_t count,
+ const ur_specialization_constant_info_t *pSpecConstants);
+ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel,
+ ur_native_handle_t *phNativeKernel);
+ur_result_t
+urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel,
+ ur_context_handle_t hContext,
+ ur_program_handle_t hProgram,
+ const ur_kernel_native_properties_t *pProperties,
+ ur_kernel_handle_t *phKernel);
+ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel,
+ ur_queue_handle_t hQueue,
+ uint32_t numWorkDim,
+ const size_t *pGlobalWorkOffset,
+ const size_t *pGlobalWorkSize,
+ size_t *pSuggestedLocalWorkSize);
+ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urQueueCreate(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_queue_properties_t *pProperties,
+ ur_queue_handle_t *phQueue);
+ur_result_t urQueueRetain(ur_queue_handle_t hQueue);
+ur_result_t urQueueRelease(ur_queue_handle_t hQueue);
+ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue,
+ ur_queue_native_desc_t *pDesc,
+ ur_native_handle_t *phNativeQueue);
+ur_result_t urQueueCreateWithNativeHandle(
+ ur_native_handle_t hNativeQueue, ur_context_handle_t hContext,
+ ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties,
+ ur_queue_handle_t *phQueue);
+ur_result_t urQueueFinish(ur_queue_handle_t hQueue);
+ur_result_t urQueueFlush(ur_queue_handle_t hQueue);
+ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent,
+ ur_profiling_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urEventWait(uint32_t numEvents,
+ const ur_event_handle_t *phEventWaitList);
+ur_result_t urEventRetain(ur_event_handle_t hEvent);
+ur_result_t urEventRelease(ur_event_handle_t hEvent);
+ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent,
+ ur_native_handle_t *phNativeEvent);
+ur_result_t
+urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent,
+ ur_context_handle_t hContext,
+ const ur_event_native_properties_t *pProperties,
+ ur_event_handle_t *phEvent);
+ur_result_t urEventSetCallback(ur_event_handle_t hEvent,
+ ur_execution_info_t execStatus,
+ ur_event_callback_t pfnNotify, void *pUserData);
+ur_result_t urEnqueueKernelLaunch(
+ ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+ const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+ const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueEventsWaitWithBarrier(
+ ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue,
+ ur_mem_handle_t hBuffer, bool blockingRead,
+ size_t offset, size_t size, void *pDst,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferWrite(
+ ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+ size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferReadRect(
+ ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
+ ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+ ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+ size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferWriteRect(
+ ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+ ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+ ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+ size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue,
+ ur_mem_handle_t hBufferSrc,
+ ur_mem_handle_t hBufferDst, size_t srcOffset,
+ size_t dstOffset, size_t size,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferCopyRect(
+ ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
+ ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin,
+ ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
+ size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue,
+ ur_mem_handle_t hBuffer,
+ const void *pPattern, size_t patternSize,
+ size_t offset, size_t size,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemImageRead(
+ ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead,
+ ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
+ size_t slicePitch, void *pDst, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemImageWrite(
+ ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite,
+ ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch,
+ size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t
+urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc,
+ ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin,
+ ur_rect_offset_t dstOrigin, ur_rect_region_t region,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue,
+ ur_mem_handle_t hBuffer, bool blockingMap,
+ ur_map_flags_t mapFlags, size_t offset,
+ size_t size, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent, void **ppRetMap);
+ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem,
+ void *pMappedPtr, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem,
+ size_t patternSize, const void *pPattern,
+ size_t size, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking,
+ void *pDst, const void *pSrc, size_t size,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem,
+ size_t size, ur_usm_migration_flags_t flags,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem,
+ size_t size, ur_usm_advice_flags_t advice,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem,
+ size_t pitch, size_t patternSize,
+ const void *pPattern, size_t width,
+ size_t height, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking,
+ void *pDst, size_t dstPitch, const void *pSrc,
+ size_t srcPitch, size_t width, size_t height,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueDeviceGlobalVariableWrite(
+ ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
+ bool blockingWrite, size_t count, size_t offset, const void *pSrc,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueDeviceGlobalVariableRead(
+ ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
+ bool blockingRead, size_t count, size_t offset, void *pDst,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue,
+ ur_program_handle_t hProgram,
+ const char *pipe_symbol, bool blocking,
+ void *pDst, size_t size,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue,
+ ur_program_handle_t hProgram,
+ const char *pipe_symbol, bool blocking,
+ void *pSrc, size_t size,
+ uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_usm_desc_t *pUSMDesc,
+ ur_usm_pool_handle_t pool, size_t widthInBytes,
+ size_t height, size_t elementSizeBytes,
+ void **ppMem, size_t *pResultPitch);
+ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_image_native_handle_t hImage);
+ur_result_t urBindlessImagesSampledImageHandleDestroyExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_image_native_handle_t hImage);
+ur_result_t urBindlessImagesImageAllocateExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+ ur_exp_image_mem_native_handle_t *phImageMem);
+ur_result_t
+urBindlessImagesImageFreeExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hImageMem);
+ur_result_t urBindlessImagesUnsampledImageCreateExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hImageMem,
+ const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+ ur_exp_image_native_handle_t *phImage);
+ur_result_t urBindlessImagesSampledImageCreateExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hImageMem,
+ const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+ ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage);
+ur_result_t urBindlessImagesImageCopyExp(
+ ur_queue_handle_t hQueue, const void *pSrc, void *pDst,
+ const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc,
+ const ur_image_format_t *pSrcImageFormat,
+ const ur_image_format_t *pDstImageFormat,
+ ur_exp_image_copy_region_t *pCopyRegion,
+ ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urBindlessImagesImageGetInfoExp(
+ ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem,
+ ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urBindlessImagesMipmapGetLevelExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel,
+ ur_exp_image_mem_native_handle_t *phImageMem);
+ur_result_t
+urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hMem);
+ur_result_t urBindlessImagesImportExternalMemoryExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
+ ur_exp_external_mem_type_t memHandleType,
+ ur_exp_external_mem_desc_t *pExternalMemDesc,
+ ur_exp_external_mem_handle_t *phExternalMem);
+ur_result_t urBindlessImagesMapExternalArrayExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+ ur_exp_external_mem_handle_t hExternalMem,
+ ur_exp_image_mem_native_handle_t *phImageMem);
+ur_result_t urBindlessImagesMapExternalLinearMemoryExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset,
+ uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem);
+ur_result_t urBindlessImagesReleaseExternalMemoryExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_external_mem_handle_t hExternalMem);
+ur_result_t urBindlessImagesImportExternalSemaphoreExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_external_semaphore_type_t semHandleType,
+ ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc,
+ ur_exp_external_semaphore_handle_t *phExternalSemaphore);
+ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
+ ur_context_handle_t hContext, ur_device_handle_t hDevice,
+ ur_exp_external_semaphore_handle_t hExternalSemaphore);
+ur_result_t urBindlessImagesWaitExternalSemaphoreExp(
+ ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore,
+ bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urBindlessImagesSignalExternalSemaphoreExp(
+ ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore,
+ bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t
+urCommandBufferCreateExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
+ ur_exp_command_buffer_handle_t *phCommandBuffer);
+ur_result_t
+urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer);
+ur_result_t
+urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer);
+ur_result_t
+urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer);
+ur_result_t urCommandBufferAppendKernelLaunchExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
+ uint32_t workDim, const size_t *pGlobalWorkOffset,
+ const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint,
+ ur_exp_command_buffer_command_handle_t *phCommand);
+ur_result_t urCommandBufferAppendUSMMemcpyExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc,
+ size_t size, uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendUSMFillExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
+ const void *pPattern, size_t patternSize, size_t size,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendMemBufferCopyExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
+ ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendMemBufferWriteExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
+ size_t offset, size_t size, const void *pSrc,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendMemBufferReadExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
+ size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendMemBufferCopyRectExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
+ ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin,
+ ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
+ size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendMemBufferWriteRectExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
+ ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
+ ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+ size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendMemBufferReadRectExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
+ ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
+ ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+ size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendMemBufferFillExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
+ const void *pPattern, size_t patternSize, size_t offset, size_t size,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendUSMPrefetchExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
+ size_t size, ur_usm_migration_flags_t flags,
+ uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferAppendUSMAdviseExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
+ size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
+ ur_exp_command_buffer_sync_point_t *pSyncPoint);
+ur_result_t urCommandBufferEnqueueExp(
+ ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urCommandBufferRetainCommandExp(
+ ur_exp_command_buffer_command_handle_t hCommand);
+ur_result_t urCommandBufferReleaseCommandExp(
+ ur_exp_command_buffer_command_handle_t hCommand);
+ur_result_t urCommandBufferUpdateKernelLaunchExp(
+ ur_exp_command_buffer_command_handle_t hCommand,
+ const ur_exp_command_buffer_update_kernel_launch_desc_t
+ *pUpdateKernelLaunch);
+ur_result_t
+urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
+ ur_exp_command_buffer_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urCommandBufferCommandGetInfoExp(
+ ur_exp_command_buffer_command_handle_t hCommand,
+ ur_exp_command_buffer_command_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet);
+ur_result_t urEnqueueCooperativeKernelLaunchExp(
+ ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+ const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+ const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
+ ur_kernel_handle_t hKernel, size_t localWorkSize,
+ size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet);
+ur_result_t urEnqueueTimestampRecordingExp(
+ ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList,
+ const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent);
+ur_result_t urEnqueueKernelLaunchCustomExp(
+ ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+ const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
+ uint32_t numPropsInLaunchPropList,
+ const ur_exp_launch_property_t *launchPropList,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+ur_result_t urProgramBuildExp(ur_program_handle_t hProgram, uint32_t numDevices,
+ ur_device_handle_t *phDevices,
+ const char *pOptions);
+ur_result_t urProgramCompileExp(ur_program_handle_t hProgram,
+ uint32_t numDevices,
+ ur_device_handle_t *phDevices,
+ const char *pOptions);
+ur_result_t urProgramLinkExp(ur_context_handle_t hContext, uint32_t numDevices,
+ ur_device_handle_t *phDevices, uint32_t count,
+ const ur_program_handle_t *phPrograms,
+ const char *pOptions,
+ ur_program_handle_t *phProgram);
+ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem,
+ size_t size);
+ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem);
+ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice);
+ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice);
+ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice,
+ ur_exp_peer_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet);
+ur_result_t urEnqueueNativeCommandExp(
+ ur_queue_handle_t hQueue,
+ ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data,
+ uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList,
+ const ur_exp_enqueue_native_command_properties_t *pProperties,
+ uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+ ur_event_handle_t *phEvent);
+#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO
+ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi);
+#endif
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp
index 096ae076f9..36965c5d58 100644
--- a/source/adapters/level_zero/ur_level_zero.hpp
+++ b/source/adapters/level_zero/ur_level_zero.hpp
@@ -20,7 +20,7 @@
#include
#include
-#include
+#include
#include
#include
diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp
index 1069ec78da..5296391794 100644
--- a/source/adapters/level_zero/usm.cpp
+++ b/source/adapters/level_zero/usm.cpp
@@ -17,6 +17,7 @@
#include "usm.hpp"
#include "logger/ur_logger.hpp"
+#include "ur_interface_loader.hpp"
#include "ur_level_zero.hpp"
#include "ur_util.hpp"
@@ -296,7 +297,9 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr,
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(
+namespace ur::level_zero {
+
+ur_result_t urUSMHostAlloc(
ur_context_handle_t Context, ///< [in] handle of the context object
const ur_usm_desc_t
*USMDesc, ///< [in][optional] USM memory allocation descriptor
@@ -335,7 +338,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(
// We are going to defer memory release if there are kernels with indirect
// access, that is why explicitly retain context to be sure that it is
// released after all memory allocations in this context are released.
- UR_CALL(urContextRetain(Context));
+ UR_CALL(ur::level_zero::urContextRetain(Context));
} else {
ContextLock.lock();
}
@@ -368,7 +371,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(
+ur_result_t urUSMDeviceAlloc(
ur_context_handle_t Context, ///< [in] handle of the context object
ur_device_handle_t Device, ///< [in] handle of the device object
const ur_usm_desc_t
@@ -410,7 +413,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(
// We are going to defer memory release if there are kernels with indirect
// access, that is why explicitly retain context to be sure that it is
// released after all memory allocations in this context are released.
- UR_CALL(urContextRetain(Context));
+ UR_CALL(ur::level_zero::urContextRetain(Context));
} else {
ContextLock.lock();
}
@@ -448,7 +451,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(
+ur_result_t urUSMSharedAlloc(
ur_context_handle_t Context, ///< [in] handle of the context object
ur_device_handle_t Device, ///< [in] handle of the device object
const ur_usm_desc_t
@@ -513,7 +516,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(
// We are going to defer memory release if there are kernels with indirect
// access, that is why explicitly retain context to be sure that it is
// released after all memory allocations in this context are released.
- UR_CALL(urContextRetain(Context));
+ UR_CALL(ur::level_zero::urContextRetain(Context));
}
umf_memory_pool_handle_t hPoolInternal = nullptr;
@@ -555,9 +558,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(
- ur_context_handle_t Context, ///< [in] handle of the context object
- void *Mem ///< [in] pointer to USM memory object
+ur_result_t
+urUSMFree(ur_context_handle_t Context, ///< [in] handle of the context object
+ void *Mem ///< [in] pointer to USM memory object
) {
ur_platform_handle_t Plt = Context->getPlatform();
@@ -567,7 +570,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(
return USMFreeHelper(Context, Mem);
}
-UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo(
+ur_result_t urUSMGetMemAllocInfo(
ur_context_handle_t Context, ///< [in] handle of the context object
const void *Ptr, ///< [in] pointer to USM memory object
ur_usm_alloc_info_t
@@ -667,6 +670,103 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo(
return UR_RESULT_SUCCESS;
}
+ur_result_t urUSMPoolCreate(
+ ur_context_handle_t Context, ///< [in] handle of the context object
+ ur_usm_pool_desc_t
+ *PoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with
+ ///< ::ur_usm_pool_limits_desc_t
+ ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool
+) {
+
+ try {
+    *Pool = reinterpret_cast<ur_usm_pool_handle_t>(
+        new ur_usm_pool_handle_t_(Context, PoolDesc));
+
+    std::shared_lock<ur_shared_mutex> ContextLock(Context->Mutex);
+ Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool);
+
+ } catch (const UsmAllocationException &Ex) {
+ return Ex.getError();
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t
+urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
+) {
+ Pool->RefCount.increment();
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t
+urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
+) {
+ if (Pool->RefCount.decrementAndTest()) {
+    std::shared_lock<ur_shared_mutex> ContextLock(Pool->Context->Mutex);
+ Pool->Context->UsmPoolHandles.remove(Pool);
+ delete Pool;
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urUSMPoolGetInfo(
+ ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool
+ ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query
+ size_t PropSize, ///< [in] size in bytes of the pool property value provided
+ void *PropValue, ///< [out][typename(propName, propSize)] value of the pool
+ ///< property
+ size_t *PropSizeRet ///< [out] size in bytes returned in pool property value
+) {
+ UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
+
+ switch (PropName) {
+ case UR_USM_POOL_INFO_REFERENCE_COUNT: {
+ return ReturnValue(Pool->RefCount.load());
+ }
+ case UR_USM_POOL_INFO_CONTEXT: {
+ return ReturnValue(Pool->Context);
+ }
+ default: {
+ return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
+ }
+ }
+}
+
+ur_result_t urUSMImportExp(ur_context_handle_t Context, void *HostPtr,
+ size_t Size) {
+ UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT);
+
+ // Promote the host ptr to USM host memory.
+ if (ZeUSMImport.Supported && HostPtr != nullptr) {
+ // Query memory type of the host pointer
+ ze_device_handle_t ZeDeviceHandle;
+    ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties;
+ ZE2UR_CALL(zeMemGetAllocProperties,
+ (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties,
+ &ZeDeviceHandle));
+
+ // If not shared of any type, we can import the ptr
+ if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) {
+ // Promote the host ptr to USM host memory
+ ze_driver_handle_t driverHandle =
+ Context->getPlatform()->ZeDriverHandleExpTranslated;
+ ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size);
+ }
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) {
+ UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT);
+
+ // Release the imported memory.
+ if (ZeUSMImport.Supported && HostPtr != nullptr)
+ ZeUSMImport.doZeUSMRelease(
+ Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr);
+ return UR_RESULT_SUCCESS;
+}
+} // namespace ur::level_zero
+
static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) {
auto ZeResult = ZE_CALL_NOCHECK(zeMemFree, (Context->ZeContext, Ptr));
// Handle When the driver is already released
@@ -972,68 +1072,6 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
}
}
-UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate(
- ur_context_handle_t Context, ///< [in] handle of the context object
- ur_usm_pool_desc_t
- *PoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with
- ///< ::ur_usm_pool_limits_desc_t
- ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool
-) {
-
- try {
-    *Pool = reinterpret_cast<ur_usm_pool_handle_t>(
-        new ur_usm_pool_handle_t_(Context, PoolDesc));
-
-    std::shared_lock<ur_shared_mutex> ContextLock(Context->Mutex);
- Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool);
-
- } catch (const UsmAllocationException &Ex) {
- return Ex.getError();
- }
- return UR_RESULT_SUCCESS;
-}
-
-ur_result_t
-urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
-) {
- Pool->RefCount.increment();
- return UR_RESULT_SUCCESS;
-}
-
-ur_result_t
-urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
-) {
- if (Pool->RefCount.decrementAndTest()) {
-    std::shared_lock<ur_shared_mutex> ContextLock(Pool->Context->Mutex);
- Pool->Context->UsmPoolHandles.remove(Pool);
- delete Pool;
- }
- return UR_RESULT_SUCCESS;
-}
-
-ur_result_t urUSMPoolGetInfo(
- ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool
- ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query
- size_t PropSize, ///< [in] size in bytes of the pool property value provided
- void *PropValue, ///< [out][typename(propName, propSize)] value of the pool
- ///< property
- size_t *PropSizeRet ///< [out] size in bytes returned in pool property value
-) {
- UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
-
- switch (PropName) {
- case UR_USM_POOL_INFO_REFERENCE_COUNT: {
- return ReturnValue(Pool->RefCount.load());
- }
- case UR_USM_POOL_INFO_CONTEXT: {
- return ReturnValue(Pool->Context);
- }
- default: {
- return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
- }
- }
-}
-
// If indirect access tracking is not enabled then this functions just performs
// zeMemFree. If indirect access tracking is enabled then reference counting is
// performed.
@@ -1116,38 +1154,3 @@ ur_result_t USMFreeHelper(ur_context_handle_t Context, void *Ptr,
UR_CALL(ContextReleaseHelper(Context));
return umf2urResult(umfRet);
}
-
-UR_APIEXPORT ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t Context,
- void *HostPtr, size_t Size) {
- UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT);
-
- // Promote the host ptr to USM host memory.
- if (ZeUSMImport.Supported && HostPtr != nullptr) {
- // Query memory type of the host pointer
- ze_device_handle_t ZeDeviceHandle;
-    ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties;
- ZE2UR_CALL(zeMemGetAllocProperties,
- (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties,
- &ZeDeviceHandle));
-
- // If not shared of any type, we can import the ptr
- if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) {
- // Promote the host ptr to USM host memory
- ze_driver_handle_t driverHandle =
- Context->getPlatform()->ZeDriverHandleExpTranslated;
- ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size);
- }
- }
- return UR_RESULT_SUCCESS;
-}
-
-UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context,
- void *HostPtr) {
- UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT);
-
- // Release the imported memory.
- if (ZeUSMImport.Supported && HostPtr != nullptr)
- ZeUSMImport.doZeUSMRelease(
- Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr);
- return UR_RESULT_SUCCESS;
-}
diff --git a/source/adapters/level_zero/usm_p2p.cpp b/source/adapters/level_zero/usm_p2p.cpp
index 2b81828423..6e701aa803 100644
--- a/source/adapters/level_zero/usm_p2p.cpp
+++ b/source/adapters/level_zero/usm_p2p.cpp
@@ -11,8 +11,10 @@
#include "logger/ur_logger.hpp"
#include "ur_level_zero.hpp"
-UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp(
- ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) {
+namespace ur::level_zero {
+
+ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice) {
std::ignore = commandDevice;
std::ignore = peerDevice;
@@ -21,8 +23,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp(
- ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) {
+ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice) {
std::ignore = commandDevice;
std::ignore = peerDevice;
@@ -31,10 +33,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
- ur_device_handle_t commandDevice, ur_device_handle_t peerDevice,
- ur_exp_peer_info_t propName, size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice,
+ ur_exp_peer_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet) {
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
@@ -69,3 +72,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
return ReturnValue(propertyValue);
}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp
index dc52874364..cd25f838fe 100644
--- a/source/adapters/level_zero/v2/api.cpp
+++ b/source/adapters/level_zero/v2/api.cpp
@@ -17,13 +17,14 @@
std::mutex ZeCall::GlobalLock;
-ur_result_t UR_APICALL urContextGetNativeHandle(
- ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) {
+namespace ur::level_zero {
+ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext,
+ ur_native_handle_t *phNativeContext) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urContextCreateWithNativeHandle(
+ur_result_t urContextCreateWithNativeHandle(
ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter,
uint32_t numDevices, const ur_device_handle_t *phDevices,
const ur_context_native_properties_t *pProperties,
@@ -32,62 +33,30 @@ ur_result_t UR_APICALL urContextCreateWithNativeHandle(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urContextSetExtendedDeleter(
- ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter,
- void *pUserData) {
+ur_result_t
+urContextSetExtendedDeleter(ur_context_handle_t hContext,
+ ur_context_extended_deleter_t pfnDeleter,
+ void *pUserData) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urMemImageCreate(ur_context_handle_t hContext,
- ur_mem_flags_t flags,
- const ur_image_format_t *pImageFormat,
- const ur_image_desc_t *pImageDesc,
- void *pHost, ur_mem_handle_t *phMem) {
+ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags,
+ const ur_image_format_t *pImageFormat,
+ const ur_image_desc_t *pImageDesc, void *pHost,
+ ur_mem_handle_t *phMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urMemBufferCreate(
- ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size,
- const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) {
+ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem,
+ ur_device_handle_t hDevice,
+ ur_native_handle_t *phNativeMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urMemBufferPartition(
- ur_mem_handle_t hBuffer, ur_mem_flags_t flags,
- ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion,
- ur_mem_handle_t *phMem) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urMemGetNativeHandle(ur_mem_handle_t hMem,
- ur_device_handle_t hDevice,
- ur_native_handle_t *phNativeMem) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle(
- ur_native_handle_t hNativeMem, ur_context_handle_t hContext,
- const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urMemImageCreateWithNativeHandle(
+ur_result_t urMemImageCreateWithNativeHandle(
ur_native_handle_t hNativeMem, ur_context_handle_t hContext,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) {
@@ -95,53 +64,51 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
- ur_mem_info_t propName, size_t propSize,
- void *pPropValue, size_t *pPropSizeRet) {
+ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory,
- ur_image_info_t propName,
- size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urSamplerCreate(ur_context_handle_t hContext,
- const ur_sampler_desc_t *pDesc,
- ur_sampler_handle_t *phSampler) {
+ur_result_t urSamplerCreate(ur_context_handle_t hContext,
+ const ur_sampler_desc_t *pDesc,
+ ur_sampler_handle_t *phSampler) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) {
+ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) {
+ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler,
- ur_sampler_info_t propName,
- size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler,
+ ur_sampler_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urSamplerGetNativeHandle(
- ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) {
+ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler,
+ ur_native_handle_t *phNativeSampler) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urSamplerCreateWithNativeHandle(
+ur_result_t urSamplerCreateWithNativeHandle(
ur_native_handle_t hNativeSampler, ur_context_handle_t hContext,
const ur_sampler_native_properties_t *pProperties,
ur_sampler_handle_t *phSampler) {
@@ -149,7 +116,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
+ur_result_t urVirtualMemGranularityGetInfo(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_virtual_mem_granularity_info_t propName, size_t propSize,
void *pPropValue, size_t *pPropSizeRet) {
@@ -157,111 +124,74 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t hContext,
- const void *pStart, size_t size,
- void **ppStart) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t hContext,
- const void *pStart, size_t size) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urVirtualMemMap(ur_context_handle_t hContext,
- const void *pStart, size_t size,
- ur_physical_mem_handle_t hPhysicalMem,
- size_t offset,
- ur_virtual_mem_access_flags_t flags) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t hContext,
- const void *pStart, size_t size) {
+ur_result_t urVirtualMemReserve(ur_context_handle_t hContext,
+ const void *pStart, size_t size,
+ void **ppStart) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
-urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart,
- size_t size, ur_virtual_mem_access_flags_t flags) {
+ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart,
+ size_t size) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t hContext,
- const void *pStart, size_t size,
- ur_virtual_mem_info_t propName,
- size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart,
+ size_t size, ur_physical_mem_handle_t hPhysicalMem,
+ size_t offset,
+ ur_virtual_mem_access_flags_t flags) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urPhysicalMemCreate(
- ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
- const ur_physical_mem_properties_t *pProperties,
- ur_physical_mem_handle_t *phPhysicalMem) {
+ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart,
+ size_t size) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
-urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) {
+ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext,
+ const void *pStart, size_t size,
+ ur_virtual_mem_access_flags_t flags) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
-urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
+ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext,
+ const void *pStart, size_t size,
+ ur_virtual_mem_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelSetArgLocal(
- ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
- const ur_kernel_arg_local_properties_t *pProperties) {
+ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice, size_t size,
+ const ur_physical_mem_properties_t *pProperties,
+ ur_physical_mem_handle_t *phPhysicalMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel,
- ur_kernel_info_t propName,
- size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel,
- ur_device_handle_t hDevice,
- ur_kernel_group_info_t propName,
- size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
-urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
- ur_kernel_sub_group_info_t propName, size_t propSize,
- void *pPropValue, size_t *pPropSizeRet) {
+ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel,
+ ur_kernel_info_t propName, size_t propSize,
+ void *pPropValue, size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelSetExecInfo(
- ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize,
- const ur_kernel_exec_info_properties_t *pProperties,
- const void *pPropValue) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL
+ur_result_t
urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex,
const ur_kernel_arg_sampler_properties_t *pProperties,
ur_sampler_handle_t hArgValue) {
@@ -269,112 +199,94 @@ urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex,
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
-urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
- const ur_kernel_arg_mem_obj_properties_t *pProperties,
- ur_mem_handle_t hArgValue) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urKernelSetSpecializationConstants(
+ur_result_t urKernelSetSpecializationConstants(
ur_kernel_handle_t hKernel, uint32_t count,
const ur_specialization_constant_info_t *pSpecConstants) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelGetNativeHandle(
- ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) {
+ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel,
+ ur_native_handle_t *phNativeKernel) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelCreateWithNativeHandle(
- ur_native_handle_t hNativeKernel, ur_context_handle_t hContext,
- ur_program_handle_t hProgram,
- const ur_kernel_native_properties_t *pProperties,
- ur_kernel_handle_t *phKernel) {
+ur_result_t
+urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel,
+ ur_context_handle_t hContext,
+ ur_program_handle_t hProgram,
+ const ur_kernel_native_properties_t *pProperties,
+ ur_kernel_handle_t *phKernel) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
- ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t numWorkDim,
- const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
- size_t *pSuggestedLocalWorkSize) {
+ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel,
+ ur_queue_handle_t hQueue,
+ uint32_t numWorkDim,
+ const size_t *pGlobalWorkOffset,
+ const size_t *pGlobalWorkSize,
+ size_t *pSuggestedLocalWorkSize) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
- ur_event_info_t propName, size_t propSize,
- void *pPropValue, size_t *pPropSizeRet) {
+ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent,
+ ur_profiling_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urEventGetProfilingInfo(ur_event_handle_t hEvent,
- ur_profiling_info_t propName,
- size_t propSize,
- void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent,
+ ur_native_handle_t *phNativeEvent) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urEventWait(uint32_t numEvents,
- const ur_event_handle_t *phEventWaitList) {
+ur_result_t
+urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent,
+ ur_context_handle_t hContext,
+ const ur_event_native_properties_t *pProperties,
+ ur_event_handle_t *phEvent) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urEventGetNativeHandle(
- ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) {
+ur_result_t urEventSetCallback(ur_event_handle_t hEvent,
+ ur_execution_info_t execStatus,
+ ur_event_callback_t pfnNotify, void *pUserData) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urEventCreateWithNativeHandle(
- ur_native_handle_t hNativeEvent, ur_context_handle_t hContext,
- const ur_event_native_properties_t *pProperties,
- ur_event_handle_t *phEvent) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t hEvent,
- ur_execution_info_t execStatus,
- ur_event_callback_t pfnNotify,
- void *pUserData) {
+ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_usm_desc_t *pUSMDesc,
+ ur_usm_pool_handle_t pool, size_t widthInBytes,
+ size_t height, size_t elementSizeBytes,
+ void **ppMem, size_t *pResultPitch) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urUSMPitchedAllocExp(
- ur_context_handle_t hContext, ur_device_handle_t hDevice,
- const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool,
- size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem,
- size_t *pResultPitch) {
- logger::error("{} function not implemented!", __FUNCTION__);
- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
-}
-
-ur_result_t UR_APICALL urBindlessImagesUnsampledImageHandleDestroyExp(
+ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_native_handle_t hImage) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesSampledImageHandleDestroyExp(
+ur_result_t urBindlessImagesSampledImageHandleDestroyExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_native_handle_t hImage) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
+ur_result_t urBindlessImagesImageAllocateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
ur_exp_image_mem_native_handle_t *phImageMem) {
@@ -382,14 +294,15 @@ ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesImageFreeExp(
- ur_context_handle_t hContext, ur_device_handle_t hDevice,
- ur_exp_image_mem_native_handle_t hImageMem) {
+ur_result_t
+urBindlessImagesImageFreeExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hImageMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp(
+ur_result_t urBindlessImagesUnsampledImageCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_mem_native_handle_t hImageMem,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
@@ -398,7 +311,7 @@ ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
+ur_result_t urBindlessImagesSampledImageCreateExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_mem_native_handle_t hImageMem,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
@@ -407,14 +320,14 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp(
+ur_result_t urBindlessImagesImageGetInfoExp(
ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem,
ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp(
+ur_result_t urBindlessImagesMipmapGetLevelExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel,
ur_exp_image_mem_native_handle_t *phImageMem) {
@@ -422,14 +335,15 @@ ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp(
- ur_context_handle_t hContext, ur_device_handle_t hDevice,
- ur_exp_image_mem_native_handle_t hMem) {
+ur_result_t
+urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ ur_exp_image_mem_native_handle_t hMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp(
+ur_result_t urBindlessImagesImportExternalMemoryExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
ur_exp_external_mem_type_t memHandleType,
ur_exp_external_mem_desc_t *pExternalMemDesc,
@@ -438,7 +352,7 @@ ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp(
+ur_result_t urBindlessImagesMapExternalArrayExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
ur_exp_external_mem_handle_t hExternalMem,
@@ -447,21 +361,21 @@ ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp(
+ur_result_t urBindlessImagesMapExternalLinearMemoryExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset,
uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp(
+ur_result_t urBindlessImagesReleaseExternalMemoryExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_external_mem_handle_t hExternalMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp(
+ur_result_t urBindlessImagesImportExternalSemaphoreExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_external_semaphore_type_t semHandleType,
ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc,
@@ -470,40 +384,41 @@ ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp(
+ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_exp_external_semaphore_handle_t hExternalSemaphore) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferCreateExp(
- ur_context_handle_t hContext, ur_device_handle_t hDevice,
- const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
- ur_exp_command_buffer_handle_t *phCommandBuffer) {
+ur_result_t
+urCommandBufferCreateExp(ur_context_handle_t hContext,
+ ur_device_handle_t hDevice,
+ const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
+ ur_exp_command_buffer_handle_t *phCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
+ur_result_t
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
+ur_result_t
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL
+ur_result_t
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
+ur_result_t urCommandBufferAppendKernelLaunchExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
uint32_t workDim, const size_t *pGlobalWorkOffset,
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
@@ -515,7 +430,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp(
+ur_result_t urCommandBufferAppendUSMMemcpyExp(
ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc,
size_t size, uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
@@ -524,7 +439,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp(
+ur_result_t urCommandBufferAppendUSMFillExp(
ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
const void *pPattern, size_t patternSize, size_t size,
uint32_t numSyncPointsInWaitList,
@@ -534,7 +449,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
+ur_result_t urCommandBufferAppendMemBufferCopyExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size,
uint32_t numSyncPointsInWaitList,
@@ -544,7 +459,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp(
+ur_result_t urCommandBufferAppendMemBufferWriteExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
size_t offset, size_t size, const void *pSrc,
uint32_t numSyncPointsInWaitList,
@@ -554,7 +469,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp(
+ur_result_t urCommandBufferAppendMemBufferReadExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
@@ -563,7 +478,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
+ur_result_t urCommandBufferAppendMemBufferCopyRectExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem,
ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin,
ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
@@ -575,7 +490,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp(
+ur_result_t urCommandBufferAppendMemBufferWriteRectExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
@@ -587,7 +502,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp(
+ur_result_t urCommandBufferAppendMemBufferReadRectExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset,
ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
@@ -599,7 +514,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
+ur_result_t urCommandBufferAppendMemBufferFillExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
const void *pPattern, size_t patternSize, size_t offset, size_t size,
uint32_t numSyncPointsInWaitList,
@@ -609,7 +524,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
+ur_result_t urCommandBufferAppendUSMPrefetchExp(
ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
size_t size, ur_usm_migration_flags_t flags,
uint32_t numSyncPointsInWaitList,
@@ -619,7 +534,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
+ur_result_t urCommandBufferAppendUSMAdviseExp(
ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
@@ -628,7 +543,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferEnqueueExp(
+ur_result_t urCommandBufferEnqueueExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue,
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {
@@ -636,19 +551,19 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferRetainCommandExp(
+ur_result_t urCommandBufferRetainCommandExp(
ur_exp_command_buffer_command_handle_t hCommand) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferReleaseCommandExp(
+ur_result_t urCommandBufferReleaseCommandExp(
ur_exp_command_buffer_command_handle_t hCommand) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
+ur_result_t urCommandBufferUpdateKernelLaunchExp(
ur_exp_command_buffer_command_handle_t hCommand,
const ur_exp_command_buffer_update_kernel_launch_desc_t
*pUpdateKernelLaunch) {
@@ -656,15 +571,16 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferGetInfoExp(
- ur_exp_command_buffer_handle_t hCommandBuffer,
- ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t
+urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
+ ur_exp_command_buffer_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp(
+ur_result_t urCommandBufferCommandGetInfoExp(
ur_exp_command_buffer_command_handle_t hCommand,
ur_exp_command_buffer_command_info_t propName, size_t propSize,
void *pPropValue, size_t *pPropSizeRet) {
@@ -672,41 +588,42 @@ ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(
+ur_result_t urKernelSuggestMaxCooperativeGroupCountExp(
ur_kernel_handle_t hKernel, size_t localWorkSize,
size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t hContext, void *pMem,
- size_t size) {
+ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem,
+ size_t size) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t hContext,
- void *pMem) {
+ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp(
- ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) {
+ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp(
- ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) {
+ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
-ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
- ur_device_handle_t commandDevice, ur_device_handle_t peerDevice,
- ur_exp_peer_info_t propName, size_t propSize, void *pPropValue,
- size_t *pPropSizeRet) {
+ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice,
+ ur_device_handle_t peerDevice,
+ ur_exp_peer_info_t propName,
+ size_t propSize, void *pPropValue,
+ size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/v2/command_list_cache.cpp b/source/adapters/level_zero/v2/command_list_cache.cpp
index eee6555f87..651cb5944a 100644
--- a/source/adapters/level_zero/v2/command_list_cache.cpp
+++ b/source/adapters/level_zero/v2/command_list_cache.cpp
@@ -43,7 +43,7 @@ inline size_t command_list_descriptor_hash_t::operator()(
command_list_cache_t::command_list_cache_t(ze_context_handle_t ZeContext)
: ZeContext{ZeContext} {}
-raii::ze_command_list_t
+raii::ze_command_list_handle_t
command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
if (auto ImmCmdDesc =
std::get_if(&desc)) {
@@ -61,7 +61,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
ZE2UR_CALL_THROWS(
zeCommandListCreateImmediate,
(ZeContext, ImmCmdDesc->ZeDevice, &QueueDesc, &ZeCommandList));
- return raii::ze_command_list_t(ZeCommandList, &zeCommandListDestroy);
+ return raii::ze_command_list_handle_t(ZeCommandList);
} else {
auto RegCmdDesc = std::get(desc);
ZeStruct CmdListDesc;
@@ -72,7 +72,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
ze_command_list_handle_t ZeCommandList;
ZE2UR_CALL_THROWS(zeCommandListCreate, (ZeContext, RegCmdDesc.ZeDevice,
&CmdListDesc, &ZeCommandList));
- return raii::ze_command_list_t(ZeCommandList, &zeCommandListDestroy);
+ return raii::ze_command_list_handle_t(ZeCommandList);
}
}
@@ -94,8 +94,7 @@ command_list_cache_t::getImmediateCommandList(
auto CommandList = getCommandList(Desc).release();
return raii::cache_borrowed_command_list_t(
CommandList, [Cache = this, Desc](ze_command_list_handle_t CmdList) {
- Cache->addCommandList(
- Desc, raii::ze_command_list_t(CmdList, &zeCommandListDestroy));
+ Cache->addCommandList(Desc, raii::ze_command_list_handle_t(CmdList));
});
}
@@ -113,12 +112,11 @@ command_list_cache_t::getRegularCommandList(ze_device_handle_t ZeDevice,
return raii::cache_borrowed_command_list_t(
CommandList, [Cache = this, Desc](ze_command_list_handle_t CmdList) {
- Cache->addCommandList(
- Desc, raii::ze_command_list_t(CmdList, &zeCommandListDestroy));
+ Cache->addCommandList(Desc, raii::ze_command_list_handle_t(CmdList));
});
}
-raii::ze_command_list_t
+raii::ze_command_list_handle_t
command_list_cache_t::getCommandList(const command_list_descriptor_t &desc) {
std::unique_lock Lock(ZeCommandListCacheMutex);
auto it = ZeCommandListCache.find(desc);
@@ -129,7 +127,8 @@ command_list_cache_t::getCommandList(const command_list_descriptor_t &desc) {
assert(!it->second.empty());
- raii::ze_command_list_t CommandListHandle = std::move(it->second.top());
+ raii::ze_command_list_handle_t CommandListHandle =
+ std::move(it->second.top());
it->second.pop();
if (it->second.empty())
@@ -138,8 +137,9 @@ command_list_cache_t::getCommandList(const command_list_descriptor_t &desc) {
return CommandListHandle;
}
-void command_list_cache_t::addCommandList(const command_list_descriptor_t &desc,
- raii::ze_command_list_t cmdList) {
+void command_list_cache_t::addCommandList(
+ const command_list_descriptor_t &desc,
+ raii::ze_command_list_handle_t cmdList) {
// TODO: add a limit?
std::unique_lock Lock(ZeCommandListCacheMutex);
auto [it, _] = ZeCommandListCache.try_emplace(desc);
diff --git a/source/adapters/level_zero/v2/command_list_cache.hpp b/source/adapters/level_zero/v2/command_list_cache.hpp
index 1850a4334c..9884e16dc4 100644
--- a/source/adapters/level_zero/v2/command_list_cache.hpp
+++ b/source/adapters/level_zero/v2/command_list_cache.hpp
@@ -14,18 +14,16 @@
#include "latency_tracker.hpp"
#include
-#include
+#include
#include
-#include "../common.hpp"
+#include "common.hpp"
namespace v2 {
namespace raii {
-using ze_command_list_t = std::unique_ptr<::_ze_command_list_handle_t,
- decltype(&zeCommandListDestroy)>;
using cache_borrowed_command_list_t =
std::unique_ptr<::_ze_command_list_handle_t,
- std::function>;
+ std::function>;
} // namespace raii
struct immediate_command_list_descriptor_t {
@@ -72,15 +70,16 @@ struct command_list_cache_t {
private:
ze_context_handle_t ZeContext;
std::unordered_map,
+ std::stack,
command_list_descriptor_hash_t>
ZeCommandListCache;
ur_mutex ZeCommandListCacheMutex;
- raii::ze_command_list_t getCommandList(const command_list_descriptor_t &desc);
+ raii::ze_command_list_handle_t
+ getCommandList(const command_list_descriptor_t &desc);
void addCommandList(const command_list_descriptor_t &desc,
- raii::ze_command_list_t cmdList);
- raii::ze_command_list_t
+ raii::ze_command_list_handle_t cmdList);
+ raii::ze_command_list_handle_t
createCommandList(const command_list_descriptor_t &desc);
};
} // namespace v2
diff --git a/source/adapters/level_zero/v2/common.hpp b/source/adapters/level_zero/v2/common.hpp
index fdfed0c661..4fb851bad8 100644
--- a/source/adapters/level_zero/v2/common.hpp
+++ b/source/adapters/level_zero/v2/common.hpp
@@ -54,6 +54,8 @@ struct ze_handle_wrapper {
try {
reset();
} catch (...) {
+ // TODO: add appropriate logging or pass the error
+ // to the caller (make the dtor noexcept(false) or use tls?)
}
}
@@ -85,70 +87,6 @@ struct ze_handle_wrapper {
bool ownZeHandle;
};
-template
-struct ur_shared_handle {
- using handle_t = URHandle;
-
- ur_shared_handle() : handle(nullptr) {}
- explicit ur_shared_handle(handle_t handle) : handle(handle) {}
- ~ur_shared_handle() {
- try {
- reset();
- } catch (...) {
- }
- }
-
- ur_shared_handle(const ur_shared_handle &other) : handle(other.handle) {
- retain(handle);
- }
- ur_shared_handle(ur_shared_handle &&other) : handle(other.handle) {
- other.handle = nullptr;
- }
- ur_shared_handle(std::nullptr_t) : handle(nullptr) {}
-
- void reset() {
- if (!handle) {
- return;
- }
-
- UR_CALL_THROWS(release(handle));
- handle = nullptr;
- }
-
- ur_shared_handle &operator=(const ur_shared_handle &other) {
- if (handle) {
- release(handle);
- }
- handle = other.handle;
- retain(handle);
- return *this;
- }
- ur_shared_handle &operator=(ur_shared_handle &&other) {
- if (handle) {
- release(handle);
- }
- handle = other.handle;
- other.handle = nullptr;
- return *this;
- }
- ur_shared_handle &operator=(std::nullptr_t) {
- if (handle) {
- release(handle);
- }
- new (this) ur_shared_handle(nullptr);
- return *this;
- }
-
- handle_t *ptr() { return &handle; }
- handle_t get() const { return handle; }
- handle_t operator->() { return handle; }
- operator handle_t() { return handle; }
-
-private:
- handle_t handle;
-};
-
using ze_kernel_handle_t =
ze_handle_wrapper<::ze_kernel_handle_t, zeKernelDestroy>;
@@ -158,11 +96,11 @@ using ze_event_handle_t =
using ze_event_pool_handle_t =
ze_handle_wrapper<::ze_event_pool_handle_t, zeEventPoolDestroy>;
-using ur_queue_shared_handle_t =
- ur_shared_handle;
+using ze_context_handle_t =
+ ze_handle_wrapper<::ze_context_handle_t, zeContextDestroy>;
-using ur_kernel_shared_handle_t =
- ur_shared_handle;
+using ze_command_list_handle_t =
+ ze_handle_wrapper<::ze_command_list_handle_t, zeCommandListDestroy>;
} // namespace raii
} // namespace v2
diff --git a/source/adapters/level_zero/v2/context.cpp b/source/adapters/level_zero/v2/context.cpp
index 08032fe85e..abb8a13538 100644
--- a/source/adapters/level_zero/v2/context.cpp
+++ b/source/adapters/level_zero/v2/context.cpp
@@ -17,8 +17,8 @@ ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext,
uint32_t numDevices,
const ur_device_handle_t *phDevices,
bool ownZeContext)
- : hContext(hContext), hDevices(phDevices, phDevices + numDevices),
- commandListCache(hContext),
+ : hContext(hContext, ownZeContext),
+ hDevices(phDevices, phDevices + numDevices), commandListCache(hContext),
eventPoolCache(phDevices[0]->Platform->getNumDevices(),
[context = this,
platform = phDevices[0]->Platform](DeviceId deviceId) {
@@ -27,19 +27,7 @@ ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext,
return std::make_unique(
context, device, v2::EVENT_COUNTER,
v2::QUEUE_IMMEDIATE);
- }) {
- std::ignore = ownZeContext;
-}
-
-ur_context_handle_t_::~ur_context_handle_t_() noexcept(false) {
- // ur_context_handle_t_ is only created/destroyed through urContextCreate
- // and urContextRelease so it's safe to throw here
- ZE2UR_CALL_THROWS(zeContextDestroy, (hContext));
-}
-
-ze_context_handle_t ur_context_handle_t_::getZeHandle() const {
- return hContext;
-}
+ }) {}
ur_result_t ur_context_handle_t_::retain() {
RefCount.increment();
@@ -72,10 +60,11 @@ bool ur_context_handle_t_::isValidDevice(ur_device_handle_t hDevice) const {
return false;
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices,
- const ur_context_properties_t *pProperties,
- ur_context_handle_t *phContext) {
+namespace ur::level_zero {
+ur_result_t urContextCreate(uint32_t deviceCount,
+ const ur_device_handle_t *phDevices,
+ const ur_context_properties_t *pProperties,
+ ur_context_handle_t *phContext) {
std::ignore = pProperties;
ur_platform_handle_t hPlatform = phDevices[0]->Platform;
@@ -89,23 +78,20 @@ urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices,
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urContextRetain(ur_context_handle_t hContext) {
+ur_result_t urContextRetain(ur_context_handle_t hContext) {
return hContext->retain();
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urContextRelease(ur_context_handle_t hContext) {
+ur_result_t urContextRelease(ur_context_handle_t hContext) {
return hContext->release();
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urContextGetInfo(ur_context_handle_t hContext,
- ur_context_info_t contextInfoType, size_t propSize,
+ur_result_t urContextGetInfo(ur_context_handle_t hContext,
+ ur_context_info_t contextInfoType, size_t propSize,
- void *pContextInfo,
+ void *pContextInfo,
- size_t *pPropSizeRet) {
+ size_t *pPropSizeRet) {
std::shared_lock Lock(hContext->Mutex);
UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet);
switch (
@@ -117,7 +103,14 @@ urContextGetInfo(ur_context_handle_t hContext,
return ReturnValue(uint32_t(hContext->getDevices().size()));
case UR_CONTEXT_INFO_REFERENCE_COUNT:
return ReturnValue(uint32_t{hContext->RefCount.load()});
+ case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT:
+ // TODO: this is currently not implemented
+ return ReturnValue(uint8_t{false});
+ case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT:
+ // 2D USM fill is not supported.
+ return ReturnValue(uint8_t{false});
default:
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/v2/context.hpp b/source/adapters/level_zero/v2/context.hpp
index 69bf406594..0ed701400d 100644
--- a/source/adapters/level_zero/v2/context.hpp
+++ b/source/adapters/level_zero/v2/context.hpp
@@ -13,17 +13,17 @@
#include
#include "command_list_cache.hpp"
+#include "common.hpp"
#include "event_pool_cache.hpp"
struct ur_context_handle_t_ : _ur_object {
ur_context_handle_t_(ze_context_handle_t hContext, uint32_t numDevices,
const ur_device_handle_t *phDevices, bool ownZeContext);
- ~ur_context_handle_t_() noexcept(false);
ur_result_t retain();
ur_result_t release();
- ze_context_handle_t getZeHandle() const;
+ inline ze_context_handle_t getZeHandle() const { return hContext.get(); }
ur_platform_handle_t getPlatform() const;
const std::vector &getDevices() const;
@@ -31,7 +31,7 @@ struct ur_context_handle_t_ : _ur_object {
// For that the Device or its root devices need to be in the context.
bool isValidDevice(ur_device_handle_t Device) const;
- const ze_context_handle_t hContext;
+ const v2::raii::ze_context_handle_t hContext;
const std::vector hDevices;
v2::command_list_cache_t commandListCache;
v2::event_pool_cache eventPoolCache;
diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp
index 3129e3dd3e..df99c83b53 100644
--- a/source/adapters/level_zero/v2/event.cpp
+++ b/source/adapters/level_zero/v2/event.cpp
@@ -45,10 +45,47 @@ ur_result_t ur_event_handle_t_::release() {
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) {
- return hEvent->retain();
-}
+namespace ur::level_zero {
+ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); }
-UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) {
+ur_result_t urEventRelease(ur_event_handle_t hEvent) {
return hEvent->release();
}
+
+ur_result_t urEventWait(uint32_t numEvents,
+ const ur_event_handle_t *phEventWaitList) {
+ for (uint32_t i = 0; i < numEvents; ++i) {
+ ZE2UR_CALL(zeEventHostSynchronize,
+ (phEventWaitList[i]->getZeEvent(), UINT64_MAX));
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
+ size_t propValueSize, void *pPropValue,
+ size_t *pPropValueSizeRet) {
+ UrReturnHelper returnValue(propValueSize, pPropValue, pPropValueSizeRet);
+
+ switch (propName) {
+ case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: {
+ auto zeStatus = ZE_CALL_NOCHECK(zeEventQueryStatus, (hEvent->getZeEvent()));
+
+ if (zeStatus == ZE_RESULT_NOT_READY) {
+ return returnValue(UR_EVENT_STATUS_SUBMITTED);
+ } else {
+ return returnValue(UR_EVENT_STATUS_COMPLETE);
+ }
+ }
+ case UR_EVENT_INFO_REFERENCE_COUNT: {
+ return returnValue(hEvent->RefCount.load());
+ }
+ default:
+ logger::error(
+ "Unsupported ParamName in urEventGetInfo: ParamName=ParamName={}(0x{})",
+ propName, logger::toHex(propName));
+ return UR_RESULT_ERROR_INVALID_VALUE;
+ }
+
+ return UR_RESULT_SUCCESS;
+}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/v2/event_provider_counter.cpp b/source/adapters/level_zero/v2/event_provider_counter.cpp
index 5334b2f888..76caea4c58 100644
--- a/source/adapters/level_zero/v2/event_provider_counter.cpp
+++ b/source/adapters/level_zero/v2/event_provider_counter.cpp
@@ -27,9 +27,9 @@ provider_counter::provider_counter(ur_platform_handle_t platform,
ZE2UR_CALL_THROWS(zeDriverGetExtensionFunctionAddress,
(platform->ZeDriver, "zexCounterBasedEventCreate",
(void **)&this->eventCreateFunc));
- ZE2UR_CALL_THROWS(
- zelLoaderTranslateHandle,
- (ZEL_HANDLE_CONTEXT, context->hContext, (void **)&translatedContext));
+ ZE2UR_CALL_THROWS(zelLoaderTranslateHandle,
+ (ZEL_HANDLE_CONTEXT, context->getZeHandle(),
+ (void **)&translatedContext));
ZE2UR_CALL_THROWS(
zelLoaderTranslateHandle,
(ZEL_HANDLE_DEVICE, device->ZeDevice, (void **)&translatedDevice));
@@ -39,7 +39,7 @@ event_allocation provider_counter::allocate() {
if (freelist.empty()) {
ZeStruct desc;
desc.index = 0;
- desc.signal = 0;
+ desc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
desc.wait = 0;
ze_event_handle_t handle;
diff --git a/source/adapters/level_zero/v2/event_provider_normal.cpp b/source/adapters/level_zero/v2/event_provider_normal.cpp
index f5a1c940c6..4df05c12ed 100644
--- a/source/adapters/level_zero/v2/event_provider_normal.cpp
+++ b/source/adapters/level_zero/v2/event_provider_normal.cpp
@@ -32,7 +32,7 @@ provider_pool::provider_pool(ur_context_handle_t context,
desc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
- ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr};
+ ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr, 0};
if (events == event_type::EVENT_COUNTER) {
counterBasedExt.flags =
@@ -43,7 +43,7 @@ provider_pool::provider_pool(ur_context_handle_t context,
}
ZE2UR_CALL_THROWS(zeEventPoolCreate,
- (context->hContext, &desc, 1,
+ (context->getZeHandle(), &desc, 1,
const_cast(&device->ZeDevice),
pool.ptr()));
@@ -51,7 +51,7 @@ provider_pool::provider_pool(ur_context_handle_t context,
for (int i = 0; i < EVENTS_BURST; ++i) {
ZeStruct desc;
desc.index = i;
- desc.signal = 0;
+ desc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
desc.wait = 0;
ZE2UR_CALL_THROWS(zeEventCreate, (pool.get(), &desc, freelist[i].ptr()));
}
diff --git a/source/adapters/level_zero/v2/event_provider_normal.hpp b/source/adapters/level_zero/v2/event_provider_normal.hpp
index 238ab2f360..1260964a4f 100644
--- a/source/adapters/level_zero/v2/event_provider_normal.hpp
+++ b/source/adapters/level_zero/v2/event_provider_normal.hpp
@@ -23,6 +23,7 @@
#include "event.hpp"
#include "../device.hpp"
+#include "../ur_interface_loader.hpp"
namespace v2 {
@@ -50,10 +51,10 @@ class provider_normal : public event_provider {
event_type etype, queue_type qtype)
: producedType(etype), queueType(qtype), urContext(context),
urDevice(device) {
- urDeviceRetain(device);
+ ur::level_zero::urDeviceRetain(device);
}
- ~provider_normal() override { urDeviceRelease(urDevice); }
+ ~provider_normal() override { ur::level_zero::urDeviceRelease(urDevice); }
event_allocation allocate() override;
ur_device_handle_t device() override;
diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp
index 58e1a10ef1..8bfad2d2ad 100644
--- a/source/adapters/level_zero/v2/kernel.cpp
+++ b/source/adapters/level_zero/v2/kernel.cpp
@@ -12,12 +12,14 @@
#include "context.hpp"
#include "kernel.hpp"
+#include "memory.hpp"
#include "../device.hpp"
#include "../platform.hpp"
#include "../program.hpp"
+#include "../ur_interface_loader.hpp"
-ur_single_device_kernel_t::ur_single_device_kernel_t(ze_device_handle_t hDevice,
+ur_single_device_kernel_t::ur_single_device_kernel_t(ur_device_handle_t hDevice,
ze_kernel_handle_t hKernel,
bool ownZeHandle)
: hDevice(hDevice), hKernel(hKernel, ownZeHandle) {
@@ -32,10 +34,12 @@ ur_result_t ur_single_device_kernel_t::release() {
return UR_RESULT_SUCCESS;
}
-ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_shared_handle_t hProgram,
+ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram,
const char *kernelName)
: hProgram(hProgram),
deviceKernels(hProgram->Context->getPlatform()->getNumDevices()) {
+ ur::level_zero::urProgramRetain(hProgram);
+
for (auto [zeDevice, zeModule] : hProgram->ZeModuleMap) {
ZeStruct zeKernelDesc;
zeKernelDesc.pKernelName = kernelName;
@@ -51,7 +55,7 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_shared_handle_t hProgram,
assert(urDevice != hProgram->Context->getDevices().end());
auto deviceId = (*urDevice)->Id.value();
- deviceKernels[deviceId].emplace(zeDevice, zeKernel, true);
+ deviceKernels[deviceId].emplace(*urDevice, zeKernel, true);
}
completeInitialization();
}
@@ -78,7 +82,8 @@ ur_result_t ur_kernel_handle_t_::release() {
singleDeviceKernelOpt.value().hKernel.reset();
}
}
- hProgram.reset();
+
+ UR_CALL_THROWS(ur::level_zero::urProgramRelease(hProgram));
return UR_RESULT_SUCCESS;
}
@@ -114,7 +119,7 @@ ur_kernel_handle_t_::getZeHandle(ur_device_handle_t hDevice) {
auto &kernel = deviceKernels[0].value();
// hDevice is nullptr for native handle
- if ((kernel.hDevice != nullptr && kernel.hDevice != hDevice->ZeDevice)) {
+ if ((kernel.hDevice != nullptr && kernel.hDevice != hDevice)) {
throw UR_RESULT_ERROR_INVALID_DEVICE;
}
@@ -190,25 +195,77 @@ ur_result_t ur_kernel_handle_t_::setArgPointer(
}
ur_program_handle_t ur_kernel_handle_t_::getProgramHandle() const {
- return hProgram.get();
+ return hProgram;
+}
+
+ur_result_t ur_kernel_handle_t_::setExecInfo(ur_kernel_exec_info_t propName,
+ const void *pPropValue) {
+ std::scoped_lock Guard(Mutex);
+
+ for (auto &kernel : deviceKernels) {
+ if (!kernel.has_value())
+ continue;
+ if (propName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS &&
+ *(static_cast(pPropValue)) == true) {
+ // The whole point for users really was to not need to know anything
+ // about the types of allocations kernel uses. So in DPC++ we always
+ // just set all 3 modes for each kernel.
+ ze_kernel_indirect_access_flags_t indirectFlags =
+ ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST |
+ ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE |
+ ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
+ ZE2UR_CALL(zeKernelSetIndirectAccess,
+ (kernel->hKernel.get(), indirectFlags));
+ } else if (propName == UR_KERNEL_EXEC_INFO_CACHE_CONFIG) {
+ ze_cache_config_flag_t zeCacheConfig{};
+ auto cacheConfig =
+ *(static_cast(pPropValue));
+ if (cacheConfig == UR_KERNEL_CACHE_CONFIG_LARGE_SLM)
+ zeCacheConfig = ZE_CACHE_CONFIG_FLAG_LARGE_SLM;
+ else if (cacheConfig == UR_KERNEL_CACHE_CONFIG_LARGE_DATA)
+ zeCacheConfig = ZE_CACHE_CONFIG_FLAG_LARGE_DATA;
+ else if (cacheConfig == UR_KERNEL_CACHE_CONFIG_DEFAULT)
+ zeCacheConfig = static_cast(0);
+ else
+ // Unexpected cache configuration value.
+ return UR_RESULT_ERROR_INVALID_VALUE;
+ ZE2UR_CALL(zeKernelSetCacheConfig,
+ (kernel->hKernel.get(), zeCacheConfig););
+ } else {
+ logger::error("urKernelSetExecInfo: unsupported ParamName");
+ return UR_RESULT_ERROR_INVALID_VALUE;
+ }
+ }
+
+ return UR_RESULT_SUCCESS;
+}
+
+std::vector ur_kernel_handle_t_::getDevices() const {
+ std::vector devices;
+ for (size_t i = 0; i < deviceKernels.size(); ++i) {
+ if (deviceKernels[i].has_value()) {
+ devices.push_back(deviceKernels[i].value().hDevice);
+ }
+ }
+ return devices;
}
-UR_APIEXPORT ur_result_t UR_APICALL
-urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName,
- ur_kernel_handle_t *phKernel) {
- *phKernel = new ur_kernel_handle_t_(
- ur_kernel_handle_t_::ur_program_shared_handle_t(hProgram), pKernelName);
+namespace ur::level_zero {
+ur_result_t urKernelCreate(ur_program_handle_t hProgram,
+ const char *pKernelName,
+ ur_kernel_handle_t *phKernel) {
+ *phKernel = new ur_kernel_handle_t_(hProgram, pKernelName);
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(
+ur_result_t urKernelRetain(
ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to retain
) {
hKernel->RefCount.increment();
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(
+ur_result_t urKernelRelease(
ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to release
) {
if (!hKernel->RefCount.decrementAndTest())
@@ -220,7 +277,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(
return UR_RESULT_SUCCESS;
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
+ur_result_t urKernelSetArgValue(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
uint32_t argIndex, ///< [in] argument index in range [0, num args - 1]
size_t argSize, ///< [in] size of argument type
@@ -233,7 +290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
return hKernel->setArgValue(argIndex, argSize, pProperties, pArgValue);
}
-UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(
+ur_result_t urKernelSetArgPointer(
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
uint32_t argIndex, ///< [in] argument index in range [0, num args - 1]
const ur_kernel_arg_pointer_properties_t
@@ -244,3 +301,162 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(
TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgPointer");
return hKernel->setArgPointer(argIndex, pProperties, pArgValue);
}
+
+ur_result_t
+urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
+ const ur_kernel_arg_mem_obj_properties_t *pProperties,
+ ur_mem_handle_t hArgValue) {
+ TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgMemObj");
+
+ // TODO: support properties
+ std::ignore = pProperties;
+
+ auto kernelDevices = hKernel->getDevices();
+ if (kernelDevices.size() == 1) {
+ auto zePtr = hArgValue->getPtr(kernelDevices.front());
+ return hKernel->setArgPointer(argIndex, nullptr, zePtr);
+ } else {
+ // TODO: Implement this for multi-device kernels.
+ // Do this the same way as in legacy (keep a pending Args vector and
+ // do actual allocation on kernel submission) or allocate the memory
+ // immediately (only for small allocations?)
+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+ }
+}
+
+ur_result_t
+urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex,
+ size_t argSize,
+ const ur_kernel_arg_local_properties_t *pProperties) {
+ TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgLocal");
+
+ std::ignore = pProperties;
+
+ return hKernel->setArgValue(argIndex, argSize, nullptr, nullptr);
+}
+
+ur_result_t urKernelSetExecInfo(
+ ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
+ ur_kernel_exec_info_t propName, ///< [in] name of the execution attribute
+ size_t propSize, ///< [in] size in byte the attribute value
+ const ur_kernel_exec_info_properties_t
+ *pProperties, ///< [in][optional] pointer to execution info properties
+ const void *pPropValue ///< [in][range(0, propSize)] pointer to memory
+ ///< location holding the property value.
+) {
+ std::ignore = propSize;
+ std::ignore = pProperties;
+
+ return hKernel->setExecInfo(propName, pPropValue);
+}
+
+ur_result_t urKernelGetGroupInfo(
+ ur_kernel_handle_t hKernel, ///< [in] handle of the Kernel object
+ ur_device_handle_t hDevice, ///< [in] handle of the Device object
+ ur_kernel_group_info_t
+ paramName, ///< [in] name of the work Group property to query
+ size_t
+ paramValueSize, ///< [in] size of the Kernel Work Group property value
+ void *pParamValue, ///< [in,out][optional][range(0, propSize)] value of the
+ ///< Kernel Work Group property.
+ size_t *pParamValueSizeRet ///< [out][optional] pointer to the actual size
+ ///< in bytes of data being queried by propName.
+) {
+ UrReturnHelper returnValue(paramValueSize, pParamValue, pParamValueSizeRet);
+
+ std::shared_lock Guard(hKernel->Mutex);
+ switch (paramName) {
+ case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
+ // TODO: To revisit after level_zero/issues/262 is resolved
+ struct {
+ size_t Arr[3];
+ } GlobalWorkSize = {{(hDevice->ZeDeviceComputeProperties->maxGroupSizeX *
+ hDevice->ZeDeviceComputeProperties->maxGroupCountX),
+ (hDevice->ZeDeviceComputeProperties->maxGroupSizeY *
+ hDevice->ZeDeviceComputeProperties->maxGroupCountY),
+ (hDevice->ZeDeviceComputeProperties->maxGroupSizeZ *
+ hDevice->ZeDeviceComputeProperties->maxGroupCountZ)}};
+ return returnValue(GlobalWorkSize);
+ }
+ case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: {
+ ZeStruct workGroupProperties;
+ workGroupProperties.maxGroupSize = 0;
+
+ ZeStruct kernelProperties;
+ kernelProperties.pNext = &workGroupProperties;
+
+ auto zeDevice = hKernel->getZeHandle(hDevice);
+ if (zeDevice) {
+ auto zeResult =
+ ZE_CALL_NOCHECK(zeKernelGetProperties, (zeDevice, &kernelProperties));
+ if (zeResult == ZE_RESULT_SUCCESS &&
+ workGroupProperties.maxGroupSize != 0) {
+ return returnValue(workGroupProperties.maxGroupSize);
+ }
+ return returnValue(
+ uint64_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize});
+ }
+ }
+ case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
+ auto props = hKernel->getProperties(hDevice);
+ struct {
+ size_t Arr[3];
+ } WgSize = {{props.requiredGroupSizeX, props.requiredGroupSizeY,
+ props.requiredGroupSizeZ}};
+ return returnValue(WgSize);
+ }
+ case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: {
+ auto props = hKernel->getProperties(hDevice);
+ return returnValue(uint32_t{props.localMemSize});
+ }
+ case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
+ return returnValue(
+ size_t{hDevice->ZeDeviceProperties->physicalEUSimdWidth});
+ }
+ case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
+ auto props = hKernel->getProperties(hDevice);
+ return returnValue(uint32_t{props.privateMemSize});
+ }
+ default: {
+ logger::error(
+ "Unknown ParamName in urKernelGetGroupInfo: ParamName={}(0x{})",
+ paramName, logger::toHex(paramName));
+ return UR_RESULT_ERROR_INVALID_VALUE;
+ }
+ }
+ return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urKernelGetSubGroupInfo(
+ ur_kernel_handle_t hKernel, ///< [in] handle of the Kernel object
+ ur_device_handle_t hDevice, ///< [in] handle of the Device object
+ ur_kernel_sub_group_info_t
+ propName, ///< [in] name of the SubGroup property to query
+ size_t propSize, ///< [in] size of the Kernel SubGroup property value
+ void *pPropValue, ///< [in,out][range(0, propSize)][optional] value of the
+ ///< Kernel SubGroup property.
+ size_t *pPropSizeRet ///< [out][optional] pointer to the actual size in
+ ///< bytes of data being queried by propName.
+) {
+ std::ignore = hDevice;
+
+ UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet);
+
+ auto props = hKernel->getProperties(hDevice);
+
+ std::shared_lock Guard(hKernel->Mutex);
+ if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) {
+ returnValue(uint32_t{props.maxSubgroupSize});
+ } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) {
+ returnValue(uint32_t{props.maxNumSubgroups});
+ } else if (propName == UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS) {
+ returnValue(uint32_t{props.requiredNumSubGroups});
+ } else if (propName == UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) {
+ returnValue(uint32_t{props.requiredSubgroupSize});
+ } else {
+ die("urKernelGetSubGroupInfo: parameter not implemented");
+ return {};
+ }
+ return UR_RESULT_SUCCESS;
+}
+} // namespace ur::level_zero
diff --git a/source/adapters/level_zero/v2/kernel.hpp b/source/adapters/level_zero/v2/kernel.hpp
index b6309ab20f..2d3a891826 100644
--- a/source/adapters/level_zero/v2/kernel.hpp
+++ b/source/adapters/level_zero/v2/kernel.hpp
@@ -15,32 +15,19 @@
#include "common.hpp"
struct ur_single_device_kernel_t {
- ur_single_device_kernel_t(ze_device_handle_t hDevice,
+ ur_single_device_kernel_t(ur_device_handle_t hDevice,
ze_kernel_handle_t hKernel, bool ownZeHandle);
ur_result_t release();
- ze_device_handle_t hDevice;
+ ur_device_handle_t hDevice;
v2::raii::ze_kernel_handle_t hKernel;
mutable ZeCache> zeKernelProperties;
};
struct ur_kernel_handle_t_ : _ur_object {
private:
- static inline ur_result_t
- internalProgramRelease(ur_program_handle_t hProgram) {
- // do a release on the program this kernel was part of without delete of the
- // program handle.
- hProgram->ur_release_program_resources(false);
- return UR_RESULT_SUCCESS;
- }
-
public:
- using ur_program_shared_handle_t =
- v2::raii::ur_shared_handle;
-
- ur_kernel_handle_t_(ur_program_shared_handle_t hProgram,
- const char *kernelName);
+ ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *kernelName);
// From native handle
ur_kernel_handle_t_(ur_native_handle_t hNativeKernel,
@@ -53,6 +40,9 @@ struct ur_kernel_handle_t_ : _ur_object {
// Get program handle of the kernel.
ur_program_handle_t getProgramHandle() const;
+ // Get devices the kernel is built for.
+ std::vector getDevices() const;
+
// Get name of the kernel.
const std::string &getName() const;
@@ -70,12 +60,16 @@ struct ur_kernel_handle_t_ : _ur_object {
const ur_kernel_arg_pointer_properties_t *pProperties,
const void *pArgValue);
+ // Implementation of urKernelSetExecInfo.
+ ur_result_t setExecInfo(ur_kernel_exec_info_t propName,
+ const void *pPropValue);
+
// Perform cleanup.
ur_result_t release();
private:
// Keep the program of the kernel.
- ur_program_shared_handle_t hProgram;
+ ur_program_handle_t hProgram;
// Vector of ur_single_device_kernel_t indexed by device->Id
std::vector> deviceKernels;
diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp
new file mode 100644
index 0000000000..fc9a7522a4
--- /dev/null
+++ b/source/adapters/level_zero/v2/memory.cpp
@@ -0,0 +1,180 @@
+//===--------- memory.cpp - Level Zero Adapter ---------------------------===//
+//
+// Copyright (C) 2024 Intel Corporation
+//
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+// Exceptions. See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "memory.hpp"
+#include "context.hpp"
+
+#include "../helpers/memory_helpers.hpp"
+
+ur_mem_handle_t_::ur_mem_handle_t_(ur_context_handle_t hContext, size_t size)
+ : hContext(hContext), size(size) {}
+
+ur_host_mem_handle_t::ur_host_mem_handle_t(ur_context_handle_t hContext,
+ void *hostPtr, size_t size,
+ host_ptr_action_t hostPtrAction)
+ : ur_mem_handle_t_(hContext, size) {
+ bool hostPtrImported = false;
+ if (hostPtrAction == host_ptr_action_t::import) {
+ hostPtrImported =
+ maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated,
+ hContext->getZeHandle(), hostPtr, size);
+ }
+
+ if (!hostPtrImported) {
+ // TODO: use UMF
+ ZeStruct hostDesc;
+ ZE2UR_CALL_THROWS(zeMemAllocHost, (hContext->getZeHandle(), &hostDesc, size,
+ 0, &this->ptr));
+
+ if (hostPtr) {
+ std::memcpy(this->ptr, hostPtr, size);
+ }
+ }
+}
+
+ur_host_mem_handle_t::~ur_host_mem_handle_t() {
+ // TODO: use UMF API here
+ if (ptr) {
+ ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr));
+ }
+}
+
+void *ur_host_mem_handle_t::getPtr(ur_device_handle_t hDevice) {
+ std::ignore = hDevice;
+ return ptr;
+}
+
+ur_device_mem_handle_t::ur_device_mem_handle_t(ur_context_handle_t hContext,
+ void *hostPtr, size_t size)
+ : ur_mem_handle_t_(hContext, size),
+ deviceAllocations(hContext->getPlatform()->getNumDevices()) {
+ // Legacy adapter allocated the memory directly on a device (first on the
+ // context) and if the buffer is used on another device, memory is migrated
+ // (depending on an env var setting).
+ //
+ // TODO: port this behavior or figure out if it makes sense to keep the memory
+ // in a host buffer (e.g. for smaller sizes).
+ if (hostPtr) {
+ buffer.assign(reinterpret_cast(hostPtr),
+ reinterpret_cast