diff --git a/.github/workflows/benchmarks_compute.yml b/.github/workflows/benchmarks_compute.yml index 126872cedd..a398e6b56b 100644 --- a/.github/workflows/benchmarks_compute.yml +++ b/.github/workflows/benchmarks_compute.yml @@ -151,9 +151,17 @@ jobs: - name: Build SYCL run: cmake --build ${{github.workspace}}/sycl_build -j - - name: Set oneAPI Device Selector - run: | - echo "ONEAPI_DEVICE_SELECTOR=${{ matrix.adapter.str_name }}:${{ matrix.adapter.unit }}" >> $GITHUB_ENV + - name: Configure UR + working-directory: ${{github.workspace}}/ur-repo + run: > + cmake -DCMAKE_BUILD_TYPE=Release + -B${{github.workspace}}/ur-repo/build + -DUR_BUILD_TESTS=OFF + -DUR_BUILD_ADAPTER_L0=ON + -DUR_BUILD_ADAPTER_L0_V2=ON + + - name: Build UR + run: cmake --build ${{github.workspace}}/ur-repo/build -j $(nproc) - name: Run benchmarks id: benchmarks diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml index 88b0877c27..fa23c38248 100644 --- a/.github/workflows/build-hw-reusable.yml +++ b/.github/workflows/build-hw-reusable.yml @@ -18,6 +18,10 @@ on: required: false type: string default: OFF + static_adapter: + required: false + type: string + default: OFF permissions: contents: read @@ -36,7 +40,7 @@ jobs: strategy: matrix: adapter: [ - {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}"}, + {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}", static_adapter: "${{inputs.static_loader}}"}, ] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] @@ -49,6 +53,10 @@ jobs: build_type: Release - adapter: {static_Loader: ON} compiler: {c: clang, cxx: clang++} + - adapter: {static_adapter: ON} + build_type: Release + - adapter: {static_adapter: ON} + compiler: {c: clang, cxx: clang++} runs-on: ${{inputs.runner_name}} @@ -76,6 +84,7 @@ jobs: -DUR_BUILD_TESTS=ON -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}} + -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}} -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index cd5c91854c..6662f7833d 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -20,7 +20,7 @@ jobs: compiler: [{c: gcc, cxx: g++}] libbacktrace: ['-DVAL_USE_LIBBACKTRACE_BACKTRACE=OFF'] pool_tracking: ['-DUMF_ENABLE_POOL_TRACKING=ON', '-DUMF_ENABLE_POOL_TRACKING=OFF'] - latency_tracking: ['-DUMF_ENABLE_LATENCY_TRACKING=OFF'] + latency_tracking: ['-DUR_ENABLE_LATENCY_HISTOGRAM=OFF'] include: - os: 'ubuntu-22.04' build_type: Release @@ -40,7 +40,7 @@ jobs: - os: 'ubuntu-22.04' build_type: Release compiler: {c: clang, cxx: clang++} - latency_tracking: '-DUMF_ENABLE_LATENCY_TRACKING=ON' + latency_tracking: '-DUR_ENABLE_LATENCY_HISTOGRAM=ON' runs-on: ${{ (matrix.os == 'ubuntu-22.04' && github.repository_owner == 'oneapi-src') && 'intel-ubuntu-22.04' || matrix.os }} steps: @@ -155,6 +155,7 @@ jobs: adapter_name: L0 runner_name: L0 static_loader: ON + static_adapter: ON opencl: name: OpenCL @@ -216,7 +217,8 @@ jobs: os: ['windows-2019', 'windows-2022'] adapter: [ {name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}, - {name: None, var: ''}, {name: L0_V2, var: 
'-DUR_BUILD_ADAPTER_L0_V2=ON'} + {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}, + {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} ] # TODO: building level zero loader on windows-2019 and clang-cl is currently broken @@ -225,16 +227,25 @@ jobs: adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} - os: 'windows-2019' adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} + - os: 'windows-2019' + adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} compiler: {c: clang-cl, cxx: clang-cl} - adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} compiler: {c: clang-cl, cxx: clang-cl} + - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} + compiler: {c: clang-cl, cxx: clang-cl} build_type: [Debug, Release] compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}] include: - compiler: {c: clang-cl, cxx: clang-cl} toolset: "-T ClangCL" + - os: 'windows-2022' + adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} + build_type: 'Release' + compiler: {c: cl, cxx: cl} + runs-on: ${{matrix.os}} steps: diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml index 148232bd0a..32b8d58e7a 100644 --- a/.github/workflows/e2e_core.yml +++ b/.github/workflows/e2e_core.yml @@ -169,10 +169,6 @@ jobs: -DCMAKE_CXX_COMPILER="$(which clang++)" -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py" - - name: Set LIT_XFAIL_NOT - if: inputs.xfail_not != '' - run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV - - name: Set LIT_XFAIL if: inputs.xfail != '' run: echo "LIT_XFAIL=${{inputs.xfail}}" >> $GITHUB_ENV @@ -181,6 +177,10 @@ jobs: if: inputs.filter_out != '' run: echo "LIT_FILTER_OUT=${{inputs.filter_out}}" >> $GITHUB_ENV + - name: Set LIT_XFAIL_NOT + if: inputs.xfail_not != '' + run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV + # TODO: remove once intel/llvm lit tests can properly recognize the GPU - name: Configure hardware platform feature for L0 if: matrix.adapter.name == 'L0' diff --git a/.github/workflows/e2e_cuda.yml b/.github/workflows/e2e_cuda.yml index 6b4b0ca16c..c2f1d969b8 100644 --- a/.github/workflows/e2e_cuda.yml +++ b/.github/workflows/e2e_cuda.yml @@ -21,3 +21,4 @@ jobs: config: "--cuda" unit: "gpu" extra_lit_flags: "-sv --max-time=3600" + xfail: "Regression/device_num.cpp" diff --git a/.github/workflows/e2e_level_zero.yml b/.github/workflows/e2e_level_zero.yml index ecab9a85ba..3b2511e0f9 100644 --- a/.github/workflows/e2e_level_zero.yml +++ b/.github/workflows/e2e_level_zero.yml @@ -21,9 +21,11 @@ jobs: config: "" unit: "gpu" # Failing tests - xfail: "DeviceCodeSplit/grf.cpp;ESIMD/grf.cpp;ESIMD/mask_expand_load.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp; SYCL :: 
ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_out_bounds.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_rowmajorA_rowmajorB.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/joint_matrix_out_bounds.cpp;Matrix/joint_matrix_unaligned_k.cpp" + xfail: "DeviceCodeSplit/grf.cpp;ESIMD/mask_expand_load.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp;ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_out_bounds.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_rowmajorA_rowmajorB.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/joint_matrix_out_bounds.cpp;Matrix/joint_matrix_unaligned_k.cpp;Matrix/SPVCooperativeMatrix/SG32/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d_cont.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_scalar.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;Matrix/SPVCooperativeMatrix/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp;Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp" + # Unexpectedly Passed Tests + xfail_not: "" # Flaky tests - filter_out: "ESIMD/named_barriers/loop_extended.cpp;ESIMD/local_accessor_copy_to_from.cpp;" + filter_out: "Basic/accessor/accessor.cpp|DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp|Graph/Explicit/interop-level-zero-launch-kernel.cpp|Graph/RecordReplay/interop-level-zero-launch-kernel.cpp|syclcompat/launch/launch_policy_lmem.cpp" # These runners by default spawn upwards of 260 workers. 
# We also add a time out just in case some test hangs extra_lit_flags: "--param gpu-intel-pvc=True --param gpu-intel-pvc-1T=True -sv -j 100 --max-time=3600" diff --git a/.github/workflows/e2e_opencl.yml b/.github/workflows/e2e_opencl.yml index fa7984bb3f..e4714b2434 100644 --- a/.github/workflows/e2e_opencl.yml +++ b/.github/workflows/e2e_opencl.yml @@ -20,5 +20,5 @@ jobs: prefix: "" config: "" unit: "cpu" - xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp" + xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp;NonUniformGroups/ballot_group.cpp;NonUniformGroups/ballot_group_algorithms.cpp;NonUniformGroups/fixed_size_group_algorithms.cpp;NonUniformGroups/opportunistic_group.cpp;NonUniformGroups/opportunistic_group_algorithms.cpp;NonUniformGroups/tangle_group.cpp;NonUniformGroups/tangle_group_algorithms.cpp" extra_lit_flags: "-sv --max-time=3600" diff --git a/CMakeLists.txt b/CMakeLists.txt index f54cbd1067..a908a22d80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF) option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF) option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF) option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF) +option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF) option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF) diff --git a/README.md b/README.md index 7ba72b43d3..29279a0059 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,7 @@ List of options provided by CMake: | UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` | | UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` | | UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` | -| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` | +| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `$ENV{ROCM_PATH}` or `/opt/rocm` | | UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` | | UR_HIP_HSA_INCLUDE_DIRS | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include` | | UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib` | diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 24cb6f8e54..6a5700da8b 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -70,6 +70,7 @@ function(add_ur_target_compile_options name) ) if (CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) + target_compile_options(${name} PRIVATE -fvisibility=hidden) endif() if(UR_DEVELOPER_MODE) target_compile_options(${name} PRIVATE diff --git a/examples/collector/CMakeLists.txt 
b/examples/collector/CMakeLists.txt index 5fe484d0b8..6dd112aae0 100644 --- a/examples/collector/CMakeLists.txt +++ b/examples/collector/CMakeLists.txt @@ -17,6 +17,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI}) target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(${TARGET_NAME} PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_CALLBACK_API_EXPORTS) diff --git a/include/ur_api.h b/include/ur_api.h index 082890e73d..e75793f3d2 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -332,9 +332,17 @@ typedef enum ur_structure_type_t { #if defined(_WIN32) /// @brief Microsoft-specific dllexport storage-class attribute #define UR_APIEXPORT __declspec(dllexport) +#endif // defined(_WIN32) +#endif // UR_APIEXPORT + +/////////////////////////////////////////////////////////////////////////////// +#ifndef UR_APIEXPORT +#if __GNUC__ >= 4 +/// @brief GCC-specific dllexport storage-class attribute +#define UR_APIEXPORT __attribute__((visibility("default"))) #else #define UR_APIEXPORT -#endif // defined(_WIN32) +#endif // __GNUC__ >= 4 #endif // UR_APIEXPORT /////////////////////////////////////////////////////////////////////////////// @@ -1569,8 +1577,7 @@ typedef enum ur_device_info_t { ///< ::urDevicePartition UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress - UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of sub group sizes supported on Intel - ///< device + UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of supported sub group sizes UR_DEVICE_INFO_USM_HOST_SUPPORT = 83, ///< [::ur_device_usm_access_capability_flags_t] support USM host memory ///< access UR_DEVICE_INFO_USM_DEVICE_SUPPORT = 84, ///< [::ur_device_usm_access_capability_flags_t] support USM device memory diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def new file mode 100644 index 0000000000..5cd8dd7926 --- /dev/null +++ b/include/ur_api_funcs.def @@ -0,0 +1,212 @@ + +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ur_api_funcs.def + * @version v0.11-r0 + * + */ + + // Auto-generated file, do not edit. 
+ +_UR_API(urPlatformGet) +_UR_API(urPlatformGetInfo) +_UR_API(urPlatformGetNativeHandle) +_UR_API(urPlatformCreateWithNativeHandle) +_UR_API(urPlatformGetApiVersion) +_UR_API(urPlatformGetBackendOption) +_UR_API(urContextCreate) +_UR_API(urContextRetain) +_UR_API(urContextRelease) +_UR_API(urContextGetInfo) +_UR_API(urContextGetNativeHandle) +_UR_API(urContextCreateWithNativeHandle) +_UR_API(urContextSetExtendedDeleter) +_UR_API(urEventGetInfo) +_UR_API(urEventGetProfilingInfo) +_UR_API(urEventWait) +_UR_API(urEventRetain) +_UR_API(urEventRelease) +_UR_API(urEventGetNativeHandle) +_UR_API(urEventCreateWithNativeHandle) +_UR_API(urEventSetCallback) +_UR_API(urProgramCreateWithIL) +_UR_API(urProgramCreateWithBinary) +_UR_API(urProgramBuild) +_UR_API(urProgramCompile) +_UR_API(urProgramLink) +_UR_API(urProgramRetain) +_UR_API(urProgramRelease) +_UR_API(urProgramGetFunctionPointer) +_UR_API(urProgramGetGlobalVariablePointer) +_UR_API(urProgramGetInfo) +_UR_API(urProgramGetBuildInfo) +_UR_API(urProgramSetSpecializationConstants) +_UR_API(urProgramGetNativeHandle) +_UR_API(urProgramCreateWithNativeHandle) +_UR_API(urProgramBuildExp) +_UR_API(urProgramCompileExp) +_UR_API(urProgramLinkExp) +_UR_API(urKernelCreate) +_UR_API(urKernelGetInfo) +_UR_API(urKernelGetGroupInfo) +_UR_API(urKernelGetSubGroupInfo) +_UR_API(urKernelRetain) +_UR_API(urKernelRelease) +_UR_API(urKernelGetNativeHandle) +_UR_API(urKernelCreateWithNativeHandle) +_UR_API(urKernelGetSuggestedLocalWorkSize) +_UR_API(urKernelSetArgValue) +_UR_API(urKernelSetArgLocal) +_UR_API(urKernelSetArgPointer) +_UR_API(urKernelSetExecInfo) +_UR_API(urKernelSetArgSampler) +_UR_API(urKernelSetArgMemObj) +_UR_API(urKernelSetSpecializationConstants) +_UR_API(urKernelSuggestMaxCooperativeGroupCountExp) +_UR_API(urQueueGetInfo) +_UR_API(urQueueCreate) +_UR_API(urQueueRetain) +_UR_API(urQueueRelease) +_UR_API(urQueueGetNativeHandle) +_UR_API(urQueueCreateWithNativeHandle) +_UR_API(urQueueFinish) +_UR_API(urQueueFlush) +_UR_API(urSamplerCreate) +_UR_API(urSamplerRetain) +_UR_API(urSamplerRelease) +_UR_API(urSamplerGetInfo) +_UR_API(urSamplerGetNativeHandle) +_UR_API(urSamplerCreateWithNativeHandle) +_UR_API(urMemImageCreate) +_UR_API(urMemBufferCreate) +_UR_API(urMemRetain) +_UR_API(urMemRelease) +_UR_API(urMemBufferPartition) +_UR_API(urMemGetNativeHandle) +_UR_API(urMemBufferCreateWithNativeHandle) +_UR_API(urMemImageCreateWithNativeHandle) +_UR_API(urMemGetInfo) +_UR_API(urMemImageGetInfo) +_UR_API(urPhysicalMemCreate) +_UR_API(urPhysicalMemRetain) +_UR_API(urPhysicalMemRelease) +_UR_API(urAdapterGet) +_UR_API(urAdapterRelease) +_UR_API(urAdapterRetain) +_UR_API(urAdapterGetLastError) +_UR_API(urAdapterGetInfo) +_UR_API(urEnqueueKernelLaunch) +_UR_API(urEnqueueEventsWait) +_UR_API(urEnqueueEventsWaitWithBarrier) +_UR_API(urEnqueueMemBufferRead) +_UR_API(urEnqueueMemBufferWrite) +_UR_API(urEnqueueMemBufferReadRect) +_UR_API(urEnqueueMemBufferWriteRect) +_UR_API(urEnqueueMemBufferCopy) +_UR_API(urEnqueueMemBufferCopyRect) +_UR_API(urEnqueueMemBufferFill) +_UR_API(urEnqueueMemImageRead) +_UR_API(urEnqueueMemImageWrite) +_UR_API(urEnqueueMemImageCopy) +_UR_API(urEnqueueMemBufferMap) +_UR_API(urEnqueueMemUnmap) +_UR_API(urEnqueueUSMFill) +_UR_API(urEnqueueUSMMemcpy) +_UR_API(urEnqueueUSMPrefetch) +_UR_API(urEnqueueUSMAdvise) +_UR_API(urEnqueueUSMFill2D) +_UR_API(urEnqueueUSMMemcpy2D) +_UR_API(urEnqueueDeviceGlobalVariableWrite) +_UR_API(urEnqueueDeviceGlobalVariableRead) +_UR_API(urEnqueueReadHostPipe) +_UR_API(urEnqueueWriteHostPipe) 
+_UR_API(urEnqueueKernelLaunchCustomExp) +_UR_API(urEnqueueCooperativeKernelLaunchExp) +_UR_API(urEnqueueTimestampRecordingExp) +_UR_API(urEnqueueNativeCommandExp) +_UR_API(urBindlessImagesUnsampledImageHandleDestroyExp) +_UR_API(urBindlessImagesSampledImageHandleDestroyExp) +_UR_API(urBindlessImagesImageAllocateExp) +_UR_API(urBindlessImagesImageFreeExp) +_UR_API(urBindlessImagesUnsampledImageCreateExp) +_UR_API(urBindlessImagesSampledImageCreateExp) +_UR_API(urBindlessImagesImageCopyExp) +_UR_API(urBindlessImagesImageGetInfoExp) +_UR_API(urBindlessImagesMipmapGetLevelExp) +_UR_API(urBindlessImagesMipmapFreeExp) +_UR_API(urBindlessImagesImportExternalMemoryExp) +_UR_API(urBindlessImagesMapExternalArrayExp) +_UR_API(urBindlessImagesMapExternalLinearMemoryExp) +_UR_API(urBindlessImagesReleaseExternalMemoryExp) +_UR_API(urBindlessImagesImportExternalSemaphoreExp) +_UR_API(urBindlessImagesReleaseExternalSemaphoreExp) +_UR_API(urBindlessImagesWaitExternalSemaphoreExp) +_UR_API(urBindlessImagesSignalExternalSemaphoreExp) +_UR_API(urUSMHostAlloc) +_UR_API(urUSMDeviceAlloc) +_UR_API(urUSMSharedAlloc) +_UR_API(urUSMFree) +_UR_API(urUSMGetMemAllocInfo) +_UR_API(urUSMPoolCreate) +_UR_API(urUSMPoolRetain) +_UR_API(urUSMPoolRelease) +_UR_API(urUSMPoolGetInfo) +_UR_API(urUSMPitchedAllocExp) +_UR_API(urUSMImportExp) +_UR_API(urUSMReleaseExp) +_UR_API(urCommandBufferCreateExp) +_UR_API(urCommandBufferRetainExp) +_UR_API(urCommandBufferReleaseExp) +_UR_API(urCommandBufferFinalizeExp) +_UR_API(urCommandBufferAppendKernelLaunchExp) +_UR_API(urCommandBufferAppendUSMMemcpyExp) +_UR_API(urCommandBufferAppendUSMFillExp) +_UR_API(urCommandBufferAppendMemBufferCopyExp) +_UR_API(urCommandBufferAppendMemBufferWriteExp) +_UR_API(urCommandBufferAppendMemBufferReadExp) +_UR_API(urCommandBufferAppendMemBufferCopyRectExp) +_UR_API(urCommandBufferAppendMemBufferWriteRectExp) +_UR_API(urCommandBufferAppendMemBufferReadRectExp) +_UR_API(urCommandBufferAppendMemBufferFillExp) +_UR_API(urCommandBufferAppendUSMPrefetchExp) +_UR_API(urCommandBufferAppendUSMAdviseExp) +_UR_API(urCommandBufferEnqueueExp) +_UR_API(urCommandBufferRetainCommandExp) +_UR_API(urCommandBufferReleaseCommandExp) +_UR_API(urCommandBufferUpdateKernelLaunchExp) +_UR_API(urCommandBufferGetInfoExp) +_UR_API(urCommandBufferCommandGetInfoExp) +_UR_API(urUsmP2PEnablePeerAccessExp) +_UR_API(urUsmP2PDisablePeerAccessExp) +_UR_API(urUsmP2PPeerAccessGetInfoExp) +_UR_API(urVirtualMemGranularityGetInfo) +_UR_API(urVirtualMemReserve) +_UR_API(urVirtualMemFree) +_UR_API(urVirtualMemMap) +_UR_API(urVirtualMemUnmap) +_UR_API(urVirtualMemSetAccess) +_UR_API(urVirtualMemGetInfo) +_UR_API(urDeviceGet) +_UR_API(urDeviceGetInfo) +_UR_API(urDeviceRetain) +_UR_API(urDeviceRelease) +_UR_API(urDevicePartition) +_UR_API(urDeviceSelectBinary) +_UR_API(urDeviceGetNativeHandle) +_UR_API(urDeviceCreateWithNativeHandle) +_UR_API(urDeviceGetGlobalTimestamps) +_UR_API(urLoaderConfigCreate) +_UR_API(urLoaderConfigEnableLayer) +_UR_API(urLoaderConfigGetInfo) +_UR_API(urLoaderConfigRelease) +_UR_API(urLoaderConfigRetain) +_UR_API(urLoaderConfigSetCodeLocationCallback) +_UR_API(urLoaderConfigSetMockingEnabled) +_UR_API(urLoaderInit) +_UR_API(urLoaderTearDown) diff --git a/include/ur_print.hpp b/include/ur_print.hpp index f71cc12b32..681e8e814d 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -17403,6 +17403,11 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const 
ur_bool_t value) { + os << (value ? "true" : "false"); + return os; +} + namespace ur::details { /////////////////////////////////////////////////////////////////////////////// // @brief Print pointer value diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py index c7f263c253..e4377c8b65 100644 --- a/scripts/benchmarks/benches/base.py +++ b/scripts/benchmarks/benches/base.py @@ -16,9 +16,12 @@ class Benchmark: def __init__(self, directory): self.directory = directory + self.adapter_path = os.path.join(options.ur_dir, 'build', 'lib', f"libur_adapter_{options.ur_adapter_name}.so") def run_bench(self, command, env_vars): - return run(command=command, env_vars=env_vars, add_sycl=True, cwd=options.benchmark_cwd).stdout.decode() + env_vars_with_forced_adapter = env_vars.copy() + env_vars_with_forced_adapter.update({'UR_ADAPTERS_FORCE_LOAD': self.adapter_path}) + return run(command=command, env_vars=env_vars_with_forced_adapter, add_sycl=True, cwd=options.benchmark_cwd).stdout.decode() def create_data_path(self, name): data_path = os.path.join(self.directory, "data", name) diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py index 672875f2dd..cf164721a6 100644 --- a/scripts/benchmarks/benches/compute.py +++ b/scripts/benchmarks/benches/compute.py @@ -15,7 +15,6 @@ class ComputeBench: def __init__(self, directory): self.directory = directory self.built = False - self.adapter_short_name = {'level_zero' : 'L0', "level_zero_v2" : 'L0_V2'} return def setup(self): @@ -35,11 +34,9 @@ def setup(self): f"-DALLOW_WARNINGS=ON", f"-DBUILD_UR=ON", f"-DUR_BUILD_TESTS=OFF", - f"-DUR_BUILD_ADAPTER_L0=ON", f"-DUR_BUILD_TESTS=OFF", f"-DUMF_DISABLE_HWLOC=ON", f"-DBENCHMARK_UR_SOURCE_DIR={options.ur_dir}", - f"-DUR_BUILD_ADAPTER_{self.adapter_short_name[options.ur_adapter_name]}=ON" ] run(configure_command, add_sycl=True) @@ -47,7 +44,6 @@ def setup(self): self.built = True self.bins = os.path.join(build_path, 'bin') - self.libs = os.path.join(build_path, 'lib') class ComputeBenchmark(Benchmark): def __init__(self, bench, name, test): @@ -82,7 +78,7 @@ def run(self, env_vars) -> Result: result = self.run_bench(command, env_vars) (label, mean) = self.parse_output(result) - return Result(label=label, value=mean, command=command, env=env_vars, stdout=result) + return Result(label=label, value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) def parse_output(self, output): csv_file = io.StringIO(output) @@ -130,9 +126,6 @@ def name(self): order = "in order" if self.ioq else "out of order" return f"api_overhead_benchmark_ur SubmitKernel {order}" - def extra_env_vars(self) -> dict: - return {"UR_ADAPTERS_FORCE_LOAD" : os.path.join(self.bench.libs, f"libur_adapter_{options.ur_adapter_name}.so")} - def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py index e5601c6563..06d2222ac4 100644 --- a/scripts/benchmarks/benches/velocity.py +++ b/scripts/benchmarks/benches/velocity.py @@ -61,7 +61,7 @@ def run(self, env_vars) -> Result: result = self.run_bench(command, env_vars) - return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result) + return Result(label=self.bench_name, value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) def teardown(self): return diff --git a/scripts/benchmarks/main.py 
b/scripts/benchmarks/main.py index a9850dfc67..d2b7ef8cd0 100755 --- a/scripts/benchmarks/main.py +++ b/scripts/benchmarks/main.py @@ -52,34 +52,46 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())] for benchmark in benchmarks: - print(f"setting up {benchmark.name()}... ", end='', flush=True) - benchmark.setup() - print("complete.") + try: + print(f"setting up {benchmark.name()}... ", end='', flush=True) + benchmark.setup() + print("complete.") + except Exception as e: + if options.exit_on_failure: + raise e + else: + print(f"failed: {e}") results = [] for benchmark in benchmarks: - merged_env_vars = {**additional_env_vars} - iteration_results = [] - for iter in range(options.iterations): - print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True) - bench_results = benchmark.run(merged_env_vars) - if bench_results is not None: - print(f"complete ({bench_results.value} {benchmark.unit()}).") - iteration_results.append(bench_results) + try: + merged_env_vars = {**additional_env_vars} + iteration_results = [] + for iter in range(options.iterations): + print(f"running {benchmark.name()}, iteration {iter}... ", end='', flush=True) + bench_results = benchmark.run(merged_env_vars) + if bench_results is not None: + print(f"complete ({bench_results.value} {benchmark.unit()}).") + iteration_results.append(bench_results) + else: + print(f"did not finish.") + + if len(iteration_results) == 0: + continue + + iteration_results.sort(key=lambda res: res.value) + median_index = len(iteration_results) // 2 + median_result = iteration_results[median_index] + + median_result.unit = benchmark.unit() + median_result.name = benchmark.name() + + results.append(median_result) + except Exception as e: + if options.exit_on_failure: + raise e else: - print(f"did not finish.") - - if len(iteration_results) == 0: - continue - - iteration_results.sort(key=lambda res: res.value) - median_index = len(iteration_results) // 2 - median_result = iteration_results[median_index] - - median_result.unit = benchmark.unit() - median_result.name = benchmark.name() - - results.append(median_result) + print(f"failed: {e}") for benchmark in benchmarks: print(f"tearing down {benchmark.name()}... 
", end='', flush=True) @@ -126,6 +138,7 @@ def validate_and_parse_env_args(env_args): parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=600) parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None) parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true") + parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true") args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -137,6 +150,7 @@ def validate_and_parse_env_args(env_args): options.timeout = args.timeout options.ur_dir = args.ur_dir options.ur_adapter_name = args.ur_adapter_name + options.exit_on_failure = args.exit_on_failure benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output.py index 26deabe099..1a61f9909c 100644 --- a/scripts/benchmarks/output.py +++ b/scripts/benchmarks/output.py @@ -116,7 +116,7 @@ def generate_summary_table(chart_data: dict[str, list[Result]]): if key in results: value = results[key].value if key == best_key: - row += f" `**{value}**` |" # Highlight the best value + row += f" {value} |" # Highlight the best value else: row += f" {value} |" else: @@ -132,6 +132,7 @@ def generate_markdown(chart_data: dict[str, list[Result]]): return f""" # Summary +result is better\n {summary_table} # Charts {mermaid_script} diff --git a/scripts/core/INTRO.rst b/scripts/core/INTRO.rst index 448e3569e2..898d4ce5f3 100644 --- a/scripts/core/INTRO.rst +++ b/scripts/core/INTRO.rst @@ -396,6 +396,14 @@ Specific environment variables can be set to control the behavior of unified run See the Layers_ section for details of the layers currently included in the runtime. +.. envvar:: UR_LOADER_PRELOAD_FILTER + + If set, the loader will read `ONEAPI_DEVICE_SELECTOR` before loading the UR Adapters to determine which backends should be loaded. + + .. note:: + + This environment variable is default enabled on Linux, but default disabled on Windows. 
+ Service identifiers --------------------- diff --git a/scripts/core/common.yml b/scripts/core/common.yml index d06333eb07..5df4a7c04e 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -39,6 +39,12 @@ desc: "Microsoft-specific dllexport storage-class attribute" condition: "defined(_WIN32)" name: $X_APIEXPORT value: __declspec(dllexport) +--- #-------------------------------------------------------------------------- +type: macro +desc: "GCC-specific dllexport storage-class attribute" +condition: "__GNUC__ >= 4" +name: $X_APIEXPORT +value: __attribute__ ((visibility ("default"))) altvalue: "" --- #-------------------------------------------------------------------------- type: macro diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 23c0233ef7..c063466b22 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -365,7 +365,7 @@ etors: - name: SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS desc: "[$x_bool_t] support sub group independent forward progress" - name: SUB_GROUP_SIZES_INTEL - desc: "[uint32_t[]] return an array of sub group sizes supported on Intel device" + desc: "[uint32_t[]] return an array of supported sub group sizes" - name: USM_HOST_SUPPORT desc: "[$x_device_usm_access_capability_flags_t] support USM host memory access" - name: USM_DEVICE_SUPPORT diff --git a/scripts/generate_code.py b/scripts/generate_code.py index 64386bb5e3..0c7476ab42 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -108,6 +108,26 @@ def _mako_print_cpp(path, namespace, tags, version, specs, meta): specs=specs, meta=meta) + +def _mako_api_funcs(path, namespace, tags, version, revision, specs, meta): + template = "api_funcs.def.mako" + fin = os.path.join(templates_dir, template) + + name = "%s_api_funcs"%(namespace) + filename = "%s.def"%(name) + fout = os.path.join(path, filename) + + print("Generating %s..."%fout) + return util.makoWrite( + fin, fout, + name=name, + ver=version, + rev=revision, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta) + """ generates c/c++ files from the specification documents """ @@ -116,6 +136,7 @@ def _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, spec loc += _mako_api_cpp(srcpath, namespace, tags, version, revision, specs, meta) loc += _mako_ddi_h(incpath, namespace, tags, version, revision, specs, meta) loc += _mako_print_hpp(incpath, namespace, tags, version, revision, specs, meta) + loc += _mako_api_funcs(incpath, namespace, tags, version, revision, specs, meta) return loc @@ -379,6 +400,32 @@ def generate_loader(path, section, namespace, tags, version, specs, meta): ) print("Generated %s lines of code.\n"%loc) +""" + generates c/c++ files from the specification documents +""" +def _mako_interface_loader_api(path, adapter, ext, namespace, tags, version, specs, meta): + dstpath = os.path.join(path, adapter) + os.makedirs(dstpath, exist_ok=True) + + template = f"ur_interface_loader.{ext}.mako" + fin = os.path.join(templates_dir, template) + + name = f"ur_interface_loader" + + filename = f"{name}.{ext}" + fout = os.path.join(dstpath, filename) + + print("Generating %s..."%fout) + return util.makoWrite( + fin, fout, + name=name, + adapter=adapter, + ver=version, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta,) + """ Entry-point: generates adapter for unified_runtime @@ -395,6 +442,10 @@ def generate_adapters(path, section, namespace, tags, version, specs, meta): loc += _mako_linker_scripts( dstpath, "adapter", "def", namespace, tags, version, specs, 
meta ) + + loc += _mako_interface_loader_api(dstpath, "level_zero", "cpp", namespace, tags, version, specs, meta) + loc += _mako_interface_loader_api(dstpath, "level_zero", "hpp", namespace, tags, version, specs, meta) + print("Generated %s lines of code.\n"%loc) """ diff --git a/scripts/templates/api_funcs.def.mako b/scripts/templates/api_funcs.def.mako new file mode 100644 index 0000000000..f0fb653208 --- /dev/null +++ b/scripts/templates/api_funcs.def.mako @@ -0,0 +1,35 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() +%> +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ${name}.def + * @version v${ver}-r${rev} + * + */ + + // Auto-generated file, do not edit. + +%for tbl in th.get_pfntables(specs, meta, n, tags): +%for obj in tbl['functions']: +_UR_API(${th.make_func_name(n, tags, obj)}) +%endfor +%endfor +%for obj in th.get_loader_functions(specs, meta, n, tags): +%if n + "Loader" in obj: +_UR_API(${obj}) +%endif +%endfor diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 44631cc360..9c797a0ec3 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -365,6 +365,10 @@ ${tbl['export']['name']}( // Load the device-platform DDI tables for( auto& platform : ur_loader::getContext()->platforms ) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) + continue; + if(platform.initStatus != ${X}_RESULT_SUCCESS) continue; auto getTable = reinterpret_cast<${tbl['pfn']}>( diff --git a/scripts/templates/print.hpp.mako b/scripts/templates/print.hpp.mako index 9bf427b889..4180231ea4 100644 --- a/scripts/templates/print.hpp.mako +++ b/scripts/templates/print.hpp.mako @@ -411,6 +411,11 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct %endfor %endfor +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const ur_bool_t value) { + os << (value ? "true" : "false"); + return os; +} + namespace ${x}::details { /////////////////////////////////////////////////////////////////////////////// // @brief Print pointer value diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako index f941c7ba03..fcfa89d258 100644 --- a/scripts/templates/queue_api.cpp.mako +++ b/scripts/templates/queue_api.cpp.mako @@ -24,8 +24,9 @@ from templates import helper as th ur_queue_handle_t_::~ur_queue_handle_t_() {} ## FUNCTION ################################################################### +namespace ${x}::level_zero { %for obj in th.get_queue_related_functions(specs, n, tags): -${X}_APIEXPORT ${x}_result_t ${X}_APICALL +${x}_result_t ${th.make_func_name(n, tags, obj)}( %for line in th.make_param_lines(n, tags, obj, format=["name", "type", "delim"]): ${line} @@ -35,3 +36,4 @@ ${th.make_func_name(n, tags, obj)}( return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])}; } %endfor +} \ No newline at end of file diff --git a/scripts/templates/ur_interface_loader.cpp.mako b/scripts/templates/ur_interface_loader.cpp.mako new file mode 100644 index 0000000000..3298b5bcae --- /dev/null +++ b/scripts/templates/ur_interface_loader.cpp.mako @@ -0,0 +1,88 @@ +<%! 
+import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.cpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +#include "ur_interface_loader.hpp" + +static ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + // Pre 1.0 we enforce loader and adapter must have same version. + // Post 1.0 only major version match should be required. + if (version != UR_API_VERSION_CURRENT) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ${n}::${adapter} { +#elif defined(__cplusplus) +extern "C" { +#endif + +%for tbl in th.get_pfntables(specs, meta, n, tags): +${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}( + %for line in th.make_param_lines(n, tags, tbl['export'], format=["type", "name", "delim"]): + ${line} + %endfor + ) +{ + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + %for obj in tbl['functions']: + pDdiTable->${th.append_ws(th.make_pfn_name(n, tags, obj), 43)} = ${n}::${adapter}::${th.make_func_name(n, tags, obj)}; + %endfor + + return result; +} + +%endfor + +#ifdef UR_STATIC_ADAPTER_${Adapter} +} // namespace ur::${adapter} +#elif defined(__cplusplus) +} // extern "C" +#endif + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ur::${adapter} { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + +%for tbl in th.get_pfntables(specs, meta, n, tags): + result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); + if (result != UR_RESULT_SUCCESS) + return result; +%endfor + + return result; +} +} +#endif diff --git a/scripts/templates/ur_interface_loader.hpp.mako b/scripts/templates/ur_interface_loader.hpp.mako new file mode 100644 index 0000000000..e2902f93c8 --- /dev/null +++ b/scripts/templates/ur_interface_loader.hpp.mako @@ -0,0 +1,38 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +namespace ${n}::${adapter} { +%for s in specs: +%for obj in th.filter_items(s['objects'], 'type', 'function'): +%if not th.obj_traits.is_loader_only(obj): +${x}_result_t ${th.make_func_name(n, tags, obj)}( + %for line in th.make_param_lines(n, tags, obj, format=["type", "name", "delim"]): + ${line} + %endfor + ); +%endif +%endfor +%endfor +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index bbaaa27cdb..9c8a0c807c 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -57,12 +57,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(4318u); } case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: { - int ComputeUnits = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &ComputeUnits, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - hDevice->get())); - detail::ur::assertion(ComputeUnits >= 0); - return ReturnValue(static_cast(ComputeUnits)); + return ReturnValue(hDevice->getNumComputeUnits()); } case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: { return ReturnValue(MaxWorkItemDimensions); diff --git a/source/adapters/cuda/device.hpp b/source/adapters/cuda/device.hpp index 0a40329026..3654f2bb36 100644 --- a/source/adapters/cuda/device.hpp +++ b/source/adapters/cuda/device.hpp @@ -32,6 +32,7 @@ struct ur_device_handle_t_ { int MaxCapacityLocalMem{0}; int MaxChosenLocalMem{0}; bool MaxLocalMemSizeChosen{false}; + uint32_t NumComputeUnits{0}; public: ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase, @@ -54,6 +55,10 @@ struct ur_device_handle_t_ { sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr)); + UR_CHECK_ERROR(cuDeviceGetAttribute( + reinterpret_cast(&NumComputeUnits), + CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, cuDevice)); + // Set local mem max size if env var is present static const char *LocalMemSizePtrUR = std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE"); @@ -107,6 +112,8 @@ struct ur_device_handle_t_ { int getMaxChosenLocalMem() const noexcept { return MaxChosenLocalMem; }; bool maxLocalMemSizeChosen() { return MaxLocalMemSizeChosen; }; + + uint32_t getNumComputeUnits() const noexcept { return NumComputeUnits; }; }; int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute); diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index e2960573aa..427fde70e6 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -759,13 +759,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pCopyRegion->copyExtent.width * PixelSizeBytes; - cpy_desc.srcHeight = pCopyRegion->copyExtent.height; + cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcHeight = std::max(uint64_t{1}, pSrcImageDesc->height); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); - cpy_desc.Depth = pDstImageDesc->arraySize; + cpy_desc.Depth = pCopyRegion->copyExtent.depth; 
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); } } else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { @@ -855,10 +855,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; - cpy_desc.dstHeight = pDstImageDesc->height; + cpy_desc.dstHeight = std::max(uint64_t{1}, pDstImageDesc->height); cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); - cpy_desc.Depth = pSrcImageDesc->arraySize; + cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); } } else { @@ -932,7 +932,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstArray = (CUarray)pDst; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); - cpy_desc.Depth = pSrcImageDesc->arraySize; + cpy_desc.Depth = pCopyRegion->copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); } // Synchronization is required here to handle the case of copying data diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp index d43bd046dc..2061893744 100644 --- a/source/adapters/cuda/kernel.cpp +++ b/source/adapters/cuda/kernel.cpp @@ -167,10 +167,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) { - (void)hKernel; - (void)localWorkSize; - (void)dynamicSharedMemorySize; - *pGroupCountRet = 1; + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_KERNEL); + + // We need to set the active current device for this kernel explicitly here, + // because the occupancy querying API does not take device parameter. + ur_device_handle_t Device = hKernel->getProgram()->getDevice(); + ScopedContext Active(Device); + try { + // We need to calculate max num of work-groups using per-device semantics. + + int MaxNumActiveGroupsPerCU{0}; + UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor( + &MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize, + dynamicSharedMemorySize)); + detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0); + // Handle the case where we can't have all SMs active with at least 1 group + // per SM. In that case, the device is still able to run 1 work-group, hence + // we will manually check if it is possible with the available HW resources. + if (MaxNumActiveGroupsPerCU == 0) { + size_t MaxWorkGroupSize{}; + urKernelGetGroupInfo( + hKernel, Device, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, + sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr); + size_t MaxLocalSizeBytes{}; + urDeviceGetInfo(Device, UR_DEVICE_INFO_LOCAL_MEM_SIZE, + sizeof(MaxLocalSizeBytes), &MaxLocalSizeBytes, nullptr); + if (localWorkSize > MaxWorkGroupSize || + dynamicSharedMemorySize > MaxLocalSizeBytes || + hasExceededMaxRegistersPerBlock(Device, hKernel, localWorkSize)) + *pGroupCountRet = 0; + else + *pGroupCountRet = 1; + } else { + // Multiply by the number of SMs (CUs = compute units) on the device in + // order to retreive the total number of groups/blocks that can be + // launched. 
+ *pGroupCountRet = Device->getNumComputeUnits() * MaxNumActiveGroupsPerCU; + } + } catch (ur_result_t Err) { + return Err; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 164eae7521..2e77e434ca 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -8,8 +8,13 @@ set(TARGET_NAME ur_adapter_hip) # Set default UR HIP platform to AMD set(UR_HIP_PLATFORM "AMD" CACHE STRING "UR HIP platform, AMD or NVIDIA") +set(DEFAULT_ROCM_PATH "/opt/rocm") +if(DEFINED ENV{ROCM_PATH}) + set(DEFAULT_ROCM_PATH $ENV{ROCM_PATH}) +endif() + # Set default ROCm installation directory -set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") +set(UR_HIP_ROCM_DIR "${DEFAULT_ROCM_PATH}" CACHE STRING "ROCm installation dir") # Allow custom location of HIP/HSA include and HIP library directories set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE PATH "Custom ROCm HIP include dir") diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 05bf05e0a7..cc05d36084 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -40,7 +40,7 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) set(UR_LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") endif() if (UR_LEVEL_ZERO_LOADER_TAG STREQUAL "") - set(UR_LEVEL_ZERO_LOADER_TAG v1.17.6) + set(UR_LEVEL_ZERO_LOADER_TAG v1.17.39) endif() # Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104 @@ -73,27 +73,33 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) ${level-zero-loader_SOURCE_DIR}/include CACHE PATH "Path to Level Zero Headers") endif() -add_library (LevelZeroLoader INTERFACE) +add_library(LevelZeroLoader INTERFACE) # The MSVC linker does not like / at the start of a path, so to work around this # we split it into a link library and a library path, where the path is allowed # to have leading /. 
get_filename_component(LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}" DIRECTORY) get_filename_component(LEVEL_ZERO_LIB_NAME "${LEVEL_ZERO_LIBRARY}" NAME) target_link_directories(LevelZeroLoader - INTERFACE "${LEVEL_ZERO_LIBRARY_SRC}" + INTERFACE "$" + "$" ) target_link_libraries(LevelZeroLoader INTERFACE "${LEVEL_ZERO_LIB_NAME}" ) -add_library (LevelZeroLoader-Headers INTERFACE) +add_library(LevelZeroLoader-Headers INTERFACE) target_include_directories(LevelZeroLoader-Headers - INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" + INTERFACE "$" + "$" ) if(UR_BUILD_ADAPTER_L0) - add_ur_adapter(ur_adapter_level_zero - SHARED + set(ADAPTER_LIB_TYPE SHARED) + if(UR_STATIC_ADAPTER_L0) + set(ADAPTER_LIB_TYPE STATIC) + endif() + + add_ur_adapter(ur_adapter_level_zero ${ADAPTER_LIB_TYPE} ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp @@ -109,10 +115,10 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp @@ -127,14 +133,28 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ) + if(UR_STATIC_ADAPTER_L0) + target_compile_definitions(ur_adapter_level_zero PUBLIC UR_STATIC_ADAPTER_LEVEL_ZERO) + + # 'utils' target from 'level-zero-loader' includes path which is prefixed + # in the source directory, this breaks the installation of 'utils' target. 
+ set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "") + install(TARGETS ur_adapter_level_zero ur_umf LevelZeroLoader LevelZeroLoader-Headers ze_loader utils + EXPORT ${PROJECT_NAME}-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + endif() + if(NOT WIN32) target_sources(ur_adapter_level_zero PRIVATE @@ -154,7 +174,7 @@ if(UR_BUILD_ADAPTER_L0) if (WIN32) # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero PUBLIC /DEPENDENTLOADFLAG:0x800) + target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero PRIVATE @@ -181,6 +201,7 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp @@ -188,6 +209,7 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp # v2-only sources ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp @@ -199,6 +221,7 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp @@ -211,6 +234,7 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index ed52254ec3..eaabb70a29 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -289,7 +289,8 @@ ur_result_t adapterStateTeardown() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( +namespace ur::level_zero { +ur_result_t urAdapterGet( uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phAdapters. 
If phAdapters is not NULL, then ///< NumEntries should be greater than zero, otherwise @@ -330,7 +331,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { +ur_result_t urAdapterRelease(ur_adapter_handle_t) { // Check first if the Adapter pointer is valid if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; @@ -342,7 +343,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { +ur_result_t urAdapterRetain(ur_adapter_handle_t) { if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; GlobalAdapter->RefCount++; @@ -351,7 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( +ur_result_t urAdapterGetLastError( ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. @@ -364,11 +365,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( return ErrorMessageCode; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, - ur_adapter_info_t PropName, - size_t PropSize, - void *PropValue, - size_t *PropSizeRet) { +ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -382,3 +381,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index e507730888..1bf4f26716 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -10,6 +10,7 @@ #include "command_buffer.hpp" #include "helpers/kernel_helpers.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" /* L0 Command-buffer Extension Doc see: @@ -297,16 +298,16 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( IsUpdatable(Desc ? Desc->isUpdatable : false), IsProfilingEnabled(Desc ? 
Desc->enableProfiling : false), IsInOrderCmdList(IsInOrderCmdList) { - urContextRetain(Context); - urDeviceRetain(Device); + ur::level_zero::urContextRetain(Context); + ur::level_zero::urDeviceRetain(Device); } void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { // Release the memory allocated to the Context stored in the command_buffer - urContextRelease(Context); + ur::level_zero::urContextRelease(Context); // Release the device - urDeviceRelease(Device); + ur::level_zero::urDeviceRelease(Device); // Release the memory allocated to the CommandList stored in the // command_buffer @@ -376,7 +377,7 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { for (auto &AssociatedKernel : KernelsList) { ReleaseIndirectMem(AssociatedKernel); - urKernelRelease(AssociatedKernel); + ur::level_zero::urKernelRelease(AssociatedKernel); } } @@ -387,16 +388,16 @@ ur_exp_command_buffer_command_handle_t_:: ur_kernel_handle_t Kernel = nullptr) : CommandBuffer(CommandBuffer), CommandId(CommandId), WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize), Kernel(Kernel) { - urCommandBufferRetainExp(CommandBuffer); + ur::level_zero::urCommandBufferRetainExp(CommandBuffer); if (Kernel) - urKernelRetain(Kernel); + ur::level_zero::urKernelRetain(Kernel); } ur_exp_command_buffer_command_handle_t_:: ~ur_exp_command_buffer_command_handle_t_() { - urCommandBufferReleaseExp(CommandBuffer); + ur::level_zero::urCommandBufferReleaseExp(CommandBuffer); if (Kernel) - urKernelRelease(Kernel); + ur::level_zero::urKernelRelease(Kernel); } void ur_exp_command_buffer_handle_t_::registerSyncPoint( @@ -433,7 +434,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::getFenceForQueue( return UR_RESULT_SUCCESS; } -namespace { +namespace ur::level_zero { /** * Creates a L0 command list @@ -493,9 +494,8 @@ bool canBeInOrder(ur_context_handle_t Context, ? (CommandBufferDesc ? CommandBufferDesc->isInOrder : false) : false; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, const ur_exp_command_buffer_desc_t *CommandBufferDesc, ur_exp_command_buffer_handle_t *CommandBuffer) { @@ -567,13 +567,13 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferRetainExp(ur_exp_command_buffer_handle_t CommandBuffer) { CommandBuffer->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { if (!CommandBuffer->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -583,7 +583,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { UR_ASSERT(CommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); // It is not allowed to append to command list from multiple threads. @@ -627,8 +627,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -namespace { - /** * Sets the global offset for a kernel command that will be appended to the * command buffer. 
@@ -730,9 +728,8 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( +ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, uint32_t WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, @@ -769,7 +766,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( // is in use. Once the event has been signaled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); if (Command && CommandBuffer->IsUpdatable) { UR_CALL(createCommandHandle(CommandBuffer, Kernel, WorkDim, LocalWorkSize, @@ -790,7 +787,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -812,7 +809,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +ur_result_t urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -842,7 +839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, @@ -875,7 +872,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, const void *Src, uint32_t NumSyncPointsInWaitList, @@ -897,7 +894,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -922,7 +919,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList, const 
ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -942,7 +939,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -966,7 +963,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( +ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1005,7 +1002,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( +ur_result_t urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1067,7 +1064,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( +ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1088,7 +1085,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( +ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Ptr, const void *Pattern, size_t PatternSize, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1102,8 +1099,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -namespace { - /** * Gets an L0 command queue that supports the chosen engine. * @param[in] Queue The UR queue used to submit the command buffer. @@ -1112,8 +1107,7 @@ namespace { * @param[out] ZeCommandQueue The L0 command queue. 
* @return UR_RESULT_SUCCESS or an error code on failure */ -ur_result_t getZeCommandQueue(ur_queue_handle_legacy_t Queue, - bool UseCopyEngine, +ur_result_t getZeCommandQueue(ur_queue_handle_t Queue, bool UseCopyEngine, ze_command_queue_handle_t &ZeCommandQueue) { auto &QGroup = Queue->getQueueGroup(UseCopyEngine); uint32_t QueueGroupOrdinal; @@ -1130,7 +1124,7 @@ ur_result_t getZeCommandQueue(ur_queue_handle_legacy_t Queue, * @return UR_RESULT_SUCCESS or an error code on failure */ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, - ur_queue_handle_legacy_t Queue, + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { const bool UseCopyEngine = false; @@ -1182,7 +1176,7 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, * @return UR_RESULT_SUCCESS or an error code on failure */ ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, - ur_queue_handle_legacy_t Queue, + ur_queue_handle_t Queue, ur_command_list_ptr_t SignalCommandList, ur_event_handle_t *Event) { // Execution event for this enqueue of the UR command-buffer @@ -1226,13 +1220,12 @@ ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue, - uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, - ur_event_handle_t *Event) { - auto Queue = Legacy(UrQueue); +ur_result_t +urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList, + ur_event_handle_t *Event) { std::scoped_lock Lock(Queue->Mutex); ze_command_queue_handle_t ZeCommandQueue; @@ -1294,13 +1287,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( +ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t Command) { Command->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( +ur_result_t urCommandBufferReleaseCommandExp( ur_exp_command_buffer_command_handle_t Command) { if (!Command->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1309,8 +1302,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return UR_RESULT_SUCCESS; } -namespace { - /** * Validates contents of the update command description. * @param[in] Command The command which is being updated. 
@@ -1620,9 +1611,8 @@ ur_result_t updateKernelCommand( return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( +ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t Command, const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) { UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -1653,10 +1643,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { @@ -1669,10 +1660,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( - ur_exp_command_buffer_command_handle_t Command, - ur_exp_command_buffer_command_info_t PropName, size_t PropSize, - void *PropValue, size_t *PropSizeRet) { +ur_result_t +urCommandBufferCommandGetInfoExp(ur_exp_command_buffer_command_handle_t Command, + ur_exp_command_buffer_command_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -1684,3 +1676,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index b7d0a4a913..6dd8a614c5 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 452189d038..296e3e98d5 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -18,7 +18,9 @@ #include "queue.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( +namespace ur::level_zero { + +ur_result_t urContextCreate( uint32_t DeviceCount, ///< [in] the number of devices given in phDevices const ur_device_handle_t *Devices, ///< [in][range(0, DeviceCount)] array of handle of devices. @@ -53,7 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( +ur_result_t urContextRetain( ur_context_handle_t Context ///< [in] handle of the context to get a reference of. ) { @@ -61,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRelease( +ur_result_t urContextRelease( ur_context_handle_t Context ///< [in] handle of the context to release. 
) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -85,7 +87,7 @@ static const bool UseMemcpy2DOperations = [] { return std::atoi(UseMemcpy2DOperationsFlag) > 0; }(); -UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( +ur_result_t urContextGetInfo( ur_context_handle_t Context, ///< [in] handle of the context ur_context_info_t ContextInfoType, ///< [in] type of the info to retrieve size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( +ur_result_t urContextGetNativeHandle( ur_context_handle_t Context, ///< [in] handle of the context. ur_native_handle_t *NativeContext ///< [out] a pointer to the native ///< handle of the context. @@ -142,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( +ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t NativeContext, ///< [in] the native handle of the context. ur_adapter_handle_t, uint32_t NumDevices, const ur_device_handle_t *Devices, @@ -166,7 +168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( +ur_result_t urContextSetExtendedDeleter( ur_context_handle_t Context, ///< [in] handle of the context. ur_context_extended_deleter_t Deleter, ///< [in] Function pointer to extended deleter. @@ -180,6 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero ur_result_t ur_context_handle_t_::initialize() { @@ -509,7 +512,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( // Create one event ZePool per MaxNumEventsPerPool events if (*ZePool == nullptr) { ze_event_pool_counter_based_exp_desc_t counterBasedExt = { - ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC}; + ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr, 0}; ZeStruct ZeEventPoolDesc; ZeEventPoolDesc.count = MaxNumEventsPerPool; ZeEventPoolDesc.flags = 0; @@ -527,6 +530,8 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE; } + logger::debug("ze_event_pool_desc_t counter based flags set to: {}", + counterBasedExt.flags); ZeEventPoolDesc.pNext = &counterBasedExt; } @@ -576,8 +581,8 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) { std::scoped_lock Lock(EventCacheMutex); ur_device_handle_t Device = nullptr; - if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) { - Device = Legacy(Event->UrQueue)->Device; + if (!Event->IsMultiDevice && Event->UrQueue) { + Device = Event->UrQueue->Device; } auto Cache = getEventCache(Event->isHostVisible(), @@ -598,10 +603,10 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { ze_device_handle_t ZeDevice = nullptr; bool UsingImmediateCommandlists = - !Legacy(Event->UrQueue) || Legacy(Event->UrQueue)->UsingImmCmdLists; + !Event->UrQueue || Event->UrQueue->UsingImmCmdLists; - if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) { - ZeDevice = Legacy(Event->UrQueue)->Device->ZeDevice; + if (!Event->IsMultiDevice && Event->UrQueue) { + ZeDevice = Event->UrQueue->Device->ZeDevice; } std::list *ZePoolCache = 
getZeEventPoolCache( @@ -644,7 +649,7 @@ static const size_t CmdListsCleanupThreshold = [] { // Retrieve an available command list to be used in a PI call. ur_result_t ur_context_handle_t_::getAvailableCommandList( - ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList, + ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, bool AllowBatching, ze_command_queue_handle_t *ForcedCmdQueue) { @@ -767,9 +772,11 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( CommandList = Queue->CommandListMap .emplace(ZeCommandList, - ur_command_list_info_t(ZeFence, true, false, - ZeCommandQueue, ZeQueueDesc, - Queue->useCompletionBatching())) + ur_command_list_info_t( + ZeFence, true, false, ZeCommandQueue, ZeQueueDesc, + Queue->useCompletionBatching(), true, + ZeCommandListIt->second.InOrderList, + ZeCommandListIt->second.IsImmediate)) .first; } ZeCommandListCache.erase(ZeCommandListIt); diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index a1212f0698..c2fbba633f 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -297,7 +297,7 @@ struct ur_context_handle_t_ : _ur_object { // for executing on this device. Immediate commandlists are created only // once for each SYCL Queue and after that they are reused. ur_result_t getAvailableCommandList( - ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList, + ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, bool AllowBatching = false, ze_command_queue_handle_t *ForcedCmdQueue = nullptr); diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index de2bee3789..e6cb650420 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -10,13 +10,59 @@ #include "device.hpp" #include "adapter.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" #include #include #include -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( +// UR_L0_USE_COPY_ENGINE can be set to an integer value, or +// a pair of integer values of the form "lower_index:upper_index". +// Here, the indices point to copy engines in a list of all available copy +// engines. +// This functions returns this pair of indices. +// If the user specifies only a single integer, a value of 0 indicates that +// the copy engines will not be used at all. A value of 1 indicates that all +// available copy engines can be used. +const std::pair +getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { + const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + // If the environment variable is not set, no copy engines are used when + // immediate commandlists are being used. For standard commandlists all are + // used. + if (!EnvVar) { + if (Device->ImmCommandListUsed) + return std::pair(0, 0); // Only main copy engine will be used. + return std::pair(0, INT_MAX); // All copy engines will be used. 
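For reference, a short illustrative sketch of how the format documented in the comment above maps onto concrete settings, followed by a caller-side check mirroring CopyEngineRequested(); the values are examples only and the Device handle is assumed to be valid.

// UR_L0_USE_COPY_ENGINE="0"    -> {-1, -1}      no copy engines are used
// UR_L0_USE_COPY_ENGINE="1"    -> {0, INT_MAX}  all copy engines may be used
// UR_L0_USE_COPY_ENGINE="2:3"  -> {2, 3}        only copy engines 2 through 3
// unset                        -> {0, 0} with immediate command lists,
//                                 {0, INT_MAX} otherwise
auto [LowerIndex, UpperIndex] = getRangeOfAllowedCopyEngines(Device);
bool AnyCopyEngineAllowed = (LowerIndex != -1) || (UpperIndex != -1);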
+ } + std::string CopyEngineRange = EnvVar; + // Environment variable can be a single integer or a pair of integers + // separated by ":" + auto pos = CopyEngineRange.find(":"); + if (pos == std::string::npos) { + bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); + if (UseCopyEngine) + return std::pair(0, INT_MAX); // All copy engines can be used. + return std::pair(-1, -1); // No copy engines will be used. + } + int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); + int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); + if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || + (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { + logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " + "default set."); + LowerCopyEngineIndex = 0; + UpperCopyEngineIndex = INT_MAX; + } + return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); +} + +namespace ur::level_zero { + +ur_result_t urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to @@ -143,7 +189,7 @@ uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { return Device->ZeGlobalMemSize.operator->()->value; } -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( +ur_result_t urDeviceGetInfo( ur_device_handle_t Device, ///< [in] handle of the device instance ur_device_info_t ParamName, ///< [in] type of the info to retrieve size_t propSize, ///< [in] the number of bytes pointed to by ParamValue. @@ -1068,158 +1114,353 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_SUCCESS; } -// UR_L0_USE_COPY_ENGINE can be set to an integer value, or -// a pair of integer values of the form "lower_index:upper_index". -// Here, the indices point to copy engines in a list of all available copy -// engines. -// This functions returns this pair of indices. -// If the user specifies only a single integer, a value of 0 indicates that -// the copy engines will not be used at all. A value of 1 indicates that all -// available copy engines can be used. -const std::pair -getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { - const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); - const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); - static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); - // If the environment variable is not set, no copy engines are used when - // immediate commandlists are being used. For standard commandlists all are - // used. - if (!EnvVar) { - if (Device->ImmCommandListUsed) - return std::pair(0, 0); // Only main copy engine will be used. - return std::pair(0, INT_MAX); // All copy engines will be used. - } - std::string CopyEngineRange = EnvVar; - // Environment variable can be a single integer or a pair of integers - // separated by ":" - auto pos = CopyEngineRange.find(":"); - if (pos == std::string::npos) { - bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); - if (UseCopyEngine) - return std::pair(0, INT_MAX); // All copy engines can be used. - return std::pair(-1, -1); // No copy engines will be used. 
- } - int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); - int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); - if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || - (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { - logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " - "default set."); - LowerCopyEngineIndex = 0; - UpperCopyEngineIndex = INT_MAX; - } - return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); -} - bool CopyEngineRequested(const ur_device_handle_t &Device) { int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).first; int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).second; return ((LowerCopyQueueIndex != -1) || (UpperCopyQueueIndex != -1)); } -// Whether immediate commandlists will be used for kernel launches and copies. -// The default is standard commandlists. Setting 1 or 2 specifies use of -// immediate commandlists. Note: when immediate commandlists are used then -// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. -// (See env var UR_L0_DEVICE_SCOPE_EVENTS). - -// Get value of immediate commandlists env var setting or -1 if unset -ur_device_handle_t_::ImmCmdlistMode -ur_device_handle_t_::useImmediateCommandLists() { - // If immediate commandlist setting is not explicitly set, then use the device - // default. - // TODO: confirm this is good once make_queue revert is added - static const int ImmediateCommandlistsSetting = [] { - const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); - const char *PiRet = - std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); - const char *ImmediateCommandlistsSettingStr = - UrRet ? UrRet : (PiRet ? PiRet : nullptr); - if (!ImmediateCommandlistsSettingStr) - return -1; - return std::atoi(ImmediateCommandlistsSettingStr); - }(); - - if (ImmediateCommandlistsSetting == -1) { - bool isDG2SupportedDriver = - this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820); - if ((isDG2SupportedDriver && isDG2()) || isPVC()) { - return PerQueue; - } else { - return NotUsed; +ur_result_t urDevicePartition( + ur_device_handle_t Device, ///< [in] handle of the device to partition. + const ur_device_partition_properties_t + *Properties, ///< [in] Device partition properties. + uint32_t NumDevices, ///< [in] the number of sub-devices. + ur_device_handle_t + *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle + ///< of devices. If NumDevices is less than the number of + ///< sub-devices available, then the function shall only + ///< retrieve that number of sub-devices. + uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of + ///< sub-devices the device can be partitioned into + ///< according to the partitioning property. 
+) { + // Other partitioning ways are not supported by Level Zero + UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + if ((Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && + Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { + return UR_RESULT_ERROR_INVALID_VALUE; } + } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + if (Properties->pProperties->value.affinity_domain != 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_INVALID_VALUE; } - switch (ImmediateCommandlistsSetting) { - case 0: - return NotUsed; - case 1: - return PerQueue; - case 2: - return PerThreadPerQueue; - default: - return NotUsed; - } -} -bool ur_device_handle_t_::useRelaxedAllocationLimits() { - static const bool EnableRelaxedAllocationLimits = [] { - auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); - const bool RetVal = UrRet ? std::stoi(*UrRet) : 0; - return RetVal; - }(); + // Devices cache is normally created in piDevicesGet but still make + // sure that cache is populated. + // + auto Res = Device->Platform->populateDeviceCacheIfNeeded(); + if (Res != UR_RESULT_SUCCESS) { + return Res; + } - return EnableRelaxedAllocationLimits; -} + auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { + if (Device->SubDevices.size() == 0) + return 0; -bool ur_device_handle_t_::useDriverInOrderLists() { - // Use in-order lists implementation from L0 driver instead - // of adapter's implementation. + // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. + // However, if + // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that + // still expose CSlices in partitioning by affinity domain for compatibility + // reasons. + if (Properties->pProperties->type == + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && + !ExposeCSliceInAffinityPartitioning) { + if (Device->isSubDevice()) { + return 0; + } + } + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + // Not a CSlice-based partitioning. + if (!Device->SubDevices[0]->isCCS()) { + return 0; + } + } - static const bool UseDriverInOrderLists = [&] { - const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); - bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar( - 1, 3, L0_DRIVER_INORDER_MIN_VERSION); - if (!UrRet) - return CompatibleDriver; - return std::atoi(UrRet) != 0; + return Device->SubDevices.size(); }(); - return UseDriverInOrderLists; -} - -ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, - int SubSubDeviceIndex) { - // Maintain various device properties cache. - // Note that we just describe here how to compute the data. - // The real initialization is upon first access. + // TODO: Consider support for partitioning to <= total sub-devices. + // Currently supported partitioning (by affinity domain/numa) would always + // partition to all sub-devices. 
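A caller-side usage sketch may help here (illustrative only, not part of this patch): the usual query-then-fill pattern against the signature above. Structure and enum names are assumed from the UR headers; Device is assumed to be a valid root-device handle.

// Illustrative fragment; assumes <vector> and ur_api.h are included.
ur_device_partition_property_t Prop{};
Prop.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
Prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE;

ur_device_partition_properties_t Props{};
Props.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES;
Props.pProperties = &Prop;
Props.PropCount = 1;

uint32_t NumSubDevices = 0;
urDevicePartition(Device, &Props, 0, nullptr, &NumSubDevices);  // query count
std::vector<ur_device_handle_t> SubDevices(NumSubDevices);
urDevicePartition(Device, &Props, NumSubDevices, SubDevices.data(), nullptr);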
// - auto ZeDevice = this->ZeDevice; - ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); - }; - - ZeDeviceComputeProperties.Compute = - [ZeDevice](ze_device_compute_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); - }; + if (NumDevices != 0) + UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - ZeDeviceIpVersionExt.Compute = - [ZeDevice](ze_device_ip_version_ext_t &Properties) { - ze_device_properties_t P; - P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; - P.pNext = (void *)&Properties; - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); - }; + for (uint32_t I = 0; I < NumDevices; I++) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + // In case the value is NEXT_PARTITIONABLE, we need to change it to the + // chosen domain. This will always be NUMA since that's the only domain + // supported by level zero. + prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; - ZeDeviceImageProperties.Compute = - [ZeDevice](ze_device_image_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); - }; + OutDevices[I] = Device->SubDevices[I]; + // reusing the same pi_device needs to increment the reference count + ur::level_zero::urDeviceRetain(OutDevices[I]); + } - ZeDeviceModuleProperties.Compute = - [ZeDevice](ze_device_module_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); - }; + if (NumDevicesRet) { + *NumDevicesRet = EffectiveNumDevices; + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceSelectBinary( + ur_device_handle_t + Device, ///< [in] handle of the device to select binary for. + const ur_device_binary_t + *Binaries, ///< [in] the array of binaries to select from. + uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. + ///< Must greater than or equal to zero otherwise + ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. + uint32_t + *SelectedBinary ///< [out] the index of the selected binary in the input + ///< array of binaries. If a suitable binary was not + ///< found the function returns ${X}_INVALID_BINARY. +) { + std::ignore = Device; + // TODO: this is a bare-bones implementation for choosing a device image + // that would be compatible with the targeted device. An AOT-compiled + // image is preferred over SPIR-V for known devices (i.e. Intel devices) + // The implementation makes no effort to differentiate between multiple images + // for the given device, and simply picks the first one compatible. + // + // Real implementation will use the same mechanism OpenCL ICD dispatcher + // uses. Something like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. + + // Look for GEN binary, which we known can only be handled by Level-Zero now. 
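A brief worked example of the selection logic that follows may be useful; the input below is hypothetical.

// Hypothetical input:
//   Binaries[0].pDeviceTargetSpec == UR_DEVICE_BINARY_TARGET_SPIRV64
//   Binaries[1].pDeviceTargetSpec == UR_DEVICE_BINARY_TARGET_SPIRV64_GEN
// The GEN (AOT) image at index 1 is selected as soon as it is seen; if no GEN
// image is present, the remembered SPIR-V index is returned as the fallback,
// and if neither exists the call fails with UR_RESULT_ERROR_INVALID_BINARY.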
+ const char *BinaryTarget = + UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + + uint32_t *SelectedBinaryInd = SelectedBinary; + + // Find the appropriate device image, fallback to spirv if not found + constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); + uint32_t Spirv = InvalidInd; + + for (uint32_t i = 0; i < NumBinaries; ++i) { + if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { + *SelectedBinaryInd = i; + return UR_RESULT_SUCCESS; + } + if (strcmp(Binaries[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) + Spirv = i; + } + // Points to a spirv image, if such indeed was found + if ((*SelectedBinaryInd = Spirv) != InvalidInd) + return UR_RESULT_SUCCESS; + + // No image can be loaded for the given device + return UR_RESULT_ERROR_INVALID_BINARY; +} + +ur_result_t urDeviceGetNativeHandle( + ur_device_handle_t Device, ///< [in] handle of the device. + ur_native_handle_t + *NativeDevice ///< [out] a pointer to the native handle of the device. +) { + *NativeDevice = reinterpret_cast(Device->ZeDevice); + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceCreateWithNativeHandle( + ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. + [[maybe_unused]] ur_adapter_handle_t + Adapter, ///< [in] handle of the platform instance + [[maybe_unused]] const ur_device_native_properties_t + *Properties, ///< [in][optional] pointer to native device properties + ///< struct. + ur_device_handle_t + *Device ///< [out] pointer to the handle of the device object created. +) { + auto ZeDevice = ur_cast(NativeDevice); + + // The SYCL spec requires that the set of devices must remain fixed for the + // duration of the application's execution. We assume that we found all of the + // Level Zero devices when we initialized the platforms/devices cache, so the + // "NativeHandle" must already be in the cache. If it is not, this must not be + // a valid Level Zero device. + + ur_device_handle_t Dev = nullptr; + if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { + for (const auto &p : *platforms) { + Dev = p->getDeviceFromNativeHandle(ZeDevice); + } + } else { + return GlobalAdapter->PlatformCache->get_error(); + } + + if (Dev == nullptr) + return UR_RESULT_ERROR_INVALID_VALUE; + + *Device = Dev; + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceGetGlobalTimestamps( + ur_device_handle_t Device, ///< [in] handle of the device instance + uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's + ///< global timestamp that correlates with the + ///< Host's global timestamp value + uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global + ///< timestamp that correlates with the Device's + ///< global timestamp value +) { + const uint64_t &ZeTimerResolution = + Device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = Device->getTimestampMask(); + uint64_t DeviceClockCount, Dummy; + + ZE2UR_CALL(zeDeviceGetGlobalTimestamps, + (Device->ZeDevice, + HostTimestamp == nullptr ? &Dummy : HostTimestamp, + &DeviceClockCount)); + + if (DeviceTimestamp != nullptr) { + *DeviceTimestamp = + (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRetain(ur_device_handle_t Device) { + // The root-device ref-count remains unchanged (always 1). 
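A worked example of the timestamp conversion in urDeviceGetGlobalTimestamps above, using made-up numbers: the raw tick count is masked to the device's valid timestamp bits and then scaled by the timer resolution.

uint64_t DeviceClockCount = 0x100001234;   // raw ticks (example value)
uint64_t TimestampMask = (1ull << 32) - 1; // e.g. a device with 32 valid bits
uint64_t TimerResolutionNs = 52;           // e.g. ~52 ns per tick (device property)
uint64_t DeviceTimeNs = (DeviceClockCount & TimestampMask) * TimerResolutionNs;
// -> 0x1234 * 52 = 242,320 ns; bits above the mask are discarded.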
+ if (Device->isSubDevice()) { + Device->RefCount.increment(); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRelease(ur_device_handle_t Device) { + // Root devices are destroyed during the piTearDown process. + if (Device->isSubDevice()) { + if (Device->RefCount.decrementAndTest()) { + delete Device; + } + } + + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero + +// Whether immediate commandlists will be used for kernel launches and copies. +// The default is standard commandlists. Setting 1 or 2 specifies use of +// immediate commandlists. Note: when immediate commandlists are used then +// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. +// (See env var UR_L0_DEVICE_SCOPE_EVENTS). + +// Get value of immediate commandlists env var setting or -1 if unset +ur_device_handle_t_::ImmCmdlistMode +ur_device_handle_t_::useImmediateCommandLists() { + // If immediate commandlist setting is not explicitly set, then use the device + // default. + // TODO: confirm this is good once make_queue revert is added + static const int ImmediateCommandlistsSetting = [] { + const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); + const char *PiRet = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); + const char *ImmediateCommandlistsSettingStr = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (!ImmediateCommandlistsSettingStr) + return -1; + return std::atoi(ImmediateCommandlistsSettingStr); + }(); + + if (ImmediateCommandlistsSetting == -1) { + bool isDG2SupportedDriver = + this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820); + if ((isDG2SupportedDriver && isDG2()) || isPVC()) { + return PerQueue; + } else { + return NotUsed; + } + } + switch (ImmediateCommandlistsSetting) { + case 0: + return NotUsed; + case 1: + return PerQueue; + case 2: + return PerThreadPerQueue; + default: + return NotUsed; + } +} + +bool ur_device_handle_t_::useRelaxedAllocationLimits() { + static const bool EnableRelaxedAllocationLimits = [] { + auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); + const bool RetVal = UrRet ? std::stoi(*UrRet) : 0; + return RetVal; + }(); + + return EnableRelaxedAllocationLimits; +} + +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + + static const bool UseDriverInOrderLists = [&] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar( + 1, 3, L0_DRIVER_INORDER_MIN_VERSION); + if (!UrRet) + return CompatibleDriver; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + +ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, + int SubSubDeviceIndex) { + // Maintain various device properties cache. + // Note that we just describe here how to compute the data. + // The real initialization is upon first access. 
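For quick reference, an illustrative restatement of the UR_L0_USE_IMMEDIATE_COMMANDLISTS handling above (the mode names are the enum values used in the switch; the device-default heuristic is as coded above):

//   unset -> device default: PerQueue on PVC, or on DG2 with a sufficiently
//            new driver; NotUsed otherwise
//   0     -> NotUsed           (standard command lists)
//   1     -> PerQueue          (immediate command lists)
//   2     -> PerThreadPerQueue (immediate command lists, per thread per queue)
//   other -> NotUsed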
+ // + auto ZeDevice = this->ZeDevice; + ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceComputeProperties.Compute = + [ZeDevice](ze_device_compute_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceIpVersionExt.Compute = + [ZeDevice](ze_device_ip_version_ext_t &Properties) { + ze_device_properties_t P; + P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + P.pNext = (void *)&Properties; + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); + }; + + ZeDeviceImageProperties.Compute = + [ZeDevice](ze_device_image_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceModuleProperties.Compute = + [ZeDevice](ze_device_module_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); + }; ZeDeviceMemoryProperties.Compute = [ZeDevice]( @@ -1314,7 +1555,7 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_ERROR_UNKNOWN; } - if (CopyEngineRequested((ur_device_handle_t)this)) { + if (ur::level_zero::CopyEngineRequested((ur_device_handle_t)this)) { for (uint32_t i = 0; i < numQueueGroups; i++) { if (((QueueGroupProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && @@ -1355,26 +1596,6 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_SUCCESS; } -ur_result_t urDeviceRetain(ur_device_handle_t Device) { - - // The root-device ref-count remains unchanged (always 1). - if (Device->isSubDevice()) { - Device->RefCount.increment(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urDeviceRelease(ur_device_handle_t Device) { - // Root devices are destroyed during the piTearDown process. - if (Device->isSubDevice()) { - if (Device->RefCount.decrementAndTest()) { - delete Device; - } - } - - return UR_RESULT_SUCCESS; -} - void ZeDriverVersionStringExtension::setZeDriverVersionString( ur_platform_handle_t_ *Platform) { // Check if Intel Driver Version String is available. If yes, save the API @@ -1442,221 +1663,3 @@ void ZeUSMImportExtension::doZeUSMRelease(ze_driver_handle_t DriverHandle, void *HostPtr) { ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (DriverHandle, HostPtr)); } - -UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( - ur_device_handle_t Device, ///< [in] handle of the device to partition. - const ur_device_partition_properties_t - *Properties, ///< [in] Device partition properties. - uint32_t NumDevices, ///< [in] the number of sub-devices. - ur_device_handle_t - *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle - ///< of devices. If NumDevices is less than the number of - ///< sub-devices available, then the function shall only - ///< retrieve that number of sub-devices. - uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of - ///< sub-devices the device can be partitioned into - ///< according to the partitioning property. 
-) { - // Other partitioning ways are not supported by Level Zero - UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - if ((Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && - Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - if (Properties->pProperties->value.affinity_domain != 0) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else { - return UR_RESULT_ERROR_INVALID_VALUE; - } - - // Devices cache is normally created in piDevicesGet but still make - // sure that cache is populated. - // - auto Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != UR_RESULT_SUCCESS) { - return Res; - } - - auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { - if (Device->SubDevices.size() == 0) - return 0; - - // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. - // However, if - // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that - // still expose CSlices in partitioning by affinity domain for compatibility - // reasons. - if (Properties->pProperties->type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && - !ExposeCSliceInAffinityPartitioning) { - if (Device->isSubDevice()) { - return 0; - } - } - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - // Not a CSlice-based partitioning. - if (!Device->SubDevices[0]->isCCS()) { - return 0; - } - } - - return Device->SubDevices.size(); - }(); - - // TODO: Consider support for partitioning to <= total sub-devices. - // Currently supported partitioning (by affinity domain/numa) would always - // partition to all sub-devices. - // - if (NumDevices != 0) - UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - - for (uint32_t I = 0; I < NumDevices; I++) { - auto prop = Properties->pProperties[0]; - if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - // In case the value is NEXT_PARTITIONABLE, we need to change it to the - // chosen domain. This will always be NUMA since that's the only domain - // supported by level zero. - prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; - } - Device->SubDevices[I]->SubDeviceCreationProperty = prop; - - OutDevices[I] = Device->SubDevices[I]; - // reusing the same pi_device needs to increment the reference count - urDeviceRetain(OutDevices[I]); - } - - if (NumDevicesRet) { - *NumDevicesRet = EffectiveNumDevices; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( - ur_device_handle_t - Device, ///< [in] handle of the device to select binary for. - const ur_device_binary_t - *Binaries, ///< [in] the array of binaries to select from. - uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. - ///< Must greater than or equal to zero otherwise - ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. - uint32_t - *SelectedBinary ///< [out] the index of the selected binary in the input - ///< array of binaries. If a suitable binary was not - ///< found the function returns ${X}_INVALID_BINARY. -) { - std::ignore = Device; - // TODO: this is a bare-bones implementation for choosing a device image - // that would be compatible with the targeted device. An AOT-compiled - // image is preferred over SPIR-V for known devices (i.e. 
Intel devices) - // The implementation makes no effort to differentiate between multiple images - // for the given device, and simply picks the first one compatible. - // - // Real implementation will use the same mechanism OpenCL ICD dispatcher - // uses. Something like: - // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); - // return context->dispatch->piextDeviceSelectIR( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by PI - // plugin for platform/device the ctx was created for. - - // Look for GEN binary, which we known can only be handled by Level-Zero now. - const char *BinaryTarget = - UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; - - uint32_t *SelectedBinaryInd = SelectedBinary; - - // Find the appropriate device image, fallback to spirv if not found - constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); - uint32_t Spirv = InvalidInd; - - for (uint32_t i = 0; i < NumBinaries; ++i) { - if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { - *SelectedBinaryInd = i; - return UR_RESULT_SUCCESS; - } - if (strcmp(Binaries[i].pDeviceTargetSpec, - UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) - Spirv = i; - } - // Points to a spirv image, if such indeed was found - if ((*SelectedBinaryInd = Spirv) != InvalidInd) - return UR_RESULT_SUCCESS; - - // No image can be loaded for the given device - return UR_RESULT_ERROR_INVALID_BINARY; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( - ur_device_handle_t Device, ///< [in] handle of the device. - ur_native_handle_t - *NativeDevice ///< [out] a pointer to the native handle of the device. -) { - *NativeDevice = reinterpret_cast(Device->ZeDevice); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. - [[maybe_unused]] ur_adapter_handle_t - Adapter, ///< [in] handle of the platform instance - [[maybe_unused]] const ur_device_native_properties_t - *Properties, ///< [in][optional] pointer to native device properties - ///< struct. - ur_device_handle_t - *Device ///< [out] pointer to the handle of the device object created. -) { - auto ZeDevice = ur_cast(NativeDevice); - - // The SYCL spec requires that the set of devices must remain fixed for the - // duration of the application's execution. We assume that we found all of the - // Level Zero devices when we initialized the platforms/devices cache, so the - // "NativeHandle" must already be in the cache. If it is not, this must not be - // a valid Level Zero device. 
- - ur_device_handle_t Dev = nullptr; - if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { - for (const auto &p : *platforms) { - Dev = p->getDeviceFromNativeHandle(ZeDevice); - } - } else { - return GlobalAdapter->PlatformCache->get_error(); - } - - if (Dev == nullptr) - return UR_RESULT_ERROR_INVALID_VALUE; - - *Device = Dev; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( - ur_device_handle_t Device, ///< [in] handle of the device instance - uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's - ///< global timestamp that correlates with the - ///< Host's global timestamp value - uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global - ///< timestamp that correlates with the Device's - ///< global timestamp value -) { - const uint64_t &ZeTimerResolution = - Device->ZeDeviceProperties->timerResolution; - const uint64_t TimestampMaxCount = Device->getTimestampMask(); - uint64_t DeviceClockCount, Dummy; - - ZE2UR_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, - HostTimestamp == nullptr ? &Dummy : HostTimestamp, - &DeviceClockCount)); - - if (DeviceTimestamp != nullptr) { - *DeviceTimestamp = - (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; - } - - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 898edff779..a8b8098819 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/enqueue_native.cpp b/source/adapters/level_zero/enqueue_native.cpp index b67cccc4f1..7c3a1da988 100644 --- a/source/adapters/level_zero/enqueue_native.cpp +++ b/source/adapters/level_zero/enqueue_native.cpp @@ -8,13 +8,30 @@ // //===----------------------------------------------------------------------===// +#include #include +#include -#include "queue.hpp" +namespace ur::level_zero { + +ur_result_t urEnqueueNativeCommandExp( + ur_queue_handle_t hQueue, + ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, + uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, + const ur_exp_enqueue_native_command_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pfnNativeEnqueue; + std::ignore = data; + std::ignore = numMemsInMemList; + std::ignore = phMemList; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; -ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp( - ur_exp_enqueue_native_command_function_t, void *, uint32_t, - const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index f4dee0d661..f58db37753 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -18,6 +18,7 @@ #include "common.hpp" #include "event.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" void printZeEventList(const _ur_ze_event_list_t &UrZeEventList) { @@ -46,21 +47,23 @@ static const bool UseMultipleCmdlistBarriers = [] { }(); bool 
WaitListEmptyOrAllEventsFromSameQueue( - ur_queue_handle_legacy_t Queue, uint32_t NumEventsInWaitList, + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { if (!NumEventsInWaitList) return true; for (uint32_t i = 0; i < NumEventsInWaitList; ++i) { - if (Queue != Legacy(EventWaitList[i]->UrQueue)) + if (Queue != EventWaitList[i]->UrQueue) return false; } return true; } -ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of - ///< the queue object +namespace ur::level_zero { + +ur_result_t urEnqueueEventsWait( + ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -72,7 +75,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; if (EventWaitList) { bool UseCopyEngine = false; @@ -152,9 +154,8 @@ static const bool InOrderBarrierBySignal = [] { return (UrRet ? std::atoi(UrRet) : true); }(); -ur_result_t -ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the - ///< queue object +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -166,8 +167,6 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; - // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -197,7 +196,9 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the // if (Queue->isInOrderQueue() && InOrderBarrierBySignal && !Queue->isProfilingEnabled()) { - if (EventWaitList.Length) { + // If we are using driver in order lists, then append wait on events + // is unnecessary and we can signal the event created. + if (EventWaitList.Length && !CmdList->second.IsInOrderList) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); @@ -222,9 +223,8 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the return UR_RESULT_SUCCESS; } - ur_event_handle_t InternalEvent; + ur_event_handle_t ResultEvent = nullptr; bool IsInternal = OutEvent == nullptr; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; // For in-order queue and wait-list which is empty or has events from // the same queue just use the last command event as the barrier event. 
@@ -234,8 +234,11 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, EventWaitList) && Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { - UR_CALL(urEventRetain(Queue->LastCommandEvent)); - *Event = Queue->LastCommandEvent; + UR_CALL(ur::level_zero::urEventRetain(Queue->LastCommandEvent)); + ResultEvent = Queue->LastCommandEvent; + if (OutEvent) { + *OutEvent = ResultEvent; + } return UR_RESULT_SUCCESS; } @@ -265,16 +268,21 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the EventWaitList, OkToBatch)); // Insert the barrier into the command-list and execute. - UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, *Event, IsInternal)); + UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, ResultEvent, + IsInternal)); UR_CALL(Queue->executeCommandList(CmdList, false, OkToBatch)); // Because of the dependency between commands in the in-order queue we don't // need to keep track of any active barriers if we have in-order queue. if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue()) { - auto UREvent = reinterpret_cast(*Event); + auto UREvent = reinterpret_cast(ResultEvent); Queue->ActiveBarriers.add(UREvent); } + + if (OutEvent) { + *OutEvent = ResultEvent; + } return UR_RESULT_SUCCESS; } @@ -304,8 +312,8 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the for (auto &QueueMap : {Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID}) for (auto &QueueGroup : QueueMap) { - bool UseCopyEngine = QueueGroup.second.Type != - ur_queue_handle_legacy_t_::queue_type::Compute; + bool UseCopyEngine = + QueueGroup.second.Type != ur_queue_handle_t_::queue_type::Compute; if (Queue->UsingImmCmdLists) { // If immediate command lists are being used, each will act as their own // queue, so we must insert a barrier into each. @@ -362,20 +370,20 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the // Insert a barrier with the events from each command-queue into the // convergence command list. The resulting event signals the convergence of // all barriers. - UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList, *Event, - IsInternal)); + UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList, + ResultEvent, IsInternal)); } else { // If there is only a single queue then insert a barrier and the single // result event can be used as our active barrier and used as the return // event. Take into account whether output event is discarded or not. - UR_CALL(insertBarrierIntoCmdList(CmdLists[0], _ur_ze_event_list_t{}, *Event, - IsInternal)); + UR_CALL(insertBarrierIntoCmdList(CmdLists[0], _ur_ze_event_list_t{}, + ResultEvent, IsInternal)); } // Execute each command list so the barriers can be encountered. for (ur_command_list_ptr_t &CmdList : CmdLists) { - bool IsCopy = CmdList->second.isCopy( - reinterpret_cast(Queue)); + bool IsCopy = + CmdList->second.isCopy(reinterpret_cast(Queue)); const auto &CommandBatch = (IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch; // Only batch if the matching CmdList is already open. 
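A minimal sketch of the out-event handling introduced in this hunk: the result event is kept in a local handle and only written to the caller's pointer on each exit path, replacing the earlier InternalEvent/pointer-alias approach.

ur_event_handle_t ResultEvent = nullptr;
bool IsInternal = (OutEvent == nullptr);
// ... create ResultEvent, or reuse Queue->LastCommandEvent, as above ...
if (OutEvent)
  *OutEvent = ResultEvent;
return UR_RESULT_SUCCESS;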
@@ -385,12 +393,14 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the } UR_CALL(Queue->ActiveBarriers.clear()); - auto UREvent = reinterpret_cast(*Event); - Queue->ActiveBarriers.add(UREvent); + Queue->ActiveBarriers.add(ResultEvent); + if (OutEvent) { + *OutEvent = ResultEvent; + } return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( +ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_event_info_t PropName, ///< [in] the name of the event property to query size_t PropValueSize, ///< [in] size in bytes of the event property value @@ -419,7 +429,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( // possible that this is trying to query some event's status that // is part of the batch. This isn't strictly required, but it seems // like a reasonable thing to do. - auto UrQueue = Legacy(Event->UrQueue); + auto UrQueue = Event->UrQueue; if (UrQueue) { // Lock automatically releases when this goes out of scope. std::unique_lock Lock(UrQueue->Mutex, std::try_to_lock); @@ -473,7 +483,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( +ur_result_t urEventGetProfilingInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_profiling_info_t PropName, ///< [in] the name of the profiling property to query @@ -491,9 +501,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; } - ur_device_handle_t Device = Legacy(Event->UrQueue) - ? Legacy(Event->UrQueue)->Device - : Event->Context->Devices[0]; + ur_device_handle_t Device = + Event->UrQueue ? Event->UrQueue->Device : Event->Context->Devices[0]; uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; const uint64_t TimestampMaxValue = Device->getTimestampMask(); @@ -517,10 +526,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return ReturnValue(Event->RecordEventEndTimestamp); // Otherwise we need to collect it from the queue. - auto Entry = Legacy(Event->UrQueue)->EndTimeRecordings.find(Event); + auto Entry = Event->UrQueue->EndTimeRecordings.find(Event); // Unexpected state if there is no end-time record. - if (Entry == Legacy(Event->UrQueue)->EndTimeRecordings.end()) + if (Entry == Event->UrQueue->EndTimeRecordings.end()) return UR_RESULT_ERROR_UNKNOWN; auto &EndTimeRecording = Entry->second; @@ -545,7 +554,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( // anymore, so we cache it on the event and evict the record from the // queue. Event->RecordEventEndTimestamp = ContextEndTime; - Legacy(Event->UrQueue)->EndTimeRecordings.erase(Entry); + Event->UrQueue->EndTimeRecordings.erase(Entry); return ReturnValue(ContextEndTime); } @@ -663,7 +672,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( +ur_result_t urEnqueueTimestampRecordingExp( + ur_queue_handle_t Queue, ///< [in] handle of the queue object bool Blocking, ///< [in] blocking or non-blocking enqueue uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -677,7 +687,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( *OutEvent ///< [in,out] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -701,12 +710,13 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( (*OutEvent)->WaitList = TmpWaitList; uint64_t DeviceStartTimestamp = 0; - UR_CALL(urDeviceGetGlobalTimestamps(Device, &DeviceStartTimestamp, nullptr)); + UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps( + Device, &DeviceStartTimestamp, nullptr)); (*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp; // Create a new entry in the queue's recordings. Queue->EndTimeRecordings[*OutEvent] = - ur_queue_handle_legacy_t_::end_time_recording{}; + ur_queue_handle_t_::end_time_recording{}; ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, (CommandList->first, @@ -720,64 +730,15 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( - ze_event_handle_t &ZeHostVisibleEvent) { - auto UrQueue = Legacy(this->UrQueue); - - std::scoped_lock Lock(UrQueue->Mutex, - this->Mutex); - - if (!HostVisibleEvent) { - this->IsCreatingHostProxyEvent = true; - if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) - die("getOrCreateHostVisibleEvent: missing host-visible event"); - - // Submit the command(s) signalling the proxy event to the queue. - // We have to first submit a wait for the device-only event for which this - // proxy is created. - // - // Get a new command list to be used on this call - - // We want to batch these commands to avoid extra submissions (costly) - bool OkToBatch = true; - - ur_command_list_ptr_t CommandList{}; - UR_CALL(UrQueue->Context->getAvailableCommandList( - UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) - - // Create a "proxy" host-visible event. - UR_CALL(createEventAndAssociateQueue( - UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, - /* IsInternal */ false, /* IsMultiDevice */ false, - /* HostVisible */ true)); - - if (this->IsInnerBatchedEvent) { - ZE2UR_CALL(zeCommandListAppendBarrier, - (CommandList->first, ZeEvent, 0, nullptr)); - } else { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (CommandList->first, 1, &ZeEvent)); - } - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CommandList->first, HostVisibleEvent->ZeEvent)); - - UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) - this->IsCreatingHostProxyEvent = false; - } - - ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventWait( - uint32_t NumEvents, ///< [in] number of events in the event list - const ur_event_handle_t - *EventWaitList ///< [in][range(0, numEvents)] pointer to a list of - ///< events to wait for completion +ur_result_t +urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list + const ur_event_handle_t + *EventWaitList ///< [in][range(0, numEvents)] pointer to a list + ///< of events to wait for completion ) { for (uint32_t I = 0; I < NumEvents; I++) { auto e = EventWaitList[I]; - auto UrQueue = Legacy(e->UrQueue); + auto UrQueue = e->UrQueue; if (UrQueue && UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) { // Make sure to add all host-visible "proxy" event signals if needed. 
// This ensures that all signalling commands are submitted below and @@ -795,7 +756,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( // Submit dependent open command lists for execution, if any for (uint32_t I = 0; I < NumEvents; I++) { ur_event_handle_t_ *Event = ur_cast(EventWaitList[I]); - auto UrQueue = Legacy(Event->UrQueue); + auto UrQueue = Event->UrQueue; if (UrQueue) { // Lock automatically releases when this goes out of scope. std::scoped_lock lock(UrQueue->Mutex); @@ -803,7 +764,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( UR_CALL(UrQueue->executeAllOpenCommandLists()); } } - std::unordered_set Queues; + std::unordered_set Queues; for (uint32_t I = 0; I < NumEvents; I++) { { ur_event_handle_t_ *Event = @@ -830,13 +791,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( Event->Completed = true; } } - if (auto Q = Legacy(Event->UrQueue)) { + if (auto Q = Event->UrQueue) { if (Q->UsingImmCmdLists && Q->isInOrderQueue()) // Use information about waited event to cleanup completed events in // the in-order queue. CleanupEventsInImmCmdLists( - Legacy(Event->UrQueue), false /* QueueLocked */, - false /* QueueSynced */, + Event->UrQueue, false /* QueueLocked */, false /* QueueSynced */, reinterpret_cast(Event)); else { // NOTE: we are cleaning up after the event here to free resources @@ -861,8 +821,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRetain(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal++; Event->RefCount.increment(); @@ -870,8 +830,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRelease(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal--; UR_CALL(urEventReleaseInternal(Event)); @@ -879,7 +839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( +ur_result_t urEventGetNativeHandle( ur_event_handle_t Event, ///< [in] handle of the event. ur_native_handle_t *NativeEvent ///< [out] a pointer to the native handle of the event. @@ -892,7 +852,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( // Event can potentially be in an open command-list, make sure that // it is submitted for execution to avoid potential deadlock if // interop app is going to wait for it. - auto Queue = Legacy(Event->UrQueue); + auto Queue = Event->UrQueue; if (Queue) { std::scoped_lock lock(Queue->Mutex); const auto &OpenCommandList = Queue->eventOpenCommandList(Event); @@ -904,7 +864,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( +ur_result_t urExtEventCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_event_handle_t *Event ///< [out] pointer to the handle of the event object created. @@ -917,7 +877,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( +ur_result_t urEventCreateWithNativeHandle( ur_native_handle_t NativeEvent, ///< [in] the native handle of the event. 
ur_context_handle_t Context, ///< [in] handle of the context object const ur_event_native_properties_t *Properties, @@ -967,7 +927,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( +ur_result_t urEventSetCallback( ur_event_handle_t Event, ///< [in] handle of the event object ur_execution_info_t ExecStatus, ///< [in] execution status of the event ur_event_callback_t Notify, ///< [in] execution status of the event @@ -983,6 +943,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero + +ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( + ze_event_handle_t &ZeHostVisibleEvent) { + auto UrQueue = this->UrQueue; + + std::scoped_lock Lock(UrQueue->Mutex, + this->Mutex); + + if (!HostVisibleEvent) { + this->IsCreatingHostProxyEvent = true; + if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) + die("getOrCreateHostVisibleEvent: missing host-visible event"); + + // Submit the command(s) signalling the proxy event to the queue. + // We have to first submit a wait for the device-only event for which this + // proxy is created. + // + // Get a new command list to be used on this call + + // We want to batch these commands to avoid extra submissions (costly) + bool OkToBatch = true; + + ur_command_list_ptr_t CommandList{}; + UR_CALL(UrQueue->Context->getAvailableCommandList( + UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) + + // Create a "proxy" host-visible event. + UR_CALL(createEventAndAssociateQueue( + UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, + /* IsInternal */ false, /* IsMultiDevice */ false, + /* HostVisible */ true)); + + if (this->IsInnerBatchedEvent) { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandList->first, ZeEvent, 0, nullptr)); + } else { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandList->first, 1, &ZeEvent)); + } + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandList->first, HostVisibleEvent->ZeEvent)); + + UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) + this->IsCreatingHostProxyEvent = false; + } + + ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; + return UR_RESULT_SUCCESS; +} + ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (!Event->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1022,7 +1033,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { } // Save pointer to the queue before deleting/resetting event. - auto Queue = Legacy(Event->UrQueue); + auto Queue = Event->UrQueue; // If the event was a timestamp recording, we try to evict its entry in the // queue. @@ -1099,7 +1110,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, ur_kernel_handle_t AssociatedKernel = nullptr; // List of dependent events. std::list EventsToBeReleased; - ur_queue_handle_legacy_t AssociatedQueue = nullptr; + ur_queue_handle_t AssociatedQueue = nullptr; { // If the Event is already locked, then continue with the cleanup, otherwise // block on locking the event. @@ -1113,7 +1124,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, if (Event->CleanedUp) return UR_RESULT_SUCCESS; - AssociatedQueue = Legacy(Event->UrQueue); + AssociatedQueue = Event->UrQueue; // Remember the kernel associated with this event if there is one. We are // going to release it later. 
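A rough sketch of the wait-then-signal flow that getOrCreateHostVisibleEvent (moved above) appends for an on-demand host-visible proxy, following its non-batched path and using the same Level Zero entry points as the hunk; the wrapper function name is hypothetical. The command list first waits on the device-only event and only then signals the proxy, so the host can poll the proxy without every event having to be created host-visible.

    #include <ze_api.h>

    // Sketch only: order matters, the proxy must not signal before the
    // device-only event it stands in for has completed.
    ze_result_t appendProxySignal(ze_command_list_handle_t CmdList,
                                  ze_event_handle_t DeviceOnlyEvent,
                                  ze_event_handle_t HostVisibleProxy) {
      ze_result_t Res =
          zeCommandListAppendWaitOnEvents(CmdList, 1, &DeviceOnlyEvent);
      if (Res != ZE_RESULT_SUCCESS)
        return Res;
      return zeCommandListAppendSignalEvent(CmdList, HostVisibleProxy);
    }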
@@ -1158,7 +1169,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // We've reset event data members above, now cleanup resources. if (AssociatedKernel) { ReleaseIndirectMem(AssociatedKernel); - UR_CALL(urKernelRelease(AssociatedKernel)); + UR_CALL(ur::level_zero::urKernelRelease(AssociatedKernel)); } if (AssociatedQueue) { @@ -1217,7 +1228,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, } if (DepEventKernel) { ReleaseIndirectMem(DepEventKernel); - UR_CALL(urKernelRelease(DepEventKernel)); + UR_CALL(ur::level_zero::urKernelRelease(DepEventKernel)); } UR_CALL(urEventReleaseInternal(DepEvent)); } @@ -1230,9 +1241,9 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // The "HostVisible" argument specifies if event needs to be allocated from // a host-visible pool. // -ur_result_t EventCreate(ur_context_handle_t Context, - ur_queue_handle_legacy_t Queue, bool IsMultiDevice, - bool HostVisible, ur_event_handle_t *RetEvent, +ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, + bool IsMultiDevice, bool HostVisible, + ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled, bool ForceDisableProfiling) { bool ProfilingEnabled = @@ -1319,7 +1330,7 @@ ur_result_t ur_event_handle_t_::reset() { ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( uint32_t EventListLength, const ur_event_handle_t *EventList, - ur_queue_handle_legacy_t CurQueue, bool UseCopyEngine) { + ur_queue_handle_t CurQueue, bool UseCopyEngine) { this->Length = 0; this->ZeEventList = nullptr; this->UrEventList = nullptr; @@ -1435,7 +1446,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( } } - auto Queue = Legacy(EventList[I]->UrQueue); + auto Queue = EventList[I]->UrQueue; auto CurQueueDevice = CurQueue->Device; std::optional> QueueLock = @@ -1508,8 +1519,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( std::shared_lock Lock(EventList[I]->Mutex); - ur_device_handle_t QueueRootDevice; - ur_device_handle_t CurrentQueueRootDevice; + ur_device_handle_t QueueRootDevice = nullptr; + ur_device_handle_t CurrentQueueRootDevice = nullptr; if (Queue) { QueueRootDevice = Queue->Device; CurrentQueueRootDevice = CurQueueDevice; @@ -1537,8 +1548,13 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (ZeCommandList, 1u, &EventList[I]->ZeEvent)); - if (!MultiDeviceEvent->CounterBasedEventsEnabled) + if (!MultiDeviceEvent->CounterBasedEventsEnabled) { ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (ZeCommandList, MultiDeviceZeEvent)); + } + MultiDeviceEvent->Completed = true; UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false, /* OkToBatchCommand */ true)); @@ -1636,7 +1652,7 @@ ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList( // Tells if this event is with profiling capabilities. 
bool ur_event_handle_t_::isProfilingEnabled() const { return !UrQueue || // tentatively assume user events are profiling enabled - (Legacy(UrQueue)->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; + (UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; } // Tells if this event was created as a timestamp event, allowing profiling diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index e99df2a272..7dd64acdaa 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include @@ -29,9 +29,9 @@ extern "C" { ur_result_t urEventReleaseInternal(ur_event_handle_t Event); -ur_result_t EventCreate(ur_context_handle_t Context, - ur_queue_handle_legacy_t Queue, bool IsMultiDevice, - bool HostVisible, ur_event_handle_t *RetEvent, +ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, + bool IsMultiDevice, bool HostVisible, + ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled = false, bool ForceDisableProfiling = false); } // extern "C" @@ -89,7 +89,7 @@ struct _ur_ze_event_list_t { // command-lists. ur_result_t createAndRetainUrZeEventList(uint32_t EventListLength, const ur_event_handle_t *EventList, - ur_queue_handle_legacy_t CurQueue, + ur_queue_handle_t CurQueue, bool UseCopyEngine); // Add all the events in this object's UrEventList to the end diff --git a/source/adapters/level_zero/helpers/memory_helpers.cpp b/source/adapters/level_zero/helpers/memory_helpers.cpp new file mode 100644 index 0000000000..aea32795ab --- /dev/null +++ b/source/adapters/level_zero/helpers/memory_helpers.cpp @@ -0,0 +1,33 @@ +//===--------- memory_helpers.cpp - Level Zero Adapter -------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "memory_helpers.hpp" +#include "../common.hpp" + +ze_memory_type_t getMemoryType(ze_context_handle_t hContext, void *ptr) { + // TODO: use UMF once + // https://github.com/oneapi-src/unified-memory-framework/issues/687 is + // implemented + ZeStruct zeMemoryAllocationProperties; + ZE2UR_CALL_THROWS(zeMemGetAllocProperties, + (hContext, ptr, &zeMemoryAllocationProperties, nullptr)); + return zeMemoryAllocationProperties.type; +} + +bool maybeImportUSM(ze_driver_handle_t hTranslatedDriver, + ze_context_handle_t hContext, void *ptr, size_t size) { + if (ZeUSMImport.Enabled && ptr != nullptr && + getMemoryType(hContext, ptr) == ZE_MEMORY_TYPE_UNKNOWN) { + // Promote the host ptr to USM host memory + ZeUSMImport.doZeUSMImport(hTranslatedDriver, ptr, size); + return true; + } + return false; +} diff --git a/source/adapters/level_zero/helpers/memory_helpers.hpp b/source/adapters/level_zero/helpers/memory_helpers.hpp new file mode 100644 index 0000000000..ad50be992c --- /dev/null +++ b/source/adapters/level_zero/helpers/memory_helpers.hpp @@ -0,0 +1,23 @@ +//===--------- memory_helpers.hpp - Level Zero Adapter -------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +// If USM Import feature is enabled and hostptr is supplied, +// import the hostptr if not already imported into USM. +// Data transfer rate is maximized when both source and destination +// are USM pointers. Promotion of the host pointer to USM thus +// optimizes data transfer performance. +bool maybeImportUSM(ze_driver_handle_t hTranslatedDriver, + ze_context_handle_t hContext, void *ptr, size_t size); + +ze_memory_type_t getMemoryType(ze_context_handle_t hContext, void *ptr); diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index f68b2d93be..a717597623 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -14,7 +14,9 @@ #include "event.hpp" #include "logger/ur_logger.hpp" #include "sampler.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" +#include "ze_api.h" typedef ze_result_t(ZE_APICALL *zeImageGetDeviceOffsetExp_pfn)( ze_image_handle_t hImage, uint64_t *pDeviceOffset); @@ -444,7 +446,8 @@ ur_result_t bindlessImagesCreateImpl(ur_context_handle_t hContext, ze_image_handle_t ZeImage; ze_memory_allocation_properties_t MemAllocProperties{ - ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES}; + ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, nullptr, + ZE_MEMORY_TYPE_UNKNOWN, 0, 0}; ZE2UR_CALL(zeMemGetAllocProperties, (hContext->ZeContext, reinterpret_cast(hImageMem), &MemAllocProperties, nullptr)); @@ -631,11 +634,14 @@ getImageFormatTypeAndSize(const ur_image_format_t *ImageFormat) { return {ZeImageFormatType, ZeImageFormatTypeSize}; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { +namespace ur::level_zero { + +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch) { std::shared_lock Lock(hContext->Mutex); UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -668,13 +674,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( *pResultPitch = RowPitch; size_t Size = height * RowPitch; - UR_CALL(urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, Size, ppMem)); + UR_CALL(ur::level_zero::urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, + Size, ppMem)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesUnsampledImageHandleDestroyExp( +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { UR_ASSERT(hContext && hDevice && hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -691,17 +697,16 @@ urBindlessImagesUnsampledImageHandleDestroyExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesSampledImageHandleDestroyExp( +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { // Sampled image is a combination of unsampled image and sampler. // Sampler is released in urSamplerRelease. 
- return urBindlessImagesUnsampledImageHandleDestroyExp(hContext, hDevice, - hImage); + return ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp( + hContext, hDevice, hImage); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( +ur_result_t urBindlessImagesImageAllocateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -730,16 +735,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem) { +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem) { std::ignore = hContext; std::ignore = hDevice; - UR_CALL(urMemRelease(reinterpret_cast(hImageMem))); + UR_CALL(ur::level_zero::urMemRelease( + reinterpret_cast(hImageMem))); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( +ur_result_t urBindlessImagesUnsampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -749,7 +756,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( +ur_result_t urBindlessImagesSampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -759,8 +766,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( - [[maybe_unused]] const void *pSrc, [[maybe_unused]] void *pDst, +ur_result_t urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] void *pDst, [[maybe_unused]] const ur_image_desc_t *pSrcImageDesc, [[maybe_unused]] const ur_image_desc_t *pDstImageDesc, [[maybe_unused]] const ur_image_format_t *pSrcImageFormat, @@ -770,7 +778,6 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { - auto hQueue = this; std::scoped_lock Lock(hQueue->Mutex); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -920,7 +927,7 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( +ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hImageMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -970,7 +977,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( } } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( +ur_result_t urBindlessImagesMipmapGetLevelExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, 
ur_exp_image_mem_native_handle_t *phImageMem) { @@ -984,13 +991,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hMem) { - return urBindlessImagesImageFreeExp(hContext, hDevice, hMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem) { + return ur::level_zero::urBindlessImagesImageFreeExp(hContext, hDevice, hMem); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( +ur_result_t urBindlessImagesImportExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, ur_exp_external_mem_type_t memHandleType, ur_exp_external_mem_desc_t *pExternalMemDesc, @@ -1050,7 +1058,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( +ur_result_t urBindlessImagesMapExternalArrayExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_external_mem_handle_t hExternalMem, @@ -1085,7 +1093,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **phRetMem) { std::ignore = hContext; @@ -1099,7 +1107,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( +ur_result_t urBindlessImagesReleaseExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_mem_handle_t hExternalMem) { @@ -1109,7 +1117,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( struct ur_ze_external_memory_data *externalMemoryData = reinterpret_cast(hExternalMem); - UR_CALL(urMemRelease(externalMemoryData->urMemoryHandle)); + UR_CALL(ur::level_zero::urMemRelease(externalMemoryData->urMemoryHandle)); switch (externalMemoryData->type) { case UR_ZE_EXTERNAL_OPAQUE_FD: @@ -1129,7 +1137,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( +ur_result_t urBindlessImagesImportExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_type_t semHandleType, ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, @@ -1144,7 +1152,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_handle_t hExternalSemaphore) { std::ignore = hContext; @@ -1155,10 +1163,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue, - uint64_t waitValue, uint32_t numEventsInWaitList, +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasValue, uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = waitValue; @@ -1170,10 +1179,11 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue, - uint64_t signalValue, uint32_t numEventsInWaitList, +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasValue, uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = signalValue; @@ -1184,3 +1194,5 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/image.hpp b/source/adapters/level_zero/image.hpp index 618258601d..43f37fa757 100644 --- a/source/adapters/level_zero/image.hpp +++ b/source/adapters/level_zero/image.hpp @@ -10,7 +10,7 @@ #pragma once #include -#include +#include #include #include diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 3469620b71..9c638d53f6 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -11,11 +11,29 @@ #include "kernel.hpp" #include "logger/ur_logger.hpp" #include "ur_api.h" -#include "ur_level_zero.hpp" +#include "ur_interface_loader.hpp" #include "helpers/kernel_helpers.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( +ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, + ze_kernel_handle_t *phZeKernel) { + if (hKernel->ZeKernelMap.empty()) { + *phZeKernel = hKernel->ZeKernel; + } else { + auto It = hKernel->ZeKernelMap.find(hDevice); + if (It == hKernel->ZeKernelMap.end()) { + /* kernel and queue don't match */ + return UR_RESULT_ERROR_INVALID_QUEUE; + } + *phZeKernel = It->second; + } + + return UR_RESULT_SUCCESS; +} + +namespace ur::level_zero { + +ur_result_t urKernelGetSuggestedLocalWorkSize( ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim, [[maybe_unused]] const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, size_t *pSuggestedLocalWorkSize) { @@ -29,32 +47,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( std::copy(pGlobalWorkSize, pGlobalWorkSize + workDim, GlobalWorkSize3D); ze_kernel_handle_t ZeKernel{}; - UR_CALL(getZeKernel(Legacy(hQueue)->Device->ZeDevice, hKernel, &ZeKernel)); + UR_CALL(getZeKernel(hQueue->Device->ZeDevice, hKernel, &ZeKernel)); - UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue)->Device, ZeKernel, - GlobalWorkSize3D, LocalWorkSize)); + UR_CALL(getSuggestedLocalWorkSize(hQueue->Device, ZeKernel, 
GlobalWorkSize3D, + LocalWorkSize)); std::copy(LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize); return UR_RESULT_SUCCESS; } -ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, - ze_kernel_handle_t *phZeKernel) { - if (hKernel->ZeKernelMap.empty()) { - *phZeKernel = hKernel->ZeKernel; - } else { - auto It = hKernel->ZeKernelMap.find(hDevice); - if (It == hKernel->ZeKernelMap.end()) { - /* kernel and queue don't match */ - return UR_RESULT_ERROR_INVALID_QUEUE; - } - *phZeKernel = It->second; - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( +ur_result_t urEnqueueKernelLaunch( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group work-items @@ -86,7 +89,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - auto Queue = this; ze_kernel_handle_t ZeKernel{}; UR_CALL(getZeKernel(Queue->Device->ZeDevice, Kernel, &ZeKernel)); @@ -158,7 +160,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -204,7 +206,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( +ur_result_t urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group work-items @@ -236,7 +239,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - auto Queue = this; auto ZeDevice = Queue->Device->ZeDevice; ze_kernel_handle_t ZeKernel{}; @@ -422,7 +424,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -468,7 +470,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( +ur_result_t urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. ur_program_handle_t Program, ///< [in] handle of the program containing the ///< device global variable. 
const char @@ -489,14 +492,21 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = this; std::scoped_lock lock(Queue->Mutex); + ze_module_handle_t ZeModule{}; + auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); + if (It != Program->ZeModuleMap.end()) { + ZeModule = It->second; + } else { + ZeModule = Program->ZeModule; + } + // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; ZE2UR_CALL(zeModuleGetGlobalPointer, - (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); + (ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Write device global variable is out of range.", UR_RESULT_ERROR_INVALID_VALUE, @@ -522,29 +532,28 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( EventWaitList, Event, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( - ur_program_handle_t Program, ///< [in] handle of the program containing - ///< the device global variable. - const char *Name, ///< [in] the unique identifier for the device global - ///< variable. +ur_result_t urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. + ur_program_handle_t Program, ///< [in] handle of the program containing the + ///< device global variable. + const char + *Name, ///< [in] the unique identifier for the device global variable. bool BlockingRead, ///< [in] indicates if this operation should block. size_t Count, ///< [in] the number of bytes to copy. - size_t Offset, ///< [in] the byte offset into the device global variable - ///< to start copying. - void *Dst, ///< [in] pointer to where the data must be copied to. + size_t Offset, ///< [in] the byte offset into the device global variable to + ///< start copying. + void *Dst, ///< [in] pointer to where the data must be copied to. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list. const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be - ///< complete before the kernel execution. If - ///< nullptr, the numEventsInWaitList must be 0, - ///< indicating that no wait event. + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that no + ///< wait event. ur_event_handle_t - *Event ///< [in,out][optional] return an event object that - ///< identifies this particular kernel execution instance. + *Event ///< [in,out][optional] return an event object that identifies + ///< this particular kernel execution instance. ) { - auto Queue = this; - std::scoped_lock lock(Queue->Mutex); // Find global variable pointer @@ -577,7 +586,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( EventWaitList, Event, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( +ur_result_t urKernelCreate( ur_program_handle_t Program, ///< [in] handle of the program instance const char *KernelName, ///< [in] pointer to null-terminated string. 
ur_kernel_handle_t @@ -640,7 +649,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( +ur_result_t urKernelSetArgValue( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of argument type @@ -690,7 +699,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( +ur_result_t urKernelSetArgLocal( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of the local buffer to be allocated by the @@ -700,12 +709,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( ) { std::ignore = Properties; - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, ArgSize, nullptr, nullptr)); + UR_CALL(ur::level_zero::urKernelSetArgValue(Kernel, ArgIndex, ArgSize, + nullptr, nullptr)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( +ur_result_t urKernelGetInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_kernel_info_t ParamName, ///< [in] name of the Kernel property to query size_t PropSize, ///< [in] the size of the Kernel property value. @@ -767,7 +777,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( +ur_result_t urKernelGetGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_group_info_t @@ -848,7 +858,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( +ur_result_t urKernelGetSubGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_sub_group_info_t @@ -879,7 +889,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( +ur_result_t urKernelRetain( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to retain ) { Kernel->RefCount.increment(); @@ -887,7 +897,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( +ur_result_t urKernelRelease( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to release ) { if (!Kernel->RefCount.decrementAndTest()) @@ -904,7 +914,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( } Kernel->ZeKernelMap.clear(); if (IndirectAccessTrackingEnabled) { - UR_CALL(urContextRelease(KernelProgram->Context)); + UR_CALL(ur::level_zero::urContextRelease(KernelProgram->Context)); } // do a release on the program this kernel was part of without delete of the // program handle @@ -915,7 +925,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( +ur_result_t urKernelSetArgPointer( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_pointer_properties_t @@ -927,12 +937,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( std::ignore = Properties; // 
KernelSetArgValue is expecting a pointer to the argument - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, sizeof(const void *), nullptr, - &ArgValue)); + UR_CALL(ur::level_zero::urKernelSetArgValue( + Kernel, ArgIndex, sizeof(const void *), nullptr, &ArgValue)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( +ur_result_t urKernelSetExecInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object ur_kernel_exec_info_t PropName, ///< [in] name of the execution attribute size_t PropSize, ///< [in] size in byte the attribute value @@ -978,7 +988,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( +ur_result_t urKernelSetArgSampler( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_sampler_properties_t @@ -996,7 +1006,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( +ur_result_t urKernelSetArgMemObj( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_mem_obj_properties_t @@ -1038,7 +1048,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( +ur_result_t urKernelGetNativeHandle( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel. ur_native_handle_t *NativeKernel ///< [out] a pointer to the native handle of the kernel. @@ -1049,7 +1059,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) { (void)localWorkSize; @@ -1062,7 +1072,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( +ur_result_t urKernelCreateWithNativeHandle( ur_native_handle_t NativeKernel, ///< [in] the native handle of the kernel. ur_context_handle_t Context, ///< [in] handle of the context object ur_program_handle_t Program, @@ -1098,13 +1108,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in] array of specialization constant value + ///< descriptions +) { + std::ignore = Kernel; + std::ignore = Count; + std::ignore = SpecConstants; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + ur_result_t ur_kernel_handle_t_::initialize() { // Retain the program and context to show it's used by this kernel. 
- UR_CALL(urProgramRetain(Program)); + UR_CALL(ur::level_zero::urProgramRetain(Program)); if (IndirectAccessTrackingEnabled) // TODO: do piContextRetain without the guard - UR_CALL(urContextRetain(Program->Context)); + UR_CALL(ur::level_zero::urContextRetain(Program->Context)); // Set up how to obtain kernel properties when needed. ZeKernelProperties.Compute = [this](ze_kernel_properties_t &Properties) { @@ -1123,36 +1150,3 @@ ur_result_t ur_kernel_handle_t_::initialize() { return UR_RESULT_SUCCESS; } - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants( - ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in] array of specialization constant value - ///< descriptions -) { - std::ignore = Kernel; - std::ignore = Count; - std::ignore = SpecConstants; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunchCustomExp( - ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, - const ur_exp_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - std::ignore = hKernel; - std::ignore = workDim; - std::ignore = pGlobalWorkSize; - std::ignore = pLocalWorkSize; - std::ignore = numPropsInLaunchPropList; - std::ignore = launchPropList; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 585a10ef4f..69edf83a78 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -15,9 +15,11 @@ #include "context.hpp" #include "event.hpp" +#include "helpers/memory_helpers.hpp" #include "image.hpp" #include "logger/ur_logger.hpp" #include "queue.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" // Default to using compute engine for fill operation, but allow to @@ -59,7 +61,7 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) { // PI interfaces must have queue's and destination buffer's mutexes locked for // exclusive use and source buffer's mutex locked for shared use on entry. ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, - ur_queue_handle_legacy_t Queue, void *Dst, + ur_queue_handle_t Queue, void *Dst, ur_bool_t BlockingWrite, size_t Size, const void *Src, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, @@ -112,13 +114,12 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, // PI interfaces must have queue's and destination buffer's mutexes locked for // exclusive use and source buffer's mutex locked for shared use on entry. 
ur_result_t enqueueMemCopyRectHelper( - ur_command_t CommandType, ur_queue_handle_legacy_t Queue, - const void *SrcBuffer, void *DstBuffer, ur_rect_offset_t SrcOrigin, - ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, - size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch, - ur_bool_t Blocking, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent, - bool PreferCopyEngine) { + ur_command_t CommandType, ur_queue_handle_t Queue, const void *SrcBuffer, + void *DstBuffer, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, + ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch, + size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent, bool PreferCopyEngine) { bool UseCopyEngine = Queue->useCopyEngine(PreferCopyEngine); _ur_ze_event_list_t TmpWaitList; @@ -198,9 +199,9 @@ ur_result_t enqueueMemCopyRectHelper( // PI interfaces must have queue's and buffer's mutexes locked on entry. static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, - ur_queue_handle_legacy_t Queue, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, + ur_queue_handle_t Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Size, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { @@ -315,7 +316,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. - UR_CALL(urContextRetain(UrContext)); + UR_CALL(ur::level_zero::urContextRetain(UrContext)); } ZeStruct ZeDesc; @@ -337,7 +338,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr, // PI interfaces must have queue's and destination image's mutexes locked for // exclusive use and source image's mutex locked for shared use on entry. static ur_result_t enqueueMemImageCommandHelper( - ur_command_t CommandType, ur_queue_handle_legacy_t Queue, + ur_command_t CommandType, ur_queue_handle_t Queue, const void *Src, // image or ptr void *Dst, // image or ptr ur_bool_t IsBlocking, ur_rect_offset_t *SrcOrigin, @@ -474,7 +475,10 @@ static ur_result_t enqueueMemImageCommandHelper( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( +namespace ur::level_zero { + +ur_result_t urEnqueueMemBufferRead( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -492,7 +496,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = this; ur_mem_handle_t_ *Src = ur_cast(hBuffer); std::shared_lock SrcLock(Src->Mutex, std::defer_lock); @@ -508,7 +511,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( true /* PreferCopyEngine */); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( +ur_result_t urEnqueueMemBufferWrite( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) @@ -528,7 +532,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -545,7 +548,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( true /* PreferCopyEngine */); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( +ur_result_t urEnqueueMemBufferReadRect( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer @@ -573,7 +577,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::shared_lock SrcLock(Buffer->Mutex, std::defer_lock); @@ -590,7 +593,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( phEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( +ur_result_t urEnqueueMemBufferWriteRect( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) @@ -620,7 +624,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -637,7 +640,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( phEventWaitList, phEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( +ur_result_t urEnqueueMemBufferCopy( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the src buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object size_t SrcOffset, ///< [in] offset into hBufferSrc to begin copying from @@ -655,7 +659,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -688,9 +691,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( OutEvent, PreferCopyEngine); } -ur_result_t -ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the - ///< queue object +ur_result_t urEnqueueMemBufferCopyRect( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the source buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object ur_rect_offset_t SrcOrigin, ///< [in] 3D offset in the source buffer @@ -717,7 +719,6 @@ ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -748,11 +749,12 @@ ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( - ur_mem_handle_t Buffer, ///< [in] handle of the buffer object - const void *Pattern, ///< [in] pointer to the fill pattern - size_t PatternSize, ///< [in] size in bytes of the pattern - size_t Offset, ///< [in] offset into the buffer +ur_result_t urEnqueueMemBufferFill( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Buffer, ///< [in] handle of the buffer object + const void *Pattern, ///< [in] pointer to the fill pattern + size_t PatternSize, ///< [in] size in bytes of the pattern + size_t Offset, ///< [in] offset into the buffer size_t Size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -766,7 +768,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); @@ -781,8 +782,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( Size, NumEventsInWaitList, EventWaitList, OutEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t urEnqueueMemImageRead( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in ///< the 1D, 2D, or 3D image @@ -803,7 +805,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -812,8 +813,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( EventWaitList, OutEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t urEnqueueMemImageWrite( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in @@ -835,7 +837,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -844,9 +845,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( EventWaitList, OutEvent); } -ur_result_t -ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of - ///< the queue object +ur_result_t urEnqueueMemImageCopy( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t ImageSrc, ///< [in] handle of the src image object ur_mem_handle_t ImageDst, ///< [in] handle of the dest image object ur_rect_offset_t SrcOrigin, ///< [in] defines the (x,y,z) offset in pixels @@ -867,7 +867,6 @@ ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock); std::scoped_lock, ur_shared_mutex, ur_shared_mutex> @@ -885,8 +884,9 @@ ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( - ur_mem_handle_t Buf, ///< [in] handle of the buffer object +ur_result_t urEnqueueMemBufferMap( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Buf, ///< [in] handle of the buffer object bool BlockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t MapFlags, ///< [in] flags for read, write, readwrite mapping size_t Offset, ///< [in] offset in bytes of the buffer region being mapped @@ -905,7 +905,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( void **RetMap ///< [in,out] return mapped pointer. TODO: move it before ///< numEventsInWaitList? ) { - auto Queue = this; auto Buffer = ur_cast<_ur_buffer *>(Buf); UR_ASSERT(!Buffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -964,10 +963,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); // Lock automatically releases when this goes out of scope. 
std::scoped_lock Guard(Buffer->Mutex); @@ -1053,7 +1052,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( +ur_result_t urEnqueueMemUnmap( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t Mem, ///< [in] handle of the memory (buffer or image) object void *MappedPtr, ///< [in] mapped host address uint32_t NumEventsInWaitList, ///< [in] size of the event wait list @@ -1068,7 +1068,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; UR_ASSERT(!Mem->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); auto Buffer = ur_cast<_ur_buffer *>(Mem); @@ -1120,10 +1119,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); char *ZeHandleDst; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -1146,8 +1145,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - reinterpret_cast(Queue), CommandList, - UseCopyEngine, NumEventsInWaitList, EventWaitList)); + reinterpret_cast(Queue), CommandList, UseCopyEngine, + NumEventsInWaitList, EventWaitList)); CommandList->second.append(reinterpret_cast(*Event)); (*Event)->RefCount.increment(); @@ -1180,8 +1179,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( - bool Blocking, ///< [in] blocking or non-blocking copy +ur_result_t urEnqueueUSMMemcpy( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + bool Blocking, ///< [in] blocking or non-blocking copy void *Dst, ///< [in] pointer to the destination USM memory object const void *Src, ///< [in] pointer to the source USM memory object size_t Size, ///< [in] size in bytes to be copied @@ -1197,7 +1197,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::scoped_lock lock(Queue->Mutex); // Device to Device copies are found to execute slower on copy engine @@ -1219,7 +1218,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( +ur_result_t urEnqueueUSMPrefetch( + ur_queue_handle_t Queue, ///< [in] handle of the queue object const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t Flags, ///< [in] USM prefetch flags @@ -1235,7 +1235,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::ignore = Flags; // Lock automatically releases when this goes out of scope. 
std::scoped_lock lock(Queue->Mutex); @@ -1287,7 +1286,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( +ur_result_t urEnqueueUSMAdvise( + ur_queue_handle_t Queue, ///< [in] handle of the queue object const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t Advice, ///< [in] USM memory advice @@ -1295,7 +1295,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1345,8 +1344,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( - void *Mem, ///< [in] pointer to memory to be filled. +ur_result_t urEnqueueUSMFill2D( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. + void *Mem, ///< [in] pointer to memory to be filled. size_t Pitch, ///< [in] the total width of the destination memory including ///< padding. size_t PatternSize, ///< [in] the size in bytes of the pattern. @@ -1364,6 +1364,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { + std::ignore = Queue; std::ignore = Mem; std::ignore = Pitch; std::ignore = PatternSize; @@ -1378,7 +1379,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D( +ur_result_t urEnqueueUSMMemcpy2D( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. bool Blocking, ///< [in] indicates if this operation should block the host. void *Dst, ///< [in] pointer to memory where data will be copied. size_t DstPitch, ///< [in] the total width of the source memory including @@ -1399,7 +1401,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = this; ur_rect_offset_t ZeroOffset{0, 0, 0}; ur_rect_region_t Region{Width, Height, 0}; @@ -1500,7 +1501,7 @@ static ur_result_t ur2zeImageDesc(const ur_image_format_t *ImageFormat, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( +ur_result_t urMemImageCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags const ur_image_format_t @@ -1549,7 +1550,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( +ur_result_t urMemImageCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. ur_context_handle_t Context, ///< [in] handle of the context object. 
[[maybe_unused]] const ur_image_format_t @@ -1577,7 +1578,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( +ur_result_t urMemBufferCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags size_t Size, ///< [in] size in bytes of the memory object to be allocated @@ -1599,30 +1600,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( Host = Properties->pHost; } - // If USM Import feature is enabled and hostptr is supplied, - // import the hostptr if not already imported into USM. - // Data transfer rate is maximized when both source and destination - // are USM pointers. Promotion of the host pointer to USM thus - // optimizes data transfer performance. bool HostPtrImported = false; - if (ZeUSMImport.Enabled && Host != nullptr && - (Flags & UR_MEM_FLAG_USE_HOST_POINTER) != 0) { - // Query memory type of the host pointer - ze_device_handle_t ZeDeviceHandle; - ZeStruct ZeMemoryAllocationProperties; - ZE2UR_CALL(zeMemGetAllocProperties, - (Context->ZeContext, Host, &ZeMemoryAllocationProperties, - &ZeDeviceHandle)); - - // If not shared of any type, we can import the ptr - if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { - // Promote the host ptr to USM host memory - ze_driver_handle_t driverHandle = - Context->getPlatform()->ZeDriverHandleExpTranslated; - ZeUSMImport.doZeUSMImport(driverHandle, Host, Size); - HostPtrImported = true; - } - } + if (Flags & UR_MEM_FLAG_USE_HOST_POINTER) + HostPtrImported = + maybeImportUSM(Context->getPlatform()->ZeDriverHandleExpTranslated, + Context->ZeContext, Host, Size); _ur_buffer *Buffer = nullptr; auto HostPtrOrNull = (Flags & UR_MEM_FLAG_USE_HOST_POINTER) @@ -1671,14 +1653,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRetain( +ur_result_t urMemRetain( ur_mem_handle_t Mem ///< [in] handle of the memory object to get access ) { Mem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( +ur_result_t urMemRelease( ur_mem_handle_t Mem ///< [in] handle of the memory object to release ) { if (!Mem->RefCount.decrementAndTest()) @@ -1704,7 +1686,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( +ur_result_t urMemBufferPartition( ur_mem_handle_t Buffer, ///< [in] handle of the buffer object to allocate from ur_mem_flags_t Flags, ///< [in] allocation and usage information flags @@ -1740,7 +1722,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( +ur_result_t urMemGetNativeHandle( ur_mem_handle_t Mem, ///< [in] handle of the mem. ur_device_handle_t, ///< [in] handle of the device. ur_native_handle_t @@ -1754,7 +1736,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( +ur_result_t urMemBufferCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. ur_context_handle_t Context, ///< [in] handle of the context object. 
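
Note: in the urMemBufferCreate hunk above, the inline USM host-pointer import logic is replaced by a single call to `maybeImportUSM`. Its definition is not part of this excerpt; the sketch below reconstructs what it presumably encapsulates from the deleted lines (the `ZeUSMImport` global, `doZeUSMImport`, and the `ZE_MEMORY_TYPE_UNKNOWN` check all come from the removed code), with the signature inferred from the new call site — treat both as assumptions:

    // Sketch only; not the actual helper introduced by this patch.
    static bool maybeImportUSM(ze_driver_handle_t DriverHandle,
                               ze_context_handle_t ZeContext, void *Host,
                               size_t Size) {
      if (!ZeUSMImport.Enabled || Host == nullptr)
        return false;
      // Query the memory type of the host pointer.
      ze_device_handle_t ZeDeviceHandle;
      ze_memory_allocation_properties_t Props{};
      Props.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
      if (zeMemGetAllocProperties(ZeContext, Host, &Props, &ZeDeviceHandle) !=
          ZE_RESULT_SUCCESS)
        return false;
      // Only pointers not already known to Level Zero can be promoted to
      // USM host memory; importing maximizes host<->device transfer rate.
      if (Props.type != ZE_MEMORY_TYPE_UNKNOWN)
        return false;
      ZeUSMImport.doZeUSMImport(DriverHandle, Host, Size);
      return true;
    }
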
const ur_mem_native_properties_t @@ -1821,7 +1803,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ContextsLock.lock(); // Retain context to be sure that it is released after all memory // allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); Context->MemAllocs.emplace(std::piecewise_construct, std::forward_as_tuple(Ptr), @@ -1857,7 +1839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( +ur_result_t urMemGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the memory object being queried. ur_mem_info_t MemInfoType, ///< [in] type of the info to retrieve. size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -1893,7 +1875,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( +ur_result_t urMemImageGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the image object being queried. ur_image_info_t ImgInfoType, ///< [in] type of image info to retrieve. size_t PropSize, ///< [in] the number of bytes of memory pointer to by @@ -1916,6 +1898,79 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ur_result_t urEnqueueUSMFill( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + void *Ptr, ///< [in] pointer to USM memory object + size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a + ///< power of 2 and less than or equal to width. + const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. + size_t Size, ///< [in] size in bytes to be set. Must be a multiple of + ///< patternSize. + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t *Event ///< [out][optional] return an event object that + ///< identifies this particular command instance. +) { + std::scoped_lock Lock(Queue->Mutex); + + return enqueueMemFillHelper( + // TODO: do we need a new command type for USM memset? 
+ UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr, + Pattern, // It will be interpreted as an 8-bit value, + PatternSize, // which is indicated with this pattern_size==1 + Size, NumEventsInWaitList, EventWaitList, Event); +} + +/// Host Pipes +ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pDst; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pSrc; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + // If indirect access tracking is enabled then performs reference counting, // otherwise just calls zeMemAllocDevice. static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, @@ -1935,7 +1990,7 @@ static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. 
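
Note: the new `urEnqueueUSMFill` above routes USM fills through `enqueueMemFillHelper` under the existing `UR_COMMAND_MEM_BUFFER_FILL` command type. A hypothetical call, illustrating the constraints stated in its parameter comments (pattern size a power of two, fill size a multiple of it); `Queue` and `DevicePtr` are assumed to come from earlier queue-creation and USM-allocation calls not shown here:

    // Hypothetical usage; Queue and DevicePtr obtained elsewhere.
    const uint32_t Pattern = 0xDEADBEEF; // PatternSize = 4, a power of two
    ur_event_handle_t FillDone = nullptr;
    ur_result_t Res = ur::level_zero::urEnqueueUSMFill(
        Queue, DevicePtr, sizeof(Pattern), &Pattern,
        /*Size=*/1024, // must be a multiple of PatternSize
        0, nullptr, &FillDone);
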
- UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } ze_device_mem_alloc_desc_t ZeDesc = {}; @@ -1995,8 +2050,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMHostAlloc( + UrContext, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2054,8 +2110,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMDeviceAlloc(UrContext, Device, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMDeviceAlloc( + UrContext, Device, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { Allocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeDeviceMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2118,8 +2175,8 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL( - urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, &ZeHandleHost)); + UR_CALL(ur::level_zero::urUSMHostAlloc(UrContext, &USMDesc, Pool, + Size, &ZeHandleHost)); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(&ZeHandleHost, UrContext, Size)); @@ -2301,66 +2358,3 @@ size_t _ur_buffer::getAlignment() const { Alignment = 1UL; return Alignment; } - -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill( - void *Ptr, ///< [in] pointer to USM memory object - size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a - ///< power of 2 and less than or equal to width. - const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. - size_t Size, ///< [in] size in bytes to be set. Must be a multiple of - ///< patternSize. - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t * - EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that this - ///< command does not wait on any event to complete. - ur_event_handle_t *Event ///< [out][optional] return an event object that - ///< identifies this particular command instance. -) { - auto Queue = this; - std::scoped_lock Lock(Queue->Mutex); - - return enqueueMemFillHelper( - // TODO: do we need a new command type for USM memset? 
- UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr, - Pattern, // It will be interpreted as an 8-bit value, - PatternSize, // which is indicated with this pattern_size==1 - Size, NumEventsInWaitList, EventWaitList, Event); -} - -/// Host Pipes -ur_result_t ur_queue_handle_legacy_t_::enqueueReadHostPipe( - ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, - void *pDst, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hProgram; - std::ignore = pipe_symbol; - std::ignore = blocking; - std::ignore = pDst; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t ur_queue_handle_legacy_t_::enqueueWriteHostPipe( - ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, - void *pSrc, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hProgram; - std::ignore = pipe_symbol; - std::ignore = blocking; - std::ignore = pSrc; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 43d548f16b..71d102e9dd 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -20,15 +20,12 @@ #include #include -#include +#include #include #include #include "ur_level_zero.hpp" -struct ur_queue_handle_legacy_t_; -using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *; - struct ur_device_handle_t_; bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr); @@ -48,7 +45,7 @@ const bool UseCopyEngineForD2DCopy = [] { // PI interfaces must have queue's and destination buffer's mutexes locked for // exclusive use and source buffer's mutex locked for shared use on entry. 
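
Note: the memory.hpp comment above (unchanged by this patch) states the locking convention for these helpers: the queue's and the destination buffer's mutexes are held for exclusive use, the source buffer's mutex for shared use. The copy entry points earlier in this patch satisfy it by combining a deferred shared_lock with a single scoped_lock; schematically, with member names as used elsewhere in the patch:

    // Schematic of the caller-side locking convention described above.
    std::shared_lock<ur_shared_mutex> SrcLock(SrcBuffer->Mutex, std::defer_lock);
    std::scoped_lock<std::shared_lock<ur_shared_mutex>, ur_shared_mutex,
                     ur_shared_mutex>
        LockAll(SrcLock, DstBuffer->Mutex, Queue->Mutex);
    // All three locks are acquired together via std::scoped_lock's
    // deadlock-avoiding algorithm and released when the guards go out of scope.
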
ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, - ur_queue_handle_legacy_t Queue, void *Dst, + ur_queue_handle_t Queue, void *Dst, ur_bool_t BlockingWrite, size_t Size, const void *Src, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, @@ -56,13 +53,12 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, bool PreferCopyEngine); ur_result_t enqueueMemCopyRectHelper( - ur_command_t CommandType, ur_queue_handle_legacy_t Queue, - const void *SrcBuffer, void *DstBuffer, ur_rect_offset_t SrcOrigin, - ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, - size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch, - ur_bool_t Blocking, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent, - bool PreferCopyEngine = false); + ur_command_t CommandType, ur_queue_handle_t Queue, const void *SrcBuffer, + void *DstBuffer, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, + ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch, + size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent, bool PreferCopyEngine = false); struct ur_mem_handle_t_ : _ur_object { // Keeps the PI context of this memory handle. diff --git a/source/adapters/level_zero/physical_mem.cpp b/source/adapters/level_zero/physical_mem.cpp index d4d9792f24..e7bb498859 100644 --- a/source/adapters/level_zero/physical_mem.cpp +++ b/source/adapters/level_zero/physical_mem.cpp @@ -14,7 +14,9 @@ #include "device.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( +namespace ur::level_zero { + +ur_result_t urPhysicalMemCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, [[maybe_unused]] const ur_physical_mem_properties_t *pProperties, ur_physical_mem_handle_t *phPhysicalMem) { @@ -35,14 +37,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { hPhysicalMem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { if (!hPhysicalMem->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -52,3 +52,4 @@ urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 68aebf97c7..721db3c359 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -12,7 +12,9 @@ #include "adapter.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( +namespace ur::level_zero { + +ur_result_t urPlatformGet( ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phPlatforms. 
If phPlatforms is not NULL, then @@ -47,7 +49,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( +ur_result_t urPlatformGetInfo( ur_platform_handle_t Platform, ///< [in] handle of the platform ur_platform_info_t ParamName, ///< [in] type of the info to retrieve size_t Size, ///< [in] the number of bytes pointed to by pPlatformInfo. @@ -101,7 +103,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( +ur_result_t urPlatformGetApiVersion( ur_platform_handle_t Driver, ///< [in] handle of the platform ur_api_version_t *Version ///< [out] api version ) { @@ -110,7 +112,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( +ur_result_t urPlatformGetNativeHandle( ur_platform_handle_t Platform, ///< [in] handle of the platform. ur_native_handle_t *NativePlatform ///< [out] a pointer to the native ///< handle of the platform. @@ -120,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( +ur_result_t urPlatformCreateWithNativeHandle( ur_native_handle_t NativePlatform, ///< [in] the native handle of the platform. ur_adapter_handle_t, @@ -135,12 +137,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( uint32_t NumPlatforms = 0; ur_adapter_handle_t AdapterHandle = GlobalAdapter; - UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, 0, nullptr, + &NumPlatforms)); if (NumPlatforms) { std::vector Platforms(NumPlatforms); - UR_CALL(urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), - nullptr)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumPlatforms, + Platforms.data(), nullptr)); // The SYCL spec requires that the set of platforms must remain fixed for // the duration of the application's execution. We assume that we found all @@ -158,6 +161,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( return UR_RESULT_ERROR_INVALID_VALUE; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-ze-opt-disable' for frontend_option = -O0. +// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. +// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for +// frontend_option=-ftarget-compile-fast. +ur_result_t urPlatformGetBackendOption( + ur_platform_handle_t Platform, ///< [in] handle of the platform instance. + const char *FrontendOption, ///< [in] string containing the frontend option. + const char * + *PlatformOption ///< [out] returns the correct platform specific + ///< compiler option based on the frontend option. 
+) { + std::ignore = Platform; + using namespace std::literals; + if (FrontendOption == nullptr) { + return UR_RESULT_SUCCESS; + } + if (FrontendOption == ""sv) { + *PlatformOption = ""; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O0"sv) { + *PlatformOption = "-ze-opt-disable"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || + FrontendOption == "-O3"sv) { + *PlatformOption = "-ze-opt-level=2"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-ftarget-compile-fast"sv) { + *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; +} + +} // namespace ur::level_zero + ur_result_t ur_platform_handle_t_::initialize() { ZE2UR_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion)); ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." + @@ -513,41 +556,3 @@ ur_device_handle_t ur_platform_handle_t_::getDeviceById(DeviceId id) { } return nullptr; } - -// Returns plugin specific backend option. -// Current support is only for optimization options. -// Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. -// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for -// frontend_option=-ftarget-compile-fast. -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( - ur_platform_handle_t Platform, ///< [in] handle of the platform instance. - const char *FrontendOption, ///< [in] string containing the frontend option. - const char * - *PlatformOption ///< [out] returns the correct platform specific - ///< compiler option based on the frontend option. -) { - std::ignore = Platform; - using namespace std::literals; - if (FrontendOption == nullptr) { - return UR_RESULT_SUCCESS; - } - if (FrontendOption == ""sv) { - *PlatformOption = ""; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O0"sv) { - *PlatformOption = "-ze-opt-disable"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || - FrontendOption == "-O3"sv) { - *PlatformOption = "-ze-opt-level=2"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-ftarget-compile-fast"sv) { - *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; - return UR_RESULT_SUCCESS; - } - return UR_RESULT_ERROR_INVALID_VALUE; -} diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index a6d34ccb23..02aef2d058 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -11,6 +11,7 @@ #include "program.hpp" #include "device.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #ifdef UR_ADAPTER_LEVEL_ZERO_V2 #include "v2/context.hpp" @@ -54,7 +55,9 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule, } } // extern "C" -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( +namespace ur::level_zero { + +ur_result_t urProgramCreateWithIL( ur_context_handle_t Context, ///< [in] handle of the context instance const void *IL, ///< [in] pointer to IL binary. size_t Length, ///< [in] length of `pIL` in bytes. 
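
Note: `urPlatformGetBackendOption` above (moved into the namespace; the body is unchanged from the version removed at the end of platform.cpp) maps frontend optimization flags onto Level Zero compiler options. A hypothetical query, assuming a `Platform` handle obtained via `urPlatformGet`:

    // Hypothetical usage of the option mapping shown above.
    const char *BackendOption = nullptr;
    ur_result_t Res = ur::level_zero::urPlatformGetBackendOption(
        Platform, "-O2", &BackendOption);
    // On success, BackendOption points to "-ze-opt-level=2"; "-O0" would
    // yield "-ze-opt-disable", "-ftarget-compile-fast" the IGC options
    // string, and an unrecognized option returns UR_RESULT_ERROR_INVALID_VALUE.
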
@@ -79,7 +82,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( +ur_result_t urProgramCreateWithBinary( ur_context_handle_t Context, ///< [in] handle of the context instance ur_device_handle_t Device, ///< [in] handle to device associated with binary. @@ -115,17 +118,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( +ur_result_t urProgramBuild( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in] Handle of the program to build. const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. ) { std::vector Devices = Context->getDevices(); - return urProgramBuildExp(Program, Devices.size(), Devices.data(), Options); + return ur::level_zero::urProgramBuildExp(Program, Devices.size(), + Devices.data(), Options); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( +ur_result_t urProgramBuildExp( ur_program_handle_t hProgram, ///< [in] Handle of the program to build. uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -228,7 +232,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( +ur_result_t urProgramCompileExp( ur_program_handle_t hProgram, ///< [in][out] handle of the program to compile. uint32_t numDevices, ///< [in] number of devices @@ -239,10 +243,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( ) { std::ignore = numDevices; std::ignore = phDevices; - return urProgramCompile(hProgram->Context, hProgram, pOptions); + return ur::level_zero::urProgramCompile(hProgram->Context, hProgram, + pOptions); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( +ur_result_t urProgramCompile( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in][out] handle of the program to compile. @@ -281,7 +286,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( +ur_result_t urProgramLink( ur_context_handle_t Context, ///< [in] handle of the context instance. uint32_t Count, ///< [in] number of program handles in `phPrograms`. const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to @@ -292,11 +297,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( *Program ///< [out] pointer to handle of program object created. ) { std::vector Devices = Context->getDevices(); - return urProgramLinkExp(Context, Devices.size(), Devices.data(), Count, - Programs, Options, Program); + return ur::level_zero::urProgramLinkExp(Context, Devices.size(), + Devices.data(), Count, Programs, + Options, Program); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( +ur_result_t urProgramLinkExp( ur_context_handle_t hContext, ///< [in] handle of the context instance. 
uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -482,14 +488,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( return UrResult; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain( +ur_result_t urProgramRetain( ur_program_handle_t Program ///< [in] handle for the Program to retain ) { Program->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease( +ur_result_t urProgramRelease( ur_program_handle_t Program ///< [in] handle for the Program to release ) { if (!Program->RefCount.decrementAndTest()) @@ -526,7 +532,7 @@ static bool is_in_separated_string(const std::string &str, char delimiter, return false; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( +ur_result_t urProgramGetFunctionPointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve pointer for. ur_program_handle_t @@ -566,12 +572,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( if (ZeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) { size_t Size; *FunctionPointerRet = 0; - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, - &Size)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size)); std::string ClResult(Size, ' '); - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, - ClResult.size(), &ClResult[0], nullptr)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, ClResult.size(), &ClResult[0], + nullptr)); // Get rid of the null terminator and search for kernel_name // If function can be found return error code to indicate it @@ -591,7 +598,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( +ur_result_t urProgramGetGlobalVariablePointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve the pointer for. ur_program_handle_t @@ -626,7 +633,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( +ur_result_t urProgramGetInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_program_info_t PropName, ///< [in] name of the Program property to query size_t PropSize, ///< [in] the size of the Program property. 
@@ -818,7 +825,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( +ur_result_t urProgramGetBuildInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_device_handle_t Device, ///< [in] handle of the Device object ur_program_build_info_t @@ -898,7 +905,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( +ur_result_t urProgramSetSpecializationConstant( ur_program_handle_t Program, ///< [in] handle of the Program object uint32_t SpecId, ///< [in] specification constant Id size_t SpecSize, ///< [in] size of the specialization constant value @@ -913,7 +920,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( +ur_result_t urProgramGetNativeHandle( ur_program_handle_t Program, ///< [in] handle of the program. ur_native_handle_t *NativeProgram ///< [out] a pointer to the native ///< handle of the program. @@ -934,7 +941,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( +ur_result_t urProgramCreateWithNativeHandle( ur_native_handle_t NativeProgram, ///< [in] the native handle of the program. ur_context_handle_t Context, ///< [in] handle of the context instance @@ -966,6 +973,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t Program, ///< [in] handle of the Program object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in][range(0, count)] array of specialization + ///< constant value descriptions +) { + std::scoped_lock Guard(Program->Mutex); + + // Remember the value of this specialization constant until the program is + // built. Note that we only save the pointer to the buffer that contains the + // value. The caller is responsible for maintaining storage for this buffer. + // + // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by + // SpecID. + for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) { + uint32_t SpecId = SpecConstants[SpecIt].id; + Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue; + } + return UR_RESULT_SUCCESS; +} + +} // namespace ur::level_zero + ur_program_handle_t_::~ur_program_handle_t_() { if (!resourcesReleased) { ur_release_program_resources(true); @@ -1000,25 +1031,3 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) { resourcesReleased = true; } } - -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( - ur_program_handle_t Program, ///< [in] handle of the Program object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in][range(0, count)] array of specialization - ///< constant value descriptions -) { - std::scoped_lock Guard(Program->Mutex); - - // Remember the value of this specialization constant until the program is - // built. Note that we only save the pointer to the buffer that contains the - // value. The caller is responsible for maintaining storage for this buffer. 
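
Note: `urProgramSetSpecializationConstants` above only records a pointer to each constant's value; per its comment, the caller must keep that storage alive until the program is built. A hypothetical sequence making the lifetime requirement explicit, using the `ur_specialization_constant_info_t` fields referenced in the body above:

    // Hypothetical usage: the value buffer must outlive the build call.
    const uint32_t SpecValue = 16; // storage owned by the caller
    ur_specialization_constant_info_t Info{};
    Info.id = 0;                   // SPIR-V SpecId
    Info.size = sizeof(SpecValue); // unused here; size is taken from SPIR-V
    Info.pValue = &SpecValue;
    ur_result_t Res = ur::level_zero::urProgramSetSpecializationConstants(
        Program, 1, &Info);
    if (Res == UR_RESULT_SUCCESS)
      Res = ur::level_zero::urProgramBuild(Context, Program, /*Options=*/nullptr);
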
- // - // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by - // SpecID. - for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) { - uint32_t SpecId = SpecConstants[SpecIt].id; - Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue; - } - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 2845120113..978547df10 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -19,7 +19,7 @@ #include "common.hpp" #include "event.hpp" #include "queue.hpp" -#include "ur_api.h" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" #include "ze_api.h" @@ -99,7 +99,7 @@ bool ur_completion_batch::checkComplete() { return st == COMPLETED; } -ur_result_t ur_completion_batch::seal(ur_queue_handle_legacy_t queue, +ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist) { assert(st == ACCUMULATING); @@ -187,7 +187,7 @@ ur_completion_batches::ur_completion_batches() { } ur_result_t ur_completion_batches::tryCleanup( - ur_queue_handle_legacy_t queue, ze_command_list_handle_t cmdlist, + ur_queue_handle_t queue, ze_command_list_handle_t cmdlist, std::vector &events, std::vector &EventListToCleanup) { cleanup(events, EventListToCleanup); @@ -229,7 +229,7 @@ void ur_completion_batches::forceReset() { /// the call, in case of in-order queue it allows to cleanup all preceding /// events. /// @return PI_SUCCESS if successful, PI error code otherwise. -ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, +ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked, bool QueueSynced, ur_event_handle_t CompletedEvent) { // Handle only immediate command lists here. @@ -303,7 +303,7 @@ ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, /// @param Queue Queue where we look for signalled command lists and cleanup /// events. /// @return PI_SUCCESS if successful, PI error code otherwise. -ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) { +ur_result_t resetCommandLists(ur_queue_handle_t Queue) { // Handle immediate command lists here, they don't need to be reset and we // only need to cleanup events. if (Queue->UsingImmCmdLists) { @@ -342,7 +342,10 @@ ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueGetInfo( +namespace ur::level_zero { + +ur_result_t urQueueGetInfo( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_queue_info_t ParamName, ///< [in] name of the queue property to query size_t ParamValueSize, ///< [in] size in bytes of the queue property value ///< provided @@ -350,8 +353,6 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetInfo( size_t *ParamValueSizeRet ///< [out] size in bytes returned in queue ///< property value ) { - auto Queue = this; - std::shared_lock Lock(Queue->Mutex); UrReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); // TODO: consider support for queue properties and size @@ -467,7 +468,7 @@ static bool doEagerInit = [] { return EagerInit ? 
std::atoi(EagerInit) != 0 : false; }(); -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( +ur_result_t urQueueCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_queue_properties_t @@ -502,7 +503,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // Create placeholder queues in the compute queue group. // Actual L0 queues will be created at first use. std::vector ZeComputeCommandQueues( - Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::Compute] + Device->QueueGroup[ur_queue_handle_t_::queue_type::Compute] .ZeProperties.numQueues, nullptr); @@ -512,21 +513,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( size_t NumCopyGroups = 0; if (Device->hasMainCopyEngine()) { NumCopyGroups += - Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::MainCopy] + Device->QueueGroup[ur_queue_handle_t_::queue_type::MainCopy] .ZeProperties.numQueues; } if (Device->hasLinkCopyEngine()) { NumCopyGroups += - Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::LinkCopy] + Device->QueueGroup[ur_queue_handle_t_::queue_type::LinkCopy] .ZeProperties.numQueues; } std::vector ZeCopyCommandQueues(NumCopyGroups, nullptr); try { - *Queue = new ur_queue_handle_legacy_t_(ZeComputeCommandQueues, - ZeCopyCommandQueues, Context, Device, - true, Flags, ForceComputeIndex); + *Queue = + new ur_queue_handle_t_(ZeComputeCommandQueues, ZeCopyCommandQueues, + Context, Device, true, Flags, ForceComputeIndex); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -535,7 +536,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // Do eager initialization of Level Zero handles on request. if (doEagerInit) { - ur_queue_handle_legacy_t Q = Legacy(*Queue); + auto Q = *Queue; // Creates said number of command-lists. auto warmupQueueGroup = [Q](bool UseCopyEngine, uint32_t RepeatCount) -> ur_result_t { @@ -576,9 +577,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueRetain() { - auto Queue = this; - +ur_result_t urQueueRetain( + ur_queue_handle_t Queue ///< [in] handle of the queue object to get access +) { { std::scoped_lock Lock(Queue->Mutex); Queue->RefCountExternal++; @@ -587,9 +588,9 @@ ur_result_t ur_queue_handle_legacy_t_::queueRetain() { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueRelease() { - auto Queue = this; - +ur_result_t urQueueRelease( + ur_queue_handle_t Queue ///< [in] handle of the queue object to release +) { std::vector EventListToCleanup; { std::scoped_lock Lock(Queue->Mutex); @@ -690,13 +691,12 @@ ur_result_t ur_queue_handle_legacy_t_::queueRelease() { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle( +ur_result_t urQueueGetNativeHandle( + ur_queue_handle_t Queue, ///< [in] handle of the queue. ur_queue_native_desc_t *Desc, ur_native_handle_t *NativeQueue ///< [out] a pointer to the native handle of the queue. ) { - auto Queue = this; - // Lock automatically releases when this goes out of scope. std::shared_lock lock(Queue->Mutex); @@ -728,24 +728,7 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle( return UR_RESULT_SUCCESS; } -void ur_queue_handle_legacy_t_::ur_queue_group_t::setImmCmdList( - ur_queue_handle_legacy_t queue, ze_command_list_handle_t ZeCommandList) { - // An immediate command list was given to us but we don't have the queue - // descriptor information. 
Create a dummy and note that it is not recycleable. - ZeStruct ZeQueueDesc; - - ImmCmdLists = std::vector( - 1, - Queue->CommandListMap - .insert(std::pair{ - ZeCommandList, - ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc, - queue->useCompletionBatching(), false, - false, true)}) - .first); -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( +ur_result_t urQueueCreateWithNativeHandle( ur_native_handle_t NativeQueue, ///< [in] the native handle of the queue. ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, /// @@ -785,12 +768,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( uint32_t NumEntries = 1; ur_platform_handle_t Platform{}; ur_adapter_handle_t AdapterHandle = GlobalAdapter; - UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumEntries, + &Platform, nullptr)); ur_device_handle_t UrDevice = Device; if (UrDevice == nullptr) { - UR_CALL(urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, NumEntries, &UrDevice, - nullptr)); + UR_CALL(ur::level_zero::urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, + NumEntries, &UrDevice, nullptr)); } // The NativeHandleDesc has value if if the native handle is an immediate @@ -800,7 +784,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector CopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { @@ -808,9 +792,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( } catch (...) { return UR_RESULT_ERROR_UNKNOWN; } - auto &InitialGroup = - Legacy(*RetQueue)->ComputeQueueGroupsByTID.begin()->second; - InitialGroup.setImmCmdList(Legacy(*RetQueue), + auto &InitialGroup = (*RetQueue)->ComputeQueueGroupsByTID.begin()->second; + InitialGroup.setImmCmdList(*RetQueue, ur_cast(NativeQueue)); } else { auto ZeQueue = ur_cast(NativeQueue); @@ -823,7 +806,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector ZeroCopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { @@ -832,13 +815,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( return UR_RESULT_ERROR_UNKNOWN; } } - Legacy(*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1); + (*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1); return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueFinish() { - auto Queue = this; +ur_result_t urQueueFinish( + ur_queue_handle_t Queue ///< [in] handle of the queue to be finished. +) { if (Queue->UsingImmCmdLists) { // Lock automatically releases when this goes out of scope. std::scoped_lock Lock(Queue->Mutex); @@ -903,12 +887,38 @@ ur_result_t ur_queue_handle_legacy_t_::queueFinish() { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueFlush() { - auto Queue = this; +ur_result_t urQueueFlush( + ur_queue_handle_t Queue ///< [in] handle of the queue to be flushed. 
+) { std::scoped_lock Lock(Queue->Mutex); return Queue->executeAllOpenCommandLists(); } +ur_result_t urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numPropsInLaunchPropList; + std::ignore = launchPropList; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + + logger::error("[UR][L0] {} function not implemented!", + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + // Configuration of the command-list batching. struct zeCommandListBatchConfig { // Default value of 0. This specifies to use dynamic batch size adjustment. @@ -1063,7 +1073,7 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] { return ZeCommandListBatchConfig(IsCopy{true}); }(); -ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( +ur_queue_handle_t_::ur_queue_handle_t_( std::vector &ComputeQueues, std::vector &CopyQueues, ur_context_handle_t Context, ur_device_handle_t Device, @@ -1089,8 +1099,8 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( // First, see if the queue's device allows for round-robin or it is // fixed to one particular compute CCS (it is so for sub-sub-devices). auto &ComputeQueueGroupInfo = Device->QueueGroup[queue_type::Compute]; - ur_queue_group_t ComputeQueueGroup{ - reinterpret_cast(this), queue_type::Compute}; + ur_queue_group_t ComputeQueueGroup{reinterpret_cast(this), + queue_type::Compute}; ComputeQueueGroup.ZeQueues = ComputeQueues; // Create space to hold immediate commandlists corresponding to the // ZeQueues @@ -1136,8 +1146,8 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( ComputeQueueGroupsByTID.set(ComputeQueueGroup); // Copy group initialization. - ur_queue_group_t CopyQueueGroup{ - reinterpret_cast(this), queue_type::MainCopy}; + ur_queue_group_t CopyQueueGroup{reinterpret_cast(this), + queue_type::MainCopy}; const auto &Range = getRangeOfAllowedCopyEngines((ur_device_handle_t)Device); if (Range.first < 0 || Range.second < 0) { // We are asked not to use copy engines, just do nothing. @@ -1182,7 +1192,7 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound; } -void ur_queue_handle_legacy_t_::adjustBatchSizeForFullBatch(bool IsCopy) { +void ur_queue_handle_t_::adjustBatchSizeForFullBatch(bool IsCopy) { auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch; auto &ZeCommandListBatchConfig = IsCopy ? ZeCommandListBatchCopyConfig : ZeCommandListBatchComputeConfig; @@ -1209,7 +1219,7 @@ void ur_queue_handle_legacy_t_::adjustBatchSizeForFullBatch(bool IsCopy) { } } -void ur_queue_handle_legacy_t_::adjustBatchSizeForPartialBatch(bool IsCopy) { +void ur_queue_handle_t_::adjustBatchSizeForPartialBatch(bool IsCopy) { auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch; auto &ZeCommandListBatchConfig = IsCopy ? 
ZeCommandListBatchCopyConfig : ZeCommandListBatchComputeConfig; @@ -1235,14 +1245,15 @@ void ur_queue_handle_legacy_t_::adjustBatchSizeForPartialBatch(bool IsCopy) { } } -ur_result_t ur_queue_handle_legacy_t_::executeCommandList( - ur_command_list_ptr_t CommandList, bool IsBlocking, bool OKToBatchCommand) { +ur_result_t +ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, + bool IsBlocking, bool OKToBatchCommand) { // Do nothing if command list is already closed. if (CommandList->second.IsClosed) return UR_RESULT_SUCCESS; - bool UseCopyEngine = CommandList->second.isCopy( - reinterpret_cast(this)); + bool UseCopyEngine = + CommandList->second.isCopy(reinterpret_cast(this)); // If the current LastCommandEvent is the nullptr, then it means // either that no command has ever been issued to the queue @@ -1349,7 +1360,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList( // ur_event_handle_t HostVisibleEvent; auto Res = createEventAndAssociateQueue( - reinterpret_cast(this), &HostVisibleEvent, + reinterpret_cast(this), &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, /* IsInternal */ false, /* IsMultiDevice */ true, /* HostVisible */ true); @@ -1473,12 +1484,12 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList( return UR_RESULT_SUCCESS; } -bool ur_queue_handle_legacy_t_::doReuseDiscardedEvents() { +bool ur_queue_handle_t_::doReuseDiscardedEvents() { return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents(); } -ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent( - ur_command_list_ptr_t CommandList) { +ur_result_t +ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) { if (LastCommandEvent && LastCommandEvent->IsDiscarded) { ZE2UR_CALL(zeCommandListAppendBarrier, (CommandList->first, nullptr, 1, &(LastCommandEvent->ZeEvent))); @@ -1511,8 +1522,7 @@ ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent( return UR_RESULT_SUCCESS; } -ur_result_t -ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) { +ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) { if (!Event->IsMultiDevice) { auto EventCachesMap = Event->isHostVisible() ? 
&EventCachesDeviceMap[0] : &EventCachesDeviceMap[1]; @@ -1528,19 +1538,19 @@ ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) { return UR_RESULT_SUCCESS; } -void ur_queue_handle_legacy_t_::active_barriers::add(ur_event_handle_t &Event) { +void ur_queue_handle_t_::active_barriers::add(ur_event_handle_t &Event) { Event->RefCount.increment(); Events.push_back(Event); } -ur_result_t ur_queue_handle_legacy_t_::active_barriers::clear() { +ur_result_t ur_queue_handle_t_::active_barriers::clear() { for (const auto &Event : Events) UR_CALL(urEventReleaseInternal(Event)); Events.clear(); return UR_RESULT_SUCCESS; } -void ur_queue_handle_legacy_t_::clearEndTimeRecordings() { +void ur_queue_handle_t_::clearEndTimeRecordings() { uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; const uint64_t TimestampMaxValue = Device->getTimestampMask(); @@ -1567,7 +1577,7 @@ void ur_queue_handle_legacy_t_::clearEndTimeRecordings() { EndTimeRecordings.clear(); } -ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) { +ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) { if (!Queue->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1606,33 +1616,33 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) { return UR_RESULT_SUCCESS; } -bool ur_queue_handle_legacy_t_::isBatchingAllowed(bool IsCopy) const { +bool ur_queue_handle_t_::isBatchingAllowed(bool IsCopy) const { auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch; return (CommandBatch.QueueBatchSize > 0 && ((UrL0Serialize & UrL0SerializeBlock) == 0)); } -bool ur_queue_handle_legacy_t_::isDiscardEvents() const { +bool ur_queue_handle_t_::isDiscardEvents() const { return ((this->Properties & UR_QUEUE_FLAG_DISCARD_EVENTS) != 0); } -bool ur_queue_handle_legacy_t_::isPriorityLow() const { +bool ur_queue_handle_t_::isPriorityLow() const { return ((this->Properties & UR_QUEUE_FLAG_PRIORITY_LOW) != 0); } -bool ur_queue_handle_legacy_t_::isPriorityHigh() const { +bool ur_queue_handle_t_::isPriorityHigh() const { return ((this->Properties & UR_QUEUE_FLAG_PRIORITY_HIGH) != 0); } -bool ur_queue_handle_legacy_t_::isBatchedSubmission() const { +bool ur_queue_handle_t_::isBatchedSubmission() const { return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_BATCHED) != 0); } -bool ur_queue_handle_legacy_t_::isImmediateSubmission() const { +bool ur_queue_handle_t_::isImmediateSubmission() const { return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) != 0); } -bool ur_queue_handle_legacy_t_::isInOrderQueue() const { +bool ur_queue_handle_t_::isInOrderQueue() const { // If out-of-order queue property is not set, then this is a in-order queue. return ((this->Properties & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0); @@ -1662,11 +1672,11 @@ ur_result_t CleanupEventListFromResetCmdList( // TODO: Event release in immediate commandlist mode is driven by the SYCL // runtime. Need to investigate whether relase can be done earlier, at sync // points such as this, to reduce total number of active Events. 
-ur_result_t ur_queue_handle_legacy_t_::synchronize() { +ur_result_t ur_queue_handle_t_::synchronize() { if (!Healthy) return UR_RESULT_SUCCESS; - auto syncImmCmdList = [](ur_queue_handle_legacy_t_ *Queue, + auto syncImmCmdList = [](ur_queue_handle_t_ *Queue, ur_command_list_ptr_t ImmCmdList) { if (ImmCmdList == Queue->CommandListMap.end()) return UR_RESULT_SUCCESS; @@ -1757,9 +1767,8 @@ ur_result_t ur_queue_handle_legacy_t_::synchronize() { return UR_RESULT_SUCCESS; } -ur_event_handle_t -ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice, - bool HostVisible) { +ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice, + bool HostVisible) { std::list *Cache; if (!IsMultiDevice) { @@ -1791,7 +1800,7 @@ ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice, // at the end of a command list batch. This will only be true if the event does // not have dependencies or the dependencies are not for events which exist in // this batch. -bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +bool eventCanBeBatched(ur_queue_handle_t Queue, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { auto &CommandBatch = @@ -1821,7 +1830,7 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // dependencies, then this command can be enqueued without a signal event set in // a command list batch. The signal event will be appended at the end of the // batch to be signalled at the end of the command list. -ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine, ze_event_handle_t *ZeEvent, ur_event_handle_t *Event, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, @@ -1852,7 +1861,7 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // visible pool. // \param HostVisible tells if the event must be created in the // host-visible pool. If not set then this function will decide. -ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, +ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType, ur_command_list_ptr_t CommandList, @@ -1908,12 +1917,12 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, // event will not be waited/released by SYCL RT, so it must be destroyed by // EventRelease in resetCommandList. if (!IsInternal) - UR_CALL(urEventRetain(*Event)); + UR_CALL(ur::level_zero::urEventRetain(*Event)); return UR_RESULT_SUCCESS; } -void ur_queue_handle_legacy_t_::CaptureIndirectAccesses() { +void ur_queue_handle_t_::CaptureIndirectAccesses() { for (auto &Kernel : KernelsToBeSubmitted) { if (!Kernel->hasIndirectAccess()) continue; @@ -1937,8 +1946,7 @@ void ur_queue_handle_legacy_t_::CaptureIndirectAccesses() { KernelsToBeSubmitted.clear(); } -ur_result_t -ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded( +ur_result_t ur_queue_handle_t_::signalEventFromCmdListIfLastEventDiscarded( ur_command_list_ptr_t CommandList) { // We signal new event at the end of command list only if we have queue with // discard_events property and the last command event is discarded. @@ -1952,7 +1960,7 @@ ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded( // from the host. 
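// Editor's illustrative sketch (not part of the patch): the retain/release
// calls touched above (urEventRetain for non-internal events, RefCount
// increment/decrementAndTest in the release paths) follow a small atomic
// reference-count pattern where decrementAndTest() tells the caller it just
// dropped the last reference and therefore owns destruction. A minimal
// standalone counter with the same semantics; the type name is a stand-in.
#include <atomic>
#include <cstdint>

struct SketchRefCount {
  std::atomic<uint32_t> Count{1}; // the creator holds the first reference

  void increment() { Count.fetch_add(1, std::memory_order_relaxed); }

  // Returns true when this call released the last reference; the caller is
  // then responsible for freeing the owning object.
  bool decrementAndTest() {
    return Count.fetch_sub(1, std::memory_order_acq_rel) == 1;
  }
};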
ur_event_handle_t Event; UR_CALL(createEventAndAssociateQueue( - reinterpret_cast(this), &Event, + reinterpret_cast(this), &Event, UR_EXT_COMMAND_TYPE_USER, CommandList, /* IsInternal */ false, /* IsMultiDevice */ true, /* HostVisible */ false)); @@ -1964,7 +1972,7 @@ ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::executeOpenCommandList(bool IsCopy) { +ur_result_t ur_queue_handle_t_::executeOpenCommandList(bool IsCopy) { auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch; // If there are any commands still in the open command list for this // queue, then close and execute that command list now. @@ -1978,7 +1986,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeOpenCommandList(bool IsCopy) { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::resetCommandList( +ur_result_t ur_queue_handle_t_::resetCommandList( ur_command_list_ptr_t CommandList, bool MakeAvailable, std::vector &EventListToCleanup, bool CheckStatus) { bool UseCopyEngine = CommandList->second.isCopy(this); @@ -2080,7 +2088,7 @@ ur_result_t ur_queue_handle_legacy_t_::resetCommandList( return UR_RESULT_SUCCESS; } -bool ur_command_list_info_t::isCopy(ur_queue_handle_legacy_t Queue) const { +bool ur_command_list_info_t::isCopy(ur_queue_handle_t Queue) const { return ZeQueueDesc.ordinal != (uint32_t)Queue->Device ->QueueGroup @@ -2096,7 +2104,7 @@ void ur_command_list_info_t::append(ur_event_handle_t Event) { } ur_command_list_ptr_t -ur_queue_handle_legacy_t_::eventOpenCommandList(ur_event_handle_t Event) { +ur_queue_handle_t_::eventOpenCommandList(ur_event_handle_t Event) { using IsCopy = bool; if (UsingImmCmdLists) { @@ -2121,15 +2129,32 @@ ur_queue_handle_legacy_t_::eventOpenCommandList(ur_event_handle_t Event) { return CommandListMap.end(); } -ur_queue_handle_legacy_t_::ur_queue_group_t & -ur_queue_handle_legacy_t_::getQueueGroup(bool UseCopyEngine) { +void ur_queue_handle_t_::ur_queue_group_t::setImmCmdList( + ur_queue_handle_t queue, ze_command_list_handle_t ZeCommandList) { + // An immediate command list was given to us but we don't have the queue + // descriptor information. Create a dummy and note that it is not recycleable. + ZeStruct ZeQueueDesc; + + ImmCmdLists = std::vector( + 1, + Queue->CommandListMap + .insert(std::pair{ + ZeCommandList, + ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc, + queue->useCompletionBatching(), false, + false, true)}) + .first); +} + +ur_queue_handle_t_::ur_queue_group_t & +ur_queue_handle_t_::getQueueGroup(bool UseCopyEngine) { auto &Map = (UseCopyEngine ? CopyQueueGroupsByTID : ComputeQueueGroupsByTID); return Map.get(); } // Return the index of the next queue to use based on a // round robin strategy and the queue group ordinal. -uint32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getQueueIndex( +uint32_t ur_queue_handle_t_::ur_queue_group_t::getQueueIndex( uint32_t *QueueGroupOrdinal, uint32_t *QueueIndex, bool QueryOnly) { auto CurrentIndex = NextIndex; @@ -2163,8 +2188,7 @@ uint32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getQueueIndex( // This function will return one of possibly multiple available native // queues and the value of the queue group ordinal. ze_command_queue_handle_t & -ur_queue_handle_legacy_t_::ur_queue_group_t::getZeQueue( - uint32_t *QueueGroupOrdinal) { +ur_queue_handle_t_::ur_queue_group_t::getZeQueue(uint32_t *QueueGroupOrdinal) { // QueueIndex is the proper L0 index. 
// Index is the plugins concept of index, with main and link copy engines in @@ -2209,7 +2233,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getZeQueue( return ZeQueue; } -int32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getCmdQueueOrdinal( +int32_t ur_queue_handle_t_::ur_queue_group_t::getCmdQueueOrdinal( ze_command_queue_handle_t CmdQueue) { // Find out the right queue group ordinal (first queue might be "main" or // "link") @@ -2221,7 +2245,7 @@ int32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getCmdQueueOrdinal( return Queue->Device->QueueGroup[QueueType].ZeOrdinal; } -bool ur_queue_handle_legacy_t_::useCompletionBatching() { +bool ur_queue_handle_t_::useCompletionBatching() { static bool enabled = getenv_tobool( "UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_EVENT_COMPLETIONS", false); return enabled && !isInOrderQueue() && UsingImmCmdLists; @@ -2231,7 +2255,7 @@ bool ur_queue_handle_legacy_t_::useCompletionBatching() { // fence tracking its completion. This command list & fence are added to the // map of command lists in this queue with ZeFenceInUse = false. // The caller must hold a lock of the queue already. -ur_result_t ur_queue_handle_legacy_t_::createCommandList( +ur_result_t ur_queue_handle_t_::createCommandList( bool UseCopyEngine, ur_command_list_ptr_t &CommandList, ze_command_queue_handle_t *ForcedCmdQueue) { @@ -2274,8 +2298,8 @@ ur_result_t ur_queue_handle_legacy_t_::createCommandList( } ur_result_t -ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, - bool UseCopyEngine) { +ur_queue_handle_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, + bool UseCopyEngine) { // Early exit if there are no active barriers. if (ActiveBarriers.empty()) return UR_RESULT_SUCCESS; @@ -2284,7 +2308,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, _ur_ze_event_list_t ActiveBarriersWaitList; UR_CALL(ActiveBarriersWaitList.createAndRetainUrZeEventList( ActiveBarriers.vector().size(), ActiveBarriers.vector().data(), - reinterpret_cast(this), UseCopyEngine)); + reinterpret_cast(this), UseCopyEngine)); // We can now replace active barriers with the ones in the wait list. UR_CALL(ActiveBarriers.clear()); @@ -2300,7 +2324,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, ur_event_handle_t Event = nullptr; if (auto Res = createEventAndAssociateQueue( - reinterpret_cast(this), &Event, + reinterpret_cast(this), &Event, UR_EXT_COMMAND_TYPE_USER, CmdList, /* IsInternal */ true, /* IsMultiDevice */ true)) return Res; @@ -2316,7 +2340,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::insertStartBarrierIfDiscardEventsMode( +ur_result_t ur_queue_handle_t_::insertStartBarrierIfDiscardEventsMode( ur_command_list_ptr_t &CmdList) { // If current command list is different from the last command list then insert // a barrier waiting for the last command event. 
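// Editor's illustrative sketch (not part of the patch): the start barrier and
// discarded-event reset paths above ultimately append a Level Zero barrier
// that waits on the previous command's event before anything later in the
// command list may execute (the adapter wraps the call in ZE2UR_CALL). A
// minimal helper showing the underlying driver call; it assumes the Level
// Zero headers are available as <ze_api.h> (some installs expose them as
// <level_zero/ze_api.h>), the helper name is a stand-in, and error handling
// is reduced to returning the raw ze_result_t.
#include <ze_api.h>

static ze_result_t sketchAppendWaitBarrier(ze_command_list_handle_t CmdList,
                                           ze_event_handle_t WaitEvent) {
  // No signal event; one wait event: commands appended to CmdList after this
  // barrier cannot start until WaitEvent has been signalled.
  return zeCommandListAppendBarrier(CmdList, /*hSignalEvent=*/nullptr,
                                    /*numWaitEvents=*/1, &WaitEvent);
}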
@@ -2342,7 +2366,7 @@ static const bool UseCopyEngineForInOrderQueue = [] { (std::stoi(CopyEngineForInOrderQueue) != 0)); }(); -bool ur_queue_handle_legacy_t_::useCopyEngine(bool PreferCopyEngine) const { +bool ur_queue_handle_t_::useCopyEngine(bool PreferCopyEngine) const { auto InitialCopyGroup = CopyQueueGroupsByTID.begin()->second; return PreferCopyEngine && InitialCopyGroup.ZeQueues.size() > 0 && (!isInOrderQueue() || UseCopyEngineForInOrderQueue); @@ -2350,8 +2374,7 @@ bool ur_queue_handle_legacy_t_::useCopyEngine(bool PreferCopyEngine) const { // This function will return one of po6ssibly multiple available // immediate commandlists associated with this Queue. -ur_command_list_ptr_t & -ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { +ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { uint32_t QueueIndex, QueueOrdinal; auto Index = getQueueIndex(&QueueOrdinal, &QueueIndex); @@ -2363,6 +2386,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { ZeCommandQueueDesc.ordinal = QueueOrdinal; ZeCommandQueueDesc.index = QueueIndex; ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + bool isInOrderList = false; const char *Priority = "Normal"; if (Queue->isPriorityLow()) { ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; @@ -2378,6 +2402,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { } if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + isInOrderList = true; ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER; } @@ -2426,7 +2451,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { ZeCommandList, ur_command_list_info_t( nullptr, true, false, nullptr, ZeCommandQueueDesc, - Queue->useCompletionBatching(), true, false, true)}) + Queue->useCompletionBatching(), true, isInOrderList, true)}) .first; return ImmCmdLists[Index]; @@ -2455,7 +2480,7 @@ static const size_t ImmCmdListsEventCleanupThreshold = [] { return Threshold; }(); -size_t ur_queue_handle_legacy_t_::getImmdCmmdListsEventCleanupThreshold() { +size_t ur_queue_handle_t_::getImmdCmmdListsEventCleanupThreshold() { return useCompletionBatching() ? CompletionEventsPerBatch : ImmCmdListsEventCleanupThreshold; } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 97ddcf014c..699d7ec960 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -20,19 +20,15 @@ #include #include -#include +#include #include #include #include "common.hpp" #include "device.hpp" -#include "queue_api.hpp" - -struct ur_queue_handle_legacy_t_; -using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *; extern "C" { -ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue); +ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue); } // extern "C" struct ur_completion_batch; @@ -74,8 +70,7 @@ struct ur_completion_batch { // Seals the event batch and appends a barrier to the command list. // Adding any further events after this, but before reset, is undefined. - ur_result_t seal(ur_queue_handle_legacy_t queue, - ze_command_list_handle_t cmdlist); + ur_result_t seal(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist); // Resets a complete batch back to an empty state. Cleanups internal state // but keeps allocated resources for reuse. @@ -117,7 +112,7 @@ struct ur_completion_batches { // returned to indicate that there are no batches available. 
// This is safe, but will increase how many events are associated // with the active batch. - ur_result_t tryCleanup(ur_queue_handle_legacy_t queue, + ur_result_t tryCleanup(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist, std::vector &EventList, std::vector &EventListToCleanup); @@ -154,10 +149,10 @@ struct ur_completion_batches { ur_completion_batch_it active; }; -ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue); +ur_result_t resetCommandLists(ur_queue_handle_t Queue); ur_result_t -CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, - bool QueueLocked = false, bool QueueSynced = false, +CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked = false, + bool QueueSynced = false, ur_event_handle_t CompletedEvent = nullptr); // Structure describing the specific use of a command-list in a queue. @@ -208,7 +203,7 @@ struct ur_command_list_info_t { bool IsImmediate; // Helper functions to tell if this is a copy command-list. - bool isCopy(ur_queue_handle_legacy_t Queue) const; + bool isCopy(ur_queue_handle_t Queue) const; // An optional event completion batching mechanism for out-of-order immediate // command lists. @@ -230,209 +225,23 @@ using ur_command_list_map_t = // The iterator pointing to a specific command-list in use. using ur_command_list_ptr_t = ur_command_list_map_t::iterator; -struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { - ur_queue_handle_legacy_t_( - std::vector &ComputeQueues, - std::vector &CopyQueues, - ur_context_handle_t Context, ur_device_handle_t Device, - bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0, - int ForceComputeIndex = -1); - - ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) override; - ur_result_t queueRetain() override; - ur_result_t queueRelease() override; - ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) override; - ur_result_t queueFinish() override; - ur_result_t queueFlush() override; - ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, - const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite, - size_t offset, size_t size, - const void *pSrc, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferReadRect( - ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, - ur_rect_offset_t hostOrigin, ur_rect_region_t region, - size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, - size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t 
enqueueMemBufferWriteRect( - ur_mem_handle_t hBuffer, bool blockingWrite, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferCopy(ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, - size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferCopyRect( - ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, - ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, - ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferFill(ur_mem_handle_t hBuffer, - const void *pPattern, size_t patternSize, - size_t offset, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemImageRead(ur_mem_handle_t hImage, bool blockingRead, - ur_rect_offset_t origin, - ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pDst, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemImageWrite(ur_mem_handle_t hImage, bool blockingWrite, - ur_rect_offset_t origin, - ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pSrc, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueMemImageCopy(ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst, - ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, - ur_rect_region_t region, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferMap(ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, - void **ppRetMap) override; - ur_result_t enqueueMemUnmap(ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueUSMFill(void *pMem, size_t patternSize, - const void *pPattern, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, size_t, - size_t, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) override; - ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, size_t, - size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) override; - ur_result_t enqueueUSMPrefetch(const void *pMem, size_t size, - ur_usm_migration_flags_t flags, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t 
*phEvent) override; - ur_result_t enqueueUSMAdvise(const void *pMem, size_t size, - ur_usm_advice_flags_t advice, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueDeviceGlobalVariableWrite( - ur_program_handle_t hProgram, const char *name, bool blockingWrite, - size_t count, size_t offset, const void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueDeviceGlobalVariableRead( - ur_program_handle_t hProgram, const char *name, bool blockingRead, - size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueReadHostPipe(ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, - void *pDst, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueWriteHostPipe(ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, - void *pSrc, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t bindlessImagesImageCopyExp( - const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, - const ur_image_desc_t *pDstImageDesc, - const ur_image_format_t *pSrcImageFormat, - const ur_image_format_t *pDstImageFormat, - ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t bindlessImagesWaitExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, - uint64_t waitValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t bindlessImagesSignalExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasSignalValue, - uint64_t signalValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueCooperativeKernelLaunchExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueKernelLaunchCustomExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t numPropsInLaunchPropList, - const ur_exp_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, - uint32_t, const ur_mem_handle_t *, - const ur_exp_enqueue_native_command_properties_t *, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) override; +struct ur_queue_handle_t_ : _ur_object { + ur_queue_handle_t_(std::vector &ComputeQueues, + std::vector &CopyQueues, + ur_context_handle_t Context, ur_device_handle_t Device, + bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0, + int ForceComputeIndex = -1); using queue_type = 
ur_device_handle_t_::queue_group_info_t::type; // PI queue is in general a one to many mapping to L0 native queues. struct ur_queue_group_t { - ur_queue_handle_legacy_t Queue; + ur_queue_handle_t Queue; ur_queue_group_t() = delete; // The Queue argument captures the enclosing PI queue. // The Type argument specifies the type of this queue group. // The actual ZeQueues are populated at PI queue construction. - ur_queue_group_t(ur_queue_handle_legacy_t Queue, queue_type Type) + ur_queue_group_t(ur_queue_handle_t Queue, queue_type Type) : Queue(Queue), Type(Type) {} // The type of the queue group. @@ -462,8 +271,7 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { ze_command_queue_handle_t &getZeQueue(uint32_t *QueueGroupOrdinal); // This function sets an immediate commandlist from the interop interface. - void setImmCmdList(ur_queue_handle_legacy_t queue, - ze_command_list_handle_t); + void setImmCmdList(ur_queue_handle_t queue, ze_command_list_handle_t); // This function returns the next immediate commandlist to use. ur_command_list_ptr_t &getImmCmdList(); @@ -530,15 +338,15 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { pi_queue_group_by_tid_t CopyQueueGroupsByTID; // Keeps the PI context to which this queue belongs. - // This field is only set at ur_queue_handle_legacy_t_ creation time, and + // This field is only set at ur_queue_handle_t_ creation time, and // cannot change. Therefore it can be accessed without holding a lock on this - // ur_queue_handle_legacy_t_. + // ur_queue_handle_t_. const ur_context_handle_t Context; // Keeps the PI device to which this queue belongs. - // This field is only set at ur_queue_handle_legacy_t_ creation time, and + // This field is only set at ur_queue_handle_t_ creation time, and // cannot change. Therefore it can be accessed without holding a lock on this - // ur_queue_handle_legacy_t_. + // ur_queue_handle_t_. const ur_device_handle_t Device; // A queue may use either standard or immediate commandlists. At queue @@ -881,21 +689,10 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // Threshold for cleaning up the EventList for immediate command lists. size_t getImmdCmmdListsEventCleanupThreshold(); -}; - -template QueueT GetQueue(ur_queue_handle_t Queue) { - if (!Queue) - return nullptr; - auto *Q = dynamic_cast(Queue); - if (!Q) { - throw UR_RESULT_ERROR_INVALID_QUEUE; - } - return Q; -} -static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { - return GetQueue(Queue); -} + // Pointer to the unified handle. + ur_queue_handle_t_ *UnifiedHandle; +}; // This helper function creates a ur_event_handle_t and associate a // ur_queue_handle_t. Note that the caller of this function must have acquired @@ -910,18 +707,16 @@ static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { // multiple devices. 
// \param ForceHostVisible tells if the event must be created in // the host-visible pool -ur_result_t -createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, - ur_event_handle_t *Event, ur_command_t CommandType, - ur_command_list_ptr_t CommandList, bool IsInternal, - bool IsMultiDevice, - std::optional HostVisible = std::nullopt); +ur_result_t createEventAndAssociateQueue( + ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType, + ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice, + std::optional HostVisible = std::nullopt); // This helper function checks to see if an event for a command can be included // at the end of a command list batch. This will only be true if the event does // not have dependencies or the dependencies are not for events which exist in // this batch. -bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +bool eventCanBeBatched(ur_queue_handle_t Queue, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList); @@ -930,7 +725,7 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // dependencies, then this command can be enqueued without a signal event set in // a command list batch. The signal event will be appended at the end of the // batch to be signalled at the end of the command list. -ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine, ze_event_handle_t *ZeEvent, ur_event_handle_t *Event, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, diff --git a/source/adapters/level_zero/queue_api.cpp b/source/adapters/level_zero/queue_api.cpp deleted file mode 100644 index 188f7c3102..0000000000 --- a/source/adapters/level_zero/queue_api.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/* - * - * Copyright (C) 2024 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
See LICENSE.TXT - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file queue_api.cpp - * - */ - -#include "queue_api.hpp" - -ur_queue_handle_t_::~ur_queue_handle_t_() {} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { - return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - return hQueue->queueRetain(); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - return hQueue->queueRelease(); -} -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { - return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - return hQueue->queueFinish(); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - return hQueue->queueFlush(); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueKernelLaunch( - hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, - numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferWrite(hBuffer, blockingWrite, offset, size, - pSrc, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pDst, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferReadRect( - hBuffer, blockingRead, 
bufferOrigin, hostOrigin, region, bufferRowPitch, - bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferWriteRect( - hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, - bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, - dstOffset, size, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferCopyRect( - hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch, - srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, - size, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, - ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueMemImageRead( - hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, - ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueMemImageWrite( - hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, 
ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, - region, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { - return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset, - size, numEventsInWaitList, phEventWaitList, - phEvent, ppRetMap); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *pMem, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, - numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, - height, numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, - width, height, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingWrite, size_t count, size_t offset, const void *pSrc, - uint32_t numEventsInWaitList, const 
ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueDeviceGlobalVariableWrite( - hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingRead, size_t count, size_t offset, void *pDst, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueDeviceGlobalVariableRead( - hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, - size, numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, - size, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( - ur_queue_handle_t hQueue, const void *pSrc, void *pDst, - const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, - const ur_image_format_t *pSrcImageFormat, - const ur_image_format_t *pDstImageFormat, - ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->bindlessImagesImageCopyExp( - pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, - bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->bindlessImagesWaitExternalSemaphoreExp( - hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, - bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->bindlessImagesSignalExternalSemaphoreExp( - hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueCooperativeKernelLaunchExp( 
- hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( - ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t numPropsInLaunchPropList, - const ur_exp_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueKernelLaunchCustomExp( - hKernel, workDim, pGlobalWorkSize, pLocalWorkSize, - numPropsInLaunchPropList, launchPropList, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( - ur_queue_handle_t hQueue, - ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, - uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, - const ur_exp_enqueue_native_command_properties_t *pProperties, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueNativeCommandExp( - pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, - numEventsInWaitList, phEventWaitList, phEvent); -} diff --git a/source/adapters/level_zero/queue_api.hpp b/source/adapters/level_zero/queue_api.hpp deleted file mode 100644 index bc01596d2b..0000000000 --- a/source/adapters/level_zero/queue_api.hpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * - * Copyright (C) 2024 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
See LICENSE.TXT - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file queue_api.hpp - * - */ - -#pragma once - -#include - -struct ur_queue_handle_t_ { - virtual ~ur_queue_handle_t_(); - virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *, - size_t *) = 0; - virtual ur_result_t queueRetain() = 0; - virtual ur_result_t queueRelease() = 0; - virtual ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *, - ur_native_handle_t *) = 0; - virtual ur_result_t queueFinish() = 0; - virtual ur_result_t queueFlush() = 0; - virtual ur_result_t enqueueKernelLaunch(ur_kernel_handle_t, uint32_t, - const size_t *, const size_t *, - const size_t *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueEventsWait(uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueEventsWaitWithBarrier(uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferRead(ur_mem_handle_t, bool, size_t, - size_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferWrite(ur_mem_handle_t, bool, size_t, - size_t, const void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueMemBufferReadRect(ur_mem_handle_t, bool, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueMemBufferWriteRect(ur_mem_handle_t, bool, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferCopy(ur_mem_handle_t, ur_mem_handle_t, - size_t, size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueMemBufferCopyRect(ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, - size_t, size_t, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferFill(ur_mem_handle_t, const void *, - size_t, size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemImageRead(ur_mem_handle_t, bool, - ur_rect_offset_t, ur_rect_region_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemImageWrite(ur_mem_handle_t, bool, - ur_rect_offset_t, ur_rect_region_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemImageCopy(ur_mem_handle_t, ur_mem_handle_t, - ur_rect_offset_t, ur_rect_offset_t, - ur_rect_region_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferMap(ur_mem_handle_t, bool, ur_map_flags_t, - size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *, void **) = 0; - virtual ur_result_t enqueueMemUnmap(ur_mem_handle_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMFill(void *, size_t, const void *, size_t, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMMemcpy(bool, void *, const void *, size_t, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t 
enqueueUSMPrefetch(const void *, size_t, - ur_usm_migration_flags_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMAdvise(const void *, size_t, - ur_usm_advice_flags_t, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, - size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, - size_t, size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueDeviceGlobalVariableWrite( - ur_program_handle_t, const char *, bool, size_t, size_t, const void *, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueDeviceGlobalVariableRead( - ur_program_handle_t, const char *, bool, size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueReadHostPipe(ur_program_handle_t, const char *, - bool, void *, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueWriteHostPipe(ur_program_handle_t, const char *, - bool, void *, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t bindlessImagesImageCopyExp( - const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, - const ur_image_format_t *, const ur_image_format_t *, - ur_exp_image_copy_region_t *, ur_exp_image_copy_flags_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t bindlessImagesWaitExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t bindlessImagesSignalExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueCooperativeKernelLaunchExp( - ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, - const size_t *, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueTimestampRecordingExp(bool, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueKernelLaunchCustomExp( - ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, uint32_t, - const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, - uint32_t, const ur_mem_handle_t *, - const ur_exp_enqueue_native_command_properties_t *, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; -}; diff --git a/source/adapters/level_zero/sampler.cpp b/source/adapters/level_zero/sampler.cpp index 54ca1b6672..d48e6aeede 100644 --- a/source/adapters/level_zero/sampler.cpp +++ b/source/adapters/level_zero/sampler.cpp @@ -12,7 +12,9 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( +namespace ur::level_zero { + +ur_result_t urSamplerCreate( ur_context_handle_t Context, ///< [in] handle of the context object const ur_sampler_desc_t *Props, ///< [in] specifies a list of sampler property names and their @@ -109,17 +111,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain( - ur_sampler_handle_t - Sampler ///< 
[in] handle of the sampler object to get access +ur_result_t +urSamplerRetain(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to get access ) { Sampler->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( - ur_sampler_handle_t - Sampler ///< [in] handle of the sampler object to release +ur_result_t +urSamplerRelease(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to release ) { if (!Sampler->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( +ur_result_t urSamplerGetInfo( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler object ur_sampler_info_t PropName, ///< [in] name of the sampler property to query size_t PropValueSize, ///< [in] size in bytes of the sampler property value @@ -152,7 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( +ur_result_t urSamplerGetNativeHandle( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler. ur_native_handle_t *NativeSampler ///< [out] a pointer to the native ///< handle of the sampler. @@ -164,7 +166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( +ur_result_t urSamplerCreateWithNativeHandle( ur_native_handle_t NativeSampler, ///< [in] the native handle of the sampler. ur_context_handle_t Context, ///< [in] handle of the context object @@ -182,3 +184,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 8941f756ea..9bdd672818 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -1,19 +1,19 @@ -//===--------- ur_interface_loader.cpp - Level Zero Adapter----------------===// +//===--------- ur_interface_loader.cpp - Level Zero Adapter ------------===// // -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2024 Intel Corporation // // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM // Exceptions. 
See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include #include -namespace { +#include "ur_interface_loader.hpp" -ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { +static ur_result_t validateProcInputs(ur_api_version_t version, + void *pDdiTable) { if (nullptr == pDdiTable) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -24,475 +24,592 @@ ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { } return UR_RESULT_SUCCESS; } -} // namespace -#if defined(__cplusplus) +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +#elif defined(__cplusplus) extern "C" { #endif -UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_global_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( + ur_api_version_t version, ur_global_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnAdapterGet = urAdapterGet; - pDdiTable->pfnAdapterRelease = urAdapterRelease; - pDdiTable->pfnAdapterRetain = urAdapterRetain; - pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; - pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; - return retVal; + pDdiTable->pfnAdapterGet = ur::level_zero::urAdapterGet; + pDdiTable->pfnAdapterRelease = ur::level_zero::urAdapterRelease; + pDdiTable->pfnAdapterRetain = ur::level_zero::urAdapterRetain; + pDdiTable->pfnAdapterGetLastError = ur::level_zero::urAdapterGetLastError; + pDdiTable->pfnAdapterGetInfo = ur::level_zero::urAdapterGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_context_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urContextCreate; - pDdiTable->pfnRetain = urContextRetain; - pDdiTable->pfnRelease = urContextRelease; - pDdiTable->pfnGetInfo = urContextGetInfo; - pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; - pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + pDdiTable->pfnUnsampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = + ur::level_zero::urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = ur::level_zero::urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + ur::level_zero::urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = + ur::level_zero::urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = 
ur::level_zero::urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = + ur::level_zero::urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = + ur::level_zero::urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = ur::level_zero::urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportExternalMemoryExp = + ur::level_zero::urBindlessImagesImportExternalMemoryExp; + pDdiTable->pfnMapExternalArrayExp = + ur::level_zero::urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnMapExternalLinearMemoryExp = + ur::level_zero::urBindlessImagesMapExternalLinearMemoryExp; + pDdiTable->pfnReleaseExternalMemoryExp = + ur::level_zero::urBindlessImagesReleaseExternalMemoryExp; + pDdiTable->pfnImportExternalSemaphoreExp = + ur::level_zero::urBindlessImagesImportExternalSemaphoreExp; + pDdiTable->pfnReleaseExternalSemaphoreExp = + ur::level_zero::urBindlessImagesReleaseExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + ur::level_zero::urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + ur::level_zero::urBindlessImagesSignalExternalSemaphoreExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_enqueue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( + ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; - pDdiTable->pfnEventsWait = urEnqueueEventsWait; - pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; - pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; - pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; - pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; - pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; - pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; - pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; - pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; - pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; - pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; - pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; - pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; - pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; - pDdiTable->pfnUSMFill = urEnqueueUSMFill; - pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; - pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; - pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; - pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; - pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; - pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; - pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; - - return retVal; + pDdiTable->pfnCreateExp = ur::level_zero::urCommandBufferCreateExp; + pDdiTable->pfnRetainExp = ur::level_zero::urCommandBufferRetainExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urCommandBufferReleaseExp; + pDdiTable->pfnFinalizeExp = ur::level_zero::urCommandBufferFinalizeExp; + pDdiTable->pfnAppendKernelLaunchExp = + ur::level_zero::urCommandBufferAppendKernelLaunchExp; + pDdiTable->pfnAppendUSMMemcpyExp = + 
ur::level_zero::urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = + ur::level_zero::urCommandBufferAppendUSMFillExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur::level_zero::urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur::level_zero::urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur::level_zero::urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur::level_zero::urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur::level_zero::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnEnqueueExp = ur::level_zero::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = + ur::level_zero::urCommandBufferRetainCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur::level_zero::urCommandBufferReleaseCommandExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur::level_zero::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = ur::level_zero::urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur::level_zero::urCommandBufferCommandGetInfoExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_event_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( + ur_api_version_t version, ur_context_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urEventGetInfo; - pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; - pDdiTable->pfnWait = urEventWait; - pDdiTable->pfnRetain = urEventRetain; - pDdiTable->pfnRelease = urEventRelease; - pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; - pDdiTable->pfnSetCallback = urEventSetCallback; - - return retVal; + + pDdiTable->pfnCreate = ur::level_zero::urContextCreate; + pDdiTable->pfnRetain = ur::level_zero::urContextRetain; + pDdiTable->pfnRelease = ur::level_zero::urContextRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urContextGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urContextGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urContextCreateWithNativeHandle; + pDdiTable->pfnSetExtendedDeleter = + ur::level_zero::urContextSetExtendedDeleter; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_kernel_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( + ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { + auto result = 
validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urKernelCreate; - pDdiTable->pfnGetInfo = urKernelGetInfo; - pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; - pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; - pDdiTable->pfnRetain = urKernelRetain; - pDdiTable->pfnRelease = urKernelRelease; - pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; - pDdiTable->pfnSetArgValue = urKernelSetArgValue; - pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; - pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; - pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; - pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; - pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; - pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants; - pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize; - return retVal; + + pDdiTable->pfnKernelLaunch = ur::level_zero::urEnqueueKernelLaunch; + pDdiTable->pfnEventsWait = ur::level_zero::urEnqueueEventsWait; + pDdiTable->pfnEventsWaitWithBarrier = + ur::level_zero::urEnqueueEventsWaitWithBarrier; + pDdiTable->pfnMemBufferRead = ur::level_zero::urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferWrite = ur::level_zero::urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferReadRect = ur::level_zero::urEnqueueMemBufferReadRect; + pDdiTable->pfnMemBufferWriteRect = + ur::level_zero::urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemBufferCopy = ur::level_zero::urEnqueueMemBufferCopy; + pDdiTable->pfnMemBufferCopyRect = ur::level_zero::urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = ur::level_zero::urEnqueueMemBufferFill; + pDdiTable->pfnMemImageRead = ur::level_zero::urEnqueueMemImageRead; + pDdiTable->pfnMemImageWrite = ur::level_zero::urEnqueueMemImageWrite; + pDdiTable->pfnMemImageCopy = ur::level_zero::urEnqueueMemImageCopy; + pDdiTable->pfnMemBufferMap = ur::level_zero::urEnqueueMemBufferMap; + pDdiTable->pfnMemUnmap = ur::level_zero::urEnqueueMemUnmap; + pDdiTable->pfnUSMFill = ur::level_zero::urEnqueueUSMFill; + pDdiTable->pfnUSMMemcpy = ur::level_zero::urEnqueueUSMMemcpy; + pDdiTable->pfnUSMPrefetch = ur::level_zero::urEnqueueUSMPrefetch; + pDdiTable->pfnUSMAdvise = ur::level_zero::urEnqueueUSMAdvise; + pDdiTable->pfnUSMFill2D = ur::level_zero::urEnqueueUSMFill2D; + pDdiTable->pfnUSMMemcpy2D = ur::level_zero::urEnqueueUSMMemcpy2D; + pDdiTable->pfnDeviceGlobalVariableWrite = + ur::level_zero::urEnqueueDeviceGlobalVariableWrite; + pDdiTable->pfnDeviceGlobalVariableRead = + ur::level_zero::urEnqueueDeviceGlobalVariableRead; + pDdiTable->pfnReadHostPipe = ur::level_zero::urEnqueueReadHostPipe; + pDdiTable->pfnWriteHostPipe = ur::level_zero::urEnqueueWriteHostPipe; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnImageCreate = urMemImageCreate; - pDdiTable->pfnBufferCreate = urMemBufferCreate; - pDdiTable->pfnRetain = 
urMemRetain; - pDdiTable->pfnRelease = urMemRelease; - pDdiTable->pfnBufferPartition = urMemBufferPartition; - pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; - pDdiTable->pfnBufferCreateWithNativeHandle = - urMemBufferCreateWithNativeHandle; - pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urMemGetInfo; - pDdiTable->pfnImageGetInfo = urMemImageGetInfo; - return retVal; + pDdiTable->pfnKernelLaunchCustomExp = + ur::level_zero::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur::level_zero::urEnqueueCooperativeKernelLaunchExp; + pDdiTable->pfnTimestampRecordingExp = + ur::level_zero::urEnqueueTimestampRecordingExp; + pDdiTable->pfnNativeCommandExp = ur::level_zero::urEnqueueNativeCommandExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_platform_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( + ur_api_version_t version, ur_event_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urPlatformGet; - pDdiTable->pfnGetInfo = urPlatformGetInfo; - pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; - pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; - pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; - - return retVal; -} -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { + pDdiTable->pfnGetInfo = ur::level_zero::urEventGetInfo; + pDdiTable->pfnGetProfilingInfo = ur::level_zero::urEventGetProfilingInfo; + pDdiTable->pfnWait = ur::level_zero::urEventWait; + pDdiTable->pfnRetain = ur::level_zero::urEventRetain; + pDdiTable->pfnRelease = ur::level_zero::urEventRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urEventGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urEventCreateWithNativeHandle; + pDdiTable->pfnSetCallback = ur::level_zero::urEventSetCallback; + + return result; +} - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( + ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; - pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; - pDdiTable->pfnBuild = urProgramBuild; - pDdiTable->pfnCompile = urProgramCompile; - pDdiTable->pfnLink = urProgramLink; - pDdiTable->pfnRetain = urProgramRetain; - pDdiTable->pfnRelease = urProgramRelease; - pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; - pDdiTable->pfnGetGlobalVariablePointer = urProgramGetGlobalVariablePointer; - pDdiTable->pfnGetInfo = urProgramGetInfo; - pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + + pDdiTable->pfnCreate = ur::level_zero::urKernelCreate; + pDdiTable->pfnGetInfo = ur::level_zero::urKernelGetInfo; + 
pDdiTable->pfnGetGroupInfo = ur::level_zero::urKernelGetGroupInfo; + pDdiTable->pfnGetSubGroupInfo = ur::level_zero::urKernelGetSubGroupInfo; + pDdiTable->pfnRetain = ur::level_zero::urKernelRetain; + pDdiTable->pfnRelease = ur::level_zero::urKernelRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urKernelGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urKernelCreateWithNativeHandle; + pDdiTable->pfnGetSuggestedLocalWorkSize = + ur::level_zero::urKernelGetSuggestedLocalWorkSize; + pDdiTable->pfnSetArgValue = ur::level_zero::urKernelSetArgValue; + pDdiTable->pfnSetArgLocal = ur::level_zero::urKernelSetArgLocal; + pDdiTable->pfnSetArgPointer = ur::level_zero::urKernelSetArgPointer; + pDdiTable->pfnSetExecInfo = ur::level_zero::urKernelSetExecInfo; + pDdiTable->pfnSetArgSampler = ur::level_zero::urKernelSetArgSampler; + pDdiTable->pfnSetArgMemObj = ur::level_zero::urKernelSetArgMemObj; pDdiTable->pfnSetSpecializationConstants = - urProgramSetSpecializationConstants; - pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + ur::level_zero::urKernelSetSpecializationConstants; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_queue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urQueueGetInfo; - pDdiTable->pfnCreate = urQueueCreate; - pDdiTable->pfnRetain = urQueueRetain; - pDdiTable->pfnRelease = urQueueRelease; - pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; - pDdiTable->pfnFinish = urQueueFinish; - pDdiTable->pfnFlush = urQueueFlush; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur::level_zero::urKernelSuggestMaxCooperativeGroupCountExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_sampler_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL +urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urSamplerCreate; - pDdiTable->pfnRetain = urSamplerRetain; - pDdiTable->pfnRelease = urSamplerRelease; - pDdiTable->pfnGetInfo = urSamplerGetInfo; - pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; - - return retVal; + + pDdiTable->pfnImageCreate = ur::level_zero::urMemImageCreate; + pDdiTable->pfnBufferCreate = ur::level_zero::urMemBufferCreate; + pDdiTable->pfnRetain = ur::level_zero::urMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urMemRelease; + pDdiTable->pfnBufferPartition = ur::level_zero::urMemBufferPartition; + 
pDdiTable->pfnGetNativeHandle = ur::level_zero::urMemGetNativeHandle; + pDdiTable->pfnBufferCreateWithNativeHandle = + ur::level_zero::urMemBufferCreateWithNativeHandle; + pDdiTable->pfnImageCreateWithNativeHandle = + ur::level_zero::urMemImageCreateWithNativeHandle; + pDdiTable->pfnGetInfo = ur::level_zero::urMemGetInfo; + pDdiTable->pfnImageGetInfo = ur::level_zero::urMemImageGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_usm_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( + ur_api_version_t version, ur_physical_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnHostAlloc = urUSMHostAlloc; - pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; - pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; - pDdiTable->pfnFree = urUSMFree; - pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; - pDdiTable->pfnPoolCreate = urUSMPoolCreate; - pDdiTable->pfnPoolRetain = urUSMPoolRetain; - pDdiTable->pfnPoolRelease = urUSMPoolRelease; - pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo; - - return retVal; + pDdiTable->pfnCreate = ur::level_zero::urPhysicalMemCreate; + pDdiTable->pfnRetain = ur::level_zero::urPhysicalMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urPhysicalMemRelease; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_device_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( + ur_api_version_t version, ur_platform_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urDeviceGet; - pDdiTable->pfnGetInfo = urDeviceGetInfo; - pDdiTable->pfnRetain = urDeviceRetain; - pDdiTable->pfnRelease = urDeviceRelease; - pDdiTable->pfnPartition = urDevicePartition; - pDdiTable->pfnSelectBinary = urDeviceSelectBinary; - pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; - pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; - - return retVal; + + pDdiTable->pfnGet = ur::level_zero::urPlatformGet; + pDdiTable->pfnGetInfo = ur::level_zero::urPlatformGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urPlatformGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urPlatformCreateWithNativeHandle; + pDdiTable->pfnGetApiVersion = ur::level_zero::urPlatformGetApiVersion; + pDdiTable->pfnGetBackendOption = ur::level_zero::urPlatformGetBackendOption; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_command_buffer_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL 
urGetProgramProcAddrTable( + ur_api_version_t version, ur_program_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreateExp = urCommandBufferCreateExp; - pDdiTable->pfnRetainExp = urCommandBufferRetainExp; - pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; - pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; - pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; - pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; - pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; - pDdiTable->pfnAppendMemBufferCopyRectExp = - urCommandBufferAppendMemBufferCopyRectExp; - pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; - pDdiTable->pfnAppendMemBufferReadRectExp = - urCommandBufferAppendMemBufferReadRectExp; - pDdiTable->pfnAppendMemBufferWriteExp = - urCommandBufferAppendMemBufferWriteExp; - pDdiTable->pfnAppendMemBufferWriteRectExp = - urCommandBufferAppendMemBufferWriteRectExp; - pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; - pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; - pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; - pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; - pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; - pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; - pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; - - return retVal; + + pDdiTable->pfnCreateWithIL = ur::level_zero::urProgramCreateWithIL; + pDdiTable->pfnCreateWithBinary = ur::level_zero::urProgramCreateWithBinary; + pDdiTable->pfnBuild = ur::level_zero::urProgramBuild; + pDdiTable->pfnCompile = ur::level_zero::urProgramCompile; + pDdiTable->pfnLink = ur::level_zero::urProgramLink; + pDdiTable->pfnRetain = ur::level_zero::urProgramRetain; + pDdiTable->pfnRelease = ur::level_zero::urProgramRelease; + pDdiTable->pfnGetFunctionPointer = + ur::level_zero::urProgramGetFunctionPointer; + pDdiTable->pfnGetGlobalVariablePointer = + ur::level_zero::urProgramGetGlobalVariablePointer; + pDdiTable->pfnGetInfo = ur::level_zero::urProgramGetInfo; + pDdiTable->pfnGetBuildInfo = ur::level_zero::urProgramGetBuildInfo; + pDdiTable->pfnSetSpecializationConstants = + ur::level_zero::urProgramSetSpecializationConstants; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urProgramGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urProgramCreateWithNativeHandle; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( - ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; - pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; - pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; - return retVal; 
+ pDdiTable->pfnBuildExp = ur::level_zero::urProgramBuildExp; + pDdiTable->pfnCompileExp = ur::level_zero::urProgramCompileExp; + pDdiTable->pfnLinkExp = ur::level_zero::urProgramLinkExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( - ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( + ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnUnsampledImageHandleDestroyExp = - urBindlessImagesUnsampledImageHandleDestroyExp; - pDdiTable->pfnSampledImageHandleDestroyExp = - urBindlessImagesSampledImageHandleDestroyExp; - pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; - pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; - pDdiTable->pfnUnsampledImageCreateExp = - urBindlessImagesUnsampledImageCreateExp; - pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; - pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; - pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; - pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; - pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; - pDdiTable->pfnImportExternalMemoryExp = - urBindlessImagesImportExternalMemoryExp; - pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; - pDdiTable->pfnMapExternalLinearMemoryExp = - urBindlessImagesMapExternalLinearMemoryExp; - pDdiTable->pfnReleaseExternalMemoryExp = - urBindlessImagesReleaseExternalMemoryExp; - pDdiTable->pfnImportExternalSemaphoreExp = - urBindlessImagesImportExternalSemaphoreExp; - pDdiTable->pfnReleaseExternalSemaphoreExp = - urBindlessImagesReleaseExternalSemaphoreExp; - pDdiTable->pfnWaitExternalSemaphoreExp = - urBindlessImagesWaitExternalSemaphoreExp; - pDdiTable->pfnSignalExternalSemaphoreExp = - urBindlessImagesSignalExternalSemaphoreExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnGetInfo = ur::level_zero::urQueueGetInfo; + pDdiTable->pfnCreate = ur::level_zero::urQueueCreate; + pDdiTable->pfnRetain = ur::level_zero::urQueueRetain; + pDdiTable->pfnRelease = ur::level_zero::urQueueRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urQueueGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urQueueCreateWithNativeHandle; + pDdiTable->pfnFinish = ur::level_zero::urQueueFinish; + pDdiTable->pfnFlush = ur::level_zero::urQueueFlush; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( - ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( + ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; - pDdiTable->pfnImportExp = urUSMImportExp; - pDdiTable->pfnReleaseExp = urUSMReleaseExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnCreate = ur::level_zero::urSamplerCreate; + pDdiTable->pfnRetain = ur::level_zero::urSamplerRetain; + pDdiTable->pfnRelease = ur::level_zero::urSamplerRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urSamplerGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urSamplerGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urSamplerCreateWithNativeHandle; + + return result; } 
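Every Get*ProcAddrTable entry point in this file follows the same validate-then-fill pattern shown above. As a minimal usage sketch (not part of this patch; it assumes the default dynamic build, where the getters keep C linkage, and a hypothetical caller named releaseViaTable), a loader-side consumer could populate the sampler table and dispatch through it like this:

#include <ur_ddi.h>

// Illustration only: fetch the sampler DDI table and release a sampler
// through it, mirroring how the loader dispatches into the adapter.
static ur_result_t releaseViaTable(ur_sampler_handle_t hSampler) {
  ur_sampler_dditable_t Table{};
  ur_result_t Res = urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, &Table);
  if (Res != UR_RESULT_SUCCESS)
    return Res;
  // pfnRelease now points at ur::level_zero::urSamplerRelease.
  return Table.pfnRelease(hSampler);
}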
-UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_virtual_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnFree = urVirtualMemFree; - pDdiTable->pfnGetInfo = urVirtualMemGetInfo; - pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; - pDdiTable->pfnMap = urVirtualMemMap; - pDdiTable->pfnReserve = urVirtualMemReserve; - pDdiTable->pfnSetAccess = urVirtualMemSetAccess; - pDdiTable->pfnUnmap = urVirtualMemUnmap; - - return retVal; + pDdiTable->pfnHostAlloc = ur::level_zero::urUSMHostAlloc; + pDdiTable->pfnDeviceAlloc = ur::level_zero::urUSMDeviceAlloc; + pDdiTable->pfnSharedAlloc = ur::level_zero::urUSMSharedAlloc; + pDdiTable->pfnFree = ur::level_zero::urUSMFree; + pDdiTable->pfnGetMemAllocInfo = ur::level_zero::urUSMGetMemAllocInfo; + pDdiTable->pfnPoolCreate = ur::level_zero::urUSMPoolCreate; + pDdiTable->pfnPoolRetain = ur::level_zero::urUSMPoolRetain; + pDdiTable->pfnPoolRelease = ur::level_zero::urUSMPoolRelease; + pDdiTable->pfnPoolGetInfo = ur::level_zero::urUSMPoolGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_physical_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urPhysicalMemCreate; - pDdiTable->pfnRelease = urPhysicalMemRelease; - pDdiTable->pfnRetain = urPhysicalMemRetain; + pDdiTable->pfnPitchedAllocExp = ur::level_zero::urUSMPitchedAllocExp; + pDdiTable->pfnImportExp = ur::level_zero::urUSMImportExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urUSMReleaseExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( - ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnCooperativeKernelLaunchExp = - urEnqueueCooperativeKernelLaunchExp; - pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; - pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; + pDdiTable->pfnEnablePeerAccessExp = + ur::level_zero::urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = + ur::level_zero::urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = + ur::level_zero::urUsmP2PPeerAccessGetInfoExp; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( - ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL 
urGetVirtualMemProcAddrTable( + ur_api_version_t version, ur_virtual_mem_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = - urKernelSuggestMaxCooperativeGroupCountExp; + pDdiTable->pfnGranularityGetInfo = + ur::level_zero::urVirtualMemGranularityGetInfo; + pDdiTable->pfnReserve = ur::level_zero::urVirtualMemReserve; + pDdiTable->pfnFree = ur::level_zero::urVirtualMemFree; + pDdiTable->pfnMap = ur::level_zero::urVirtualMemMap; + pDdiTable->pfnUnmap = ur::level_zero::urVirtualMemUnmap; + pDdiTable->pfnSetAccess = ur::level_zero::urVirtualMemSetAccess; + pDdiTable->pfnGetInfo = ur::level_zero::urVirtualMemGetInfo; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( + ur_api_version_t version, ur_device_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnBuildExp = urProgramBuildExp; - pDdiTable->pfnCompileExp = urProgramCompileExp; - pDdiTable->pfnLinkExp = urProgramLinkExp; - - return UR_RESULT_SUCCESS; + pDdiTable->pfnGet = ur::level_zero::urDeviceGet; + pDdiTable->pfnGetInfo = ur::level_zero::urDeviceGetInfo; + pDdiTable->pfnRetain = ur::level_zero::urDeviceRetain; + pDdiTable->pfnRelease = ur::level_zero::urDeviceRelease; + pDdiTable->pfnPartition = ur::level_zero::urDevicePartition; + pDdiTable->pfnSelectBinary = ur::level_zero::urDeviceSelectBinary; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urDeviceGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urDeviceCreateWithNativeHandle; + pDdiTable->pfnGetGlobalTimestamps = + ur::level_zero::urDeviceGetGlobalTimestamps; + + return result; } -#if defined(__cplusplus) + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +} // namespace ur::level_zero +#elif defined(__cplusplus) } // extern "C" #endif + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + + result = ur::level_zero::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Global); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetBindlessImagesExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetCommandBufferExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->CommandBufferExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Context); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Enqueue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->EnqueueExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Event); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Kernel); + if (result != UR_RESULT_SUCCESS) + return result; 
+ result = ur::level_zero::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->KernelExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->PhysicalMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Platform); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Program); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->ProgramExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Queue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Sampler); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->USMExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->UsmP2PExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->VirtualMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Device); + if (result != UR_RESULT_SUCCESS) + return result; + + return result; +} +} // namespace ur::level_zero +#endif diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp new file mode 100644 index 0000000000..f95625dd5b --- /dev/null +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -0,0 +1,706 @@ +//===--------- ur_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +namespace ur::level_zero { +ur_result_t urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, + uint32_t *pNumAdapters); +ur_result_t urAdapterRelease(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterRetain(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterGetLastError(ur_adapter_handle_t hAdapter, + const char **ppMessage, int32_t *pError); +ur_result_t urAdapterGetInfo(ur_adapter_handle_t hAdapter, + ur_adapter_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGet(ur_adapter_handle_t *phAdapters, uint32_t NumAdapters, + uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, + uint32_t *pNumPlatforms); +ur_result_t urPlatformGetInfo(ur_platform_handle_t hPlatform, + ur_platform_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGetApiVersion(ur_platform_handle_t hPlatform, + ur_api_version_t *pVersion); +ur_result_t urPlatformGetNativeHandle(ur_platform_handle_t hPlatform, + ur_native_handle_t *phNativePlatform); +ur_result_t urPlatformCreateWithNativeHandle( + ur_native_handle_t hNativePlatform, ur_adapter_handle_t hAdapter, + const ur_platform_native_properties_t *pProperties, + ur_platform_handle_t *phPlatform); +ur_result_t urPlatformGetBackendOption(ur_platform_handle_t hPlatform, + const char *pFrontendOption, + const char **ppPlatformOption); +ur_result_t urDeviceGet(ur_platform_handle_t hPlatform, + ur_device_type_t DeviceType, uint32_t NumEntries, + ur_device_handle_t *phDevices, uint32_t *pNumDevices); +ur_result_t urDeviceGetInfo(ur_device_handle_t hDevice, + ur_device_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urDeviceRetain(ur_device_handle_t hDevice); +ur_result_t urDeviceRelease(ur_device_handle_t hDevice); +ur_result_t +urDevicePartition(ur_device_handle_t hDevice, + const ur_device_partition_properties_t *pProperties, + uint32_t NumDevices, ur_device_handle_t *phSubDevices, + uint32_t *pNumDevicesRet); +ur_result_t urDeviceSelectBinary(ur_device_handle_t hDevice, + const ur_device_binary_t *pBinaries, + uint32_t NumBinaries, + uint32_t *pSelectedBinary); +ur_result_t urDeviceGetNativeHandle(ur_device_handle_t hDevice, + ur_native_handle_t *phNativeDevice); +ur_result_t +urDeviceCreateWithNativeHandle(ur_native_handle_t hNativeDevice, + ur_adapter_handle_t hAdapter, + const ur_device_native_properties_t *pProperties, + ur_device_handle_t *phDevice); +ur_result_t urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, + uint64_t *pDeviceTimestamp, + uint64_t *pHostTimestamp); +ur_result_t urContextCreate(uint32_t DeviceCount, + const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext); +ur_result_t urContextRetain(ur_context_handle_t hContext); +ur_result_t urContextRelease(ur_context_handle_t hContext); +ur_result_t urContextGetInfo(ur_context_handle_t hContext, + ur_context_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext); +ur_result_t urContextCreateWithNativeHandle( + ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter, + uint32_t numDevices, const ur_device_handle_t *phDevices, + const ur_context_native_properties_t 
*pProperties, + ur_context_handle_t *phContext); +ur_result_t +urContextSetExtendedDeleter(ur_context_handle_t hContext, + ur_context_extended_deleter_t pfnDeleter, + void *pUserData); +ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, void *pHost, + ur_mem_handle_t *phMem); +ur_result_t urMemBufferCreate(ur_context_handle_t hContext, + ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, + ur_mem_handle_t *phBuffer); +ur_result_t urMemRetain(ur_mem_handle_t hMem); +ur_result_t urMemRelease(ur_mem_handle_t hMem); +ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, + const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem); +ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, + ur_device_handle_t hDevice, + ur_native_handle_t *phNativeMem); +ur_result_t urMemBufferCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemImageCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler); +ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler); +ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler); +ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, + ur_sampler_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler, + ur_native_handle_t *phNativeSampler); +ur_result_t urSamplerCreateWithNativeHandle( + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, + const ur_sampler_native_properties_t *pProperties, + ur_sampler_handle_t *phSampler); +ur_result_t urUSMHostAlloc(ur_context_handle_t hContext, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMDeviceAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMSharedAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMFree(ur_context_handle_t hContext, void *pMem); +ur_result_t urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, + ur_usm_alloc_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urUSMPoolCreate(ur_context_handle_t hContext, + ur_usm_pool_desc_t *pPoolDesc, + ur_usm_pool_handle_t *ppPool); +ur_result_t urUSMPoolRetain(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolGetInfo(ur_usm_pool_handle_t hPool, + ur_usm_pool_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); 
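The new header declares each adapter entry point inside namespace ur::level_zero, so in a static build the calls above resolve at link time instead of going through the loader's dispatch tables. A small sketch of that use (illustration only, not part of this patch; releasePoolDirect is a hypothetical consumer and assumes UR_STATIC_ADAPTER_LEVEL_ZERO is defined):

#include <ur_api.h>
#include "ur_interface_loader.hpp"

// Illustration only: with the adapter linked statically, the namespaced
// declaration is an ordinary function, so no DDI-table indirection is needed.
static ur_result_t releasePoolDirect(ur_usm_pool_handle_t hPool) {
  return ur::level_zero::urUSMPoolRelease(hPool);
}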
+ur_result_t urVirtualMemGranularityGetInfo( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_virtual_mem_granularity_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart); +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, size_t size, + const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem); +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, + size_t length, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramCreateWithBinary( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramBuild(ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions); +ur_result_t urProgramCompile(ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const char *pOptions); +ur_result_t urProgramLink(ur_context_handle_t hContext, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, ur_program_handle_t *phProgram); +ur_result_t urProgramRetain(ur_program_handle_t hProgram); +ur_result_t urProgramRelease(ur_program_handle_t hProgram); +ur_result_t urProgramGetFunctionPointer(ur_device_handle_t hDevice, + ur_program_handle_t hProgram, + const char *pFunctionName, + void **ppFunctionPointer); +ur_result_t urProgramGetGlobalVariablePointer( + ur_device_handle_t hDevice, ur_program_handle_t hProgram, + const char *pGlobalVariableName, size_t *pGlobalVariableSizeRet, + void **ppGlobalVariablePointerRet); +ur_result_t urProgramGetInfo(ur_program_handle_t hProgram, + ur_program_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urProgramGetBuildInfo(ur_program_handle_t hProgram, + ur_device_handle_t hDevice, + ur_program_build_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t hProgram, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urProgramGetNativeHandle(ur_program_handle_t hProgram, + ur_native_handle_t *phNativeProgram); +ur_result_t urProgramCreateWithNativeHandle( + ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, + const ur_program_native_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urKernelCreate(ur_program_handle_t hProgram, + 
const char *pKernelName, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelSetArgValue( + ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, + const ur_kernel_arg_value_properties_t *pProperties, const void *pArgValue); +ur_result_t +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, + const ur_kernel_arg_local_properties_t *pProperties); +ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelRetain(ur_kernel_handle_t hKernel); +ur_result_t urKernelRelease(ur_kernel_handle_t hKernel); +ur_result_t +urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_pointer_properties_t *pProperties, + const void *pArgValue); +ur_result_t +urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, + size_t propSize, + const ur_kernel_exec_info_properties_t *pProperties, + const void *pPropValue); +ur_result_t +urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_sampler_properties_t *pProperties, + ur_sampler_handle_t hArgValue); +ur_result_t +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *pProperties, + ur_mem_handle_t hArgValue); +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t hKernel, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, + ur_native_handle_t *phNativeKernel); +ur_result_t +urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, + ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel, + ur_queue_handle_t hQueue, + uint32_t numWorkDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + size_t *pSuggestedLocalWorkSize); +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueRetain(ur_queue_handle_t hQueue); +ur_result_t urQueueRelease(ur_queue_handle_t hQueue); +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue); +ur_result_t urQueueCreateWithNativeHandle( + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueFinish(ur_queue_handle_t hQueue); +ur_result_t urQueueFlush(ur_queue_handle_t hQueue); +ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent, + 
ur_profiling_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventWait(uint32_t numEvents, + const ur_event_handle_t *phEventWaitList); +ur_result_t urEventRetain(ur_event_handle_t hEvent); +ur_result_t urEventRelease(ur_event_handle_t hEvent); +ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, + ur_native_handle_t *phNativeEvent); +ur_result_t +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent); +ur_result_t urEventSetCallback(ur_event_handle_t hEvent, + ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData); +ur_result_t urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWriteRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t 
offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap); +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t 
urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch); +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesImageAllocateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem); +ur_result_t urBindlessImagesUnsampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesSampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, const void *pSrc, void *pDst, + const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, + const ur_image_format_t *pSrcImageFormat, + const ur_image_format_t *pDstImageFormat, + ur_exp_image_copy_region_t *pCopyRegion, + ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesImageGetInfoExp( + ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, + ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet); +ur_result_t urBindlessImagesMipmapGetLevelExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem); +ur_result_t urBindlessImagesImportExternalMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + ur_exp_external_mem_type_t memHandleType, + ur_exp_external_mem_desc_t *pExternalMemDesc, + ur_exp_external_mem_handle_t *phExternalMem); +ur_result_t urBindlessImagesMapExternalArrayExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + 
ur_exp_external_mem_handle_t hExternalMem, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, + uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem); +ur_result_t urBindlessImagesReleaseExternalMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_mem_handle_t hExternalMem); +ur_result_t urBindlessImagesImportExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_semaphore_type_t semHandleType, + ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, + ur_exp_external_semaphore_handle_t *phExternalSemaphore); +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_semaphore_handle_t hExternalSemaphore); +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer); +ur_result_t +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t urCommandBufferAppendKernelLaunchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand); +ur_result_t urCommandBufferAppendUSMMemcpyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, + const void *pPattern, size_t patternSize, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, const void *pSrc, + uint32_t 
numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferEnqueueExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch); +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, 
+ ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, size_t localWorkSize, + size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet); +ur_result_t urEnqueueTimestampRecordingExp( + ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urProgramBuildExp(ur_program_handle_t hProgram, uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramCompileExp(ur_program_handle_t hProgram, + uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramLinkExp(ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, + ur_program_handle_t *phProgram); +ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem, + size_t size); +ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem); +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEnqueueNativeCommandExp( + ur_queue_handle_t hQueue, + ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, + uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, + const ur_exp_enqueue_native_command_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index 096ae076f9..36965c5d58 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 1069ec78da..5296391794 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -17,6 +17,7 @@ #include "usm.hpp" #include 
"logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" @@ -296,7 +297,9 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( +namespace ur::level_zero { + +ur_result_t urUSMHostAlloc( ur_context_handle_t Context, ///< [in] handle of the context object const ur_usm_desc_t *USMDesc, ///< [in][optional] USM memory allocation descriptor @@ -335,7 +338,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -368,7 +371,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( +ur_result_t urUSMDeviceAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -410,7 +413,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -448,7 +451,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -513,7 +516,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } umf_memory_pool_handle_t hPoolInternal = nullptr; @@ -555,9 +558,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( - ur_context_handle_t Context, ///< [in] handle of the context object - void *Mem ///< [in] pointer to USM memory object +ur_result_t +urUSMFree(ur_context_handle_t Context, ///< [in] handle of the context object + void *Mem ///< [in] pointer to USM memory object ) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -567,7 +570,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( return USMFreeHelper(Context, Mem); } -UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( +ur_result_t urUSMGetMemAllocInfo( ur_context_handle_t Context, ///< [in] handle of the context object const void *Ptr, ///< [in] pointer to USM memory object ur_usm_alloc_info_t @@ -667,6 +670,103 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( return UR_RESULT_SUCCESS; } +ur_result_t urUSMPoolCreate( + ur_context_handle_t Context, ///< [in] handle of the context object + ur_usm_pool_desc_t + *PoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool +) { + + try { + *Pool = reinterpret_cast( + new ur_usm_pool_handle_t_(Context, PoolDesc)); + + std::shared_lock ContextLock(Context->Mutex); + Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); + + } catch (const UsmAllocationException &Ex) { + return Ex.getError(); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t +urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool +) { + Pool->RefCount.increment(); + return UR_RESULT_SUCCESS; +} + +ur_result_t +urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool +) { + if (Pool->RefCount.decrementAndTest()) { + std::shared_lock ContextLock(Pool->Context->Mutex); + Pool->Context->UsmPoolHandles.remove(Pool); + delete Pool; + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urUSMPoolGetInfo( + ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool + ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query + size_t PropSize, ///< [in] size in bytes of the pool property value provided + void *PropValue, ///< [out][typename(propName, propSize)] value of the pool + ///< property + size_t *PropSizeRet ///< [out] size in bytes returned in pool property value +) { + UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + + switch (PropName) { + case UR_USM_POOL_INFO_REFERENCE_COUNT: { + return ReturnValue(Pool->RefCount.load()); + } + case UR_USM_POOL_INFO_CONTEXT: { + return ReturnValue(Pool->Context); + } + default: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } +} + +ur_result_t urUSMImportExp(ur_context_handle_t Context, void *HostPtr, + size_t Size) { + UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); + + // Promote the host ptr to USM host memory. + if (ZeUSMImport.Supported && HostPtr != nullptr) { + // Query memory type of the host pointer + ze_device_handle_t ZeDeviceHandle; + ZeStruct ZeMemoryAllocationProperties; + ZE2UR_CALL(zeMemGetAllocProperties, + (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties, + &ZeDeviceHandle)); + + // If not shared of any type, we can import the ptr + if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { + // Promote the host ptr to USM host memory + ze_driver_handle_t driverHandle = + Context->getPlatform()->ZeDriverHandleExpTranslated; + ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size); + } + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) { + UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); + + // Release the imported memory. + if (ZeUSMImport.Supported && HostPtr != nullptr) + ZeUSMImport.doZeUSMRelease( + Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero + static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { auto ZeResult = ZE_CALL_NOCHECK(zeMemFree, (Context->ZeContext, Ptr)); // Handle When the driver is already released @@ -972,68 +1072,6 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, } } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( - ur_context_handle_t Context, ///< [in] handle of the context object - ur_usm_pool_desc_t - *PoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with - ///< ::ur_usm_pool_limits_desc_t - ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool -) { - - try { - *Pool = reinterpret_cast( - new ur_usm_pool_handle_t_(Context, PoolDesc)); - - std::shared_lock ContextLock(Context->Mutex); - Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); - - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - Pool->RefCount.increment(); - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - if (Pool->RefCount.decrementAndTest()) { - std::shared_lock ContextLock(Pool->Context->Mutex); - Pool->Context->UsmPoolHandles.remove(Pool); - delete Pool; - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urUSMPoolGetInfo( - ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool - ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query - size_t PropSize, ///< [in] size in bytes of the pool property value provided - void *PropValue, ///< [out][typename(propName, propSize)] value of the pool - ///< property - size_t *PropSizeRet ///< [out] size in bytes returned in pool property value -) { - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); - - switch (PropName) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: { - return ReturnValue(Pool->RefCount.load()); - } - case UR_USM_POOL_INFO_CONTEXT: { - return ReturnValue(Pool->Context); - } - default: { - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - } -} - // If indirect access tracking is not enabled then this functions just performs // zeMemFree. If indirect access tracking is enabled then reference counting is // performed. @@ -1116,38 +1154,3 @@ ur_result_t USMFreeHelper(ur_context_handle_t Context, void *Ptr, UR_CALL(ContextReleaseHelper(Context)); return umf2urResult(umfRet); } - -UR_APIEXPORT ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t Context, - void *HostPtr, size_t Size) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Promote the host ptr to USM host memory. - if (ZeUSMImport.Supported && HostPtr != nullptr) { - // Query memory type of the host pointer - ze_device_handle_t ZeDeviceHandle; - ZeStruct ZeMemoryAllocationProperties; - ZE2UR_CALL(zeMemGetAllocProperties, - (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties, - &ZeDeviceHandle)); - - // If not shared of any type, we can import the ptr - if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { - // Promote the host ptr to USM host memory - ze_driver_handle_t driverHandle = - Context->getPlatform()->ZeDriverHandleExpTranslated; - ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size); - } - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, - void *HostPtr) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Release the imported memory. 
- if (ZeUSMImport.Supported && HostPtr != nullptr) - ZeUSMImport.doZeUSMRelease( - Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/usm_p2p.cpp b/source/adapters/level_zero/usm_p2p.cpp index 2b81828423..6e701aa803 100644 --- a/source/adapters/level_zero/usm_p2p.cpp +++ b/source/adapters/level_zero/usm_p2p.cpp @@ -11,8 +11,10 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +namespace ur::level_zero { + +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -21,8 +23,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -31,10 +33,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, - ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); @@ -69,3 +72,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return ReturnValue(propertyValue); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index dc52874364..cd25f838fe 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -17,13 +17,14 @@ std::mutex ZeCall::GlobalLock; -ur_result_t UR_APICALL urContextGetNativeHandle( - ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { +namespace ur::level_zero { +ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urContextCreateWithNativeHandle( +ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter, uint32_t numDevices, const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, @@ -32,62 +33,30 @@ ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urContextSetExtendedDeleter( - ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter, - void *pUserData) { +ur_result_t +urContextSetExtendedDeleter(ur_context_handle_t hContext, + ur_context_extended_deleter_t pfnDeleter, + void *pUserData) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageCreate(ur_context_handle_t hContext, - ur_mem_flags_t flags, - const ur_image_format_t *pImageFormat, - const 
ur_image_desc_t *pImageDesc, - void *pHost, ur_mem_handle_t *phMem) { +ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, void *pHost, + ur_mem_handle_t *phMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemBufferCreate( - ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, - const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { +ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, + ur_device_handle_t hDevice, + ur_native_handle_t *phNativeMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urMemBufferPartition( - ur_mem_handle_t hBuffer, ur_mem_flags_t flags, - ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, - ur_mem_handle_t *phMem) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urMemGetNativeHandle(ur_mem_handle_t hMem, - ur_device_handle_t hDevice, - ur_native_handle_t *phNativeMem) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( - ur_native_handle_t hNativeMem, ur_context_handle_t hContext, - const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( +ur_result_t urMemImageCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { @@ -95,53 +64,51 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, - ur_mem_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, - ur_image_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerCreate(ur_context_handle_t hContext, - const ur_sampler_desc_t *pDesc, - ur_sampler_handle_t *phSampler) { +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler) { logger::error("{} function 
not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { +ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { +ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler, - ur_sampler_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, + ur_sampler_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerGetNativeHandle( - ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { +ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler, + ur_native_handle_t *phNativeSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( +ur_result_t urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { @@ -149,7 +116,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( +ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -157,111 +124,74 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t hContext, - const void *pStart, size_t size, - void **ppStart) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t hContext, - const void *pStart, size_t size) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urVirtualMemMap(ur_context_handle_t hContext, - const void *pStart, size_t size, - ur_physical_mem_handle_t hPhysicalMem, - size_t offset, - ur_virtual_mem_access_flags_t flags) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t hContext, - const void *pStart, size_t size) { +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, - size_t size, ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } 
-ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t hContext, - const void *pStart, size_t size, - ur_virtual_mem_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, + ur_virtual_mem_access_flags_t flags) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urPhysicalMemCreate( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const ur_physical_mem_properties_t *pProperties, - ur_physical_mem_handle_t *phPhysicalMem) { +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetArgLocal( - ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_local_properties_t *pProperties) { +ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, size_t size, + const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, - ur_kernel_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel, - ur_device_handle_t hDevice, - ur_kernel_group_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_sub_group_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetExecInfo( - ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize, - const ur_kernel_exec_info_properties_t *pProperties, - const void *pPropValue) { - logger::error("{} function not 
implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL +ur_result_t urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *pProperties, ur_sampler_handle_t hArgValue) { @@ -269,112 +199,94 @@ urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_mem_obj_properties_t *pProperties, - ur_mem_handle_t hArgValue) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urKernelSetSpecializationConstants( +ur_result_t urKernelSetSpecializationConstants( ur_kernel_handle_t hKernel, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetNativeHandle( - ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { +ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, + ur_native_handle_t *phNativeKernel) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, - ur_program_handle_t hProgram, - const ur_kernel_native_properties_t *pProperties, - ur_kernel_handle_t *phKernel) { +ur_result_t +urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, + ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( - ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t numWorkDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - size_t *pSuggestedLocalWorkSize) { +ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel, + ur_queue_handle_t hQueue, + uint32_t numWorkDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + size_t *pSuggestedLocalWorkSize) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, - ur_event_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent, + ur_profiling_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetProfilingInfo(ur_event_handle_t hEvent, - ur_profiling_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, + ur_native_handle_t *phNativeEvent) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventWait(uint32_t numEvents, - const ur_event_handle_t *phEventWaitList) { +ur_result_t +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + ur_context_handle_t hContext, + const ur_event_native_properties_t 
*pProperties, + ur_event_handle_t *phEvent) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetNativeHandle( - ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { +ur_result_t urEventSetCallback(ur_event_handle_t hEvent, + ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventCreateWithNativeHandle( - ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t hEvent, - ur_execution_info_t execStatus, - ur_event_callback_t pfnNotify, - void *pUserData) { +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t UR_APICALL urBindlessImagesUnsampledImageHandleDestroyExp( +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesSampledImageHandleDestroyExp( +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( +ur_result_t urBindlessImagesImageAllocateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -382,14 +294,15 @@ ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem) { +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( +ur_result_t urBindlessImagesUnsampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -398,7 +311,7 @@ 
ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( +ur_result_t urBindlessImagesSampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -407,14 +320,14 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( +ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( +ur_result_t urBindlessImagesMipmapGetLevelExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -422,14 +335,15 @@ ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hMem) { +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( +ur_result_t urBindlessImagesImportExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, ur_exp_external_mem_type_t memHandleType, ur_exp_external_mem_desc_t *pExternalMemDesc, @@ -438,7 +352,7 @@ ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( +ur_result_t urBindlessImagesMapExternalArrayExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_external_mem_handle_t hExternalMem, @@ -447,21 +361,21 @@ ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( +ur_result_t urBindlessImagesReleaseExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_mem_handle_t hExternalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( +ur_result_t urBindlessImagesImportExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_type_t semHandleType, ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, @@ -470,40 +384,41 @@ 
ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_handle_t hExternalSemaphore) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - ur_exp_command_buffer_handle_t *phCommandBuffer) { +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( +ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, @@ -515,7 +430,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -524,7 +439,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( +ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, const void *pPattern, size_t patternSize, size_t size, uint32_t numSyncPointsInWaitList, @@ -534,7 +449,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +ur_result_t urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numSyncPointsInWaitList, @@ -544,7 +459,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, const void *pSrc, uint32_t 
numSyncPointsInWaitList, @@ -554,7 +469,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -563,7 +478,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -575,7 +490,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -587,7 +502,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -599,7 +514,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( +ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numSyncPointsInWaitList, @@ -609,7 +524,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( +ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, size_t size, ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, @@ -619,7 +534,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( +ur_result_t urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -628,7 +543,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferEnqueueExp( +ur_result_t urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -636,19 +551,19 @@ ur_result_t UR_APICALL 
urCommandBufferEnqueueExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferRetainCommandExp( +ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t hCommand) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( +ur_result_t urCommandBufferReleaseCommandExp( ur_exp_command_buffer_command_handle_t hCommand) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( +ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t hCommand, const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { @@ -656,15 +571,16 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( +ur_result_t urCommandBufferCommandGetInfoExp( ur_exp_command_buffer_command_handle_t hCommand, ur_exp_command_buffer_command_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -672,41 +588,42 @@ ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t hContext, void *pMem, - size_t size) { +ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t hContext, - void *pMem) { +ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, 
- ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/command_list_cache.cpp b/source/adapters/level_zero/v2/command_list_cache.cpp index eee6555f87..651cb5944a 100644 --- a/source/adapters/level_zero/v2/command_list_cache.cpp +++ b/source/adapters/level_zero/v2/command_list_cache.cpp @@ -43,7 +43,7 @@ inline size_t command_list_descriptor_hash_t::operator()( command_list_cache_t::command_list_cache_t(ze_context_handle_t ZeContext) : ZeContext{ZeContext} {} -raii::ze_command_list_t +raii::ze_command_list_handle_t command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { if (auto ImmCmdDesc = std::get_if(&desc)) { @@ -61,7 +61,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { ZE2UR_CALL_THROWS( zeCommandListCreateImmediate, (ZeContext, ImmCmdDesc->ZeDevice, &QueueDesc, &ZeCommandList)); - return raii::ze_command_list_t(ZeCommandList, &zeCommandListDestroy); + return raii::ze_command_list_handle_t(ZeCommandList); } else { auto RegCmdDesc = std::get(desc); ZeStruct CmdListDesc; @@ -72,7 +72,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { ze_command_list_handle_t ZeCommandList; ZE2UR_CALL_THROWS(zeCommandListCreate, (ZeContext, RegCmdDesc.ZeDevice, &CmdListDesc, &ZeCommandList)); - return raii::ze_command_list_t(ZeCommandList, &zeCommandListDestroy); + return raii::ze_command_list_handle_t(ZeCommandList); } } @@ -94,8 +94,7 @@ command_list_cache_t::getImmediateCommandList( auto CommandList = getCommandList(Desc).release(); return raii::cache_borrowed_command_list_t( CommandList, [Cache = this, Desc](ze_command_list_handle_t CmdList) { - Cache->addCommandList( - Desc, raii::ze_command_list_t(CmdList, &zeCommandListDestroy)); + Cache->addCommandList(Desc, raii::ze_command_list_handle_t(CmdList)); }); } @@ -113,12 +112,11 @@ command_list_cache_t::getRegularCommandList(ze_device_handle_t ZeDevice, return raii::cache_borrowed_command_list_t( CommandList, [Cache = this, Desc](ze_command_list_handle_t CmdList) { - Cache->addCommandList( - Desc, raii::ze_command_list_t(CmdList, &zeCommandListDestroy)); + Cache->addCommandList(Desc, raii::ze_command_list_handle_t(CmdList)); }); } -raii::ze_command_list_t +raii::ze_command_list_handle_t command_list_cache_t::getCommandList(const command_list_descriptor_t &desc) { std::unique_lock Lock(ZeCommandListCacheMutex); auto it = ZeCommandListCache.find(desc); @@ -129,7 +127,8 @@ command_list_cache_t::getCommandList(const command_list_descriptor_t &desc) { assert(!it->second.empty()); - raii::ze_command_list_t CommandListHandle = std::move(it->second.top()); + raii::ze_command_list_handle_t CommandListHandle = + std::move(it->second.top()); it->second.pop(); if (it->second.empty()) @@ -138,8 +137,9 @@ command_list_cache_t::getCommandList(const command_list_descriptor_t &desc) { return CommandListHandle; } -void command_list_cache_t::addCommandList(const command_list_descriptor_t &desc, - raii::ze_command_list_t cmdList) { +void command_list_cache_t::addCommandList( + const command_list_descriptor_t &desc, + raii::ze_command_list_handle_t cmdList) { // 
TODO: add a limit? std::unique_lock Lock(ZeCommandListCacheMutex); auto [it, _] = ZeCommandListCache.try_emplace(desc); diff --git a/source/adapters/level_zero/v2/command_list_cache.hpp b/source/adapters/level_zero/v2/command_list_cache.hpp index 1850a4334c..9884e16dc4 100644 --- a/source/adapters/level_zero/v2/command_list_cache.hpp +++ b/source/adapters/level_zero/v2/command_list_cache.hpp @@ -14,18 +14,16 @@ #include "latency_tracker.hpp" #include -#include +#include #include -#include "../common.hpp" +#include "common.hpp" namespace v2 { namespace raii { -using ze_command_list_t = std::unique_ptr<::_ze_command_list_handle_t, - decltype(&zeCommandListDestroy)>; using cache_borrowed_command_list_t = std::unique_ptr<::_ze_command_list_handle_t, - std::function>; + std::function>; } // namespace raii struct immediate_command_list_descriptor_t { @@ -72,15 +70,16 @@ struct command_list_cache_t { private: ze_context_handle_t ZeContext; std::unordered_map, + std::stack, command_list_descriptor_hash_t> ZeCommandListCache; ur_mutex ZeCommandListCacheMutex; - raii::ze_command_list_t getCommandList(const command_list_descriptor_t &desc); + raii::ze_command_list_handle_t + getCommandList(const command_list_descriptor_t &desc); void addCommandList(const command_list_descriptor_t &desc, - raii::ze_command_list_t cmdList); - raii::ze_command_list_t + raii::ze_command_list_handle_t cmdList); + raii::ze_command_list_handle_t createCommandList(const command_list_descriptor_t &desc); }; } // namespace v2 diff --git a/source/adapters/level_zero/v2/common.hpp b/source/adapters/level_zero/v2/common.hpp index fdfed0c661..4fb851bad8 100644 --- a/source/adapters/level_zero/v2/common.hpp +++ b/source/adapters/level_zero/v2/common.hpp @@ -54,6 +54,8 @@ struct ze_handle_wrapper { try { reset(); } catch (...) { + // TODO: add appropriate logging or pass the error + // to the caller (make the dtor noexcept(false) or use tls?) } } @@ -85,70 +87,6 @@ struct ze_handle_wrapper { bool ownZeHandle; }; -template -struct ur_shared_handle { - using handle_t = URHandle; - - ur_shared_handle() : handle(nullptr) {} - explicit ur_shared_handle(handle_t handle) : handle(handle) {} - ~ur_shared_handle() { - try { - reset(); - } catch (...) 
{ - } - } - - ur_shared_handle(const ur_shared_handle &other) : handle(other.handle) { - retain(handle); - } - ur_shared_handle(ur_shared_handle &&other) : handle(other.handle) { - other.handle = nullptr; - } - ur_shared_handle(std::nullptr_t) : handle(nullptr) {} - - void reset() { - if (!handle) { - return; - } - - UR_CALL_THROWS(release(handle)); - handle = nullptr; - } - - ur_shared_handle &operator=(const ur_shared_handle &other) { - if (handle) { - release(handle); - } - handle = other.handle; - retain(handle); - return *this; - } - ur_shared_handle &operator=(ur_shared_handle &&other) { - if (handle) { - release(handle); - } - handle = other.handle; - other.handle = nullptr; - return *this; - } - ur_shared_handle &operator=(std::nullptr_t) { - if (handle) { - release(handle); - } - new (this) ur_shared_handle(nullptr); - return *this; - } - - handle_t *ptr() { return &handle; } - handle_t get() const { return handle; } - handle_t operator->() { return handle; } - operator handle_t() { return handle; } - -private: - handle_t handle; -}; - using ze_kernel_handle_t = ze_handle_wrapper<::ze_kernel_handle_t, zeKernelDestroy>; @@ -158,11 +96,11 @@ using ze_event_handle_t = using ze_event_pool_handle_t = ze_handle_wrapper<::ze_event_pool_handle_t, zeEventPoolDestroy>; -using ur_queue_shared_handle_t = - ur_shared_handle; +using ze_context_handle_t = + ze_handle_wrapper<::ze_context_handle_t, zeContextDestroy>; -using ur_kernel_shared_handle_t = - ur_shared_handle; +using ze_command_list_handle_t = + ze_handle_wrapper<::ze_command_list_handle_t, zeCommandListDestroy>; } // namespace raii } // namespace v2 diff --git a/source/adapters/level_zero/v2/context.cpp b/source/adapters/level_zero/v2/context.cpp index 08032fe85e..abb8a13538 100644 --- a/source/adapters/level_zero/v2/context.cpp +++ b/source/adapters/level_zero/v2/context.cpp @@ -17,8 +17,8 @@ ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext, uint32_t numDevices, const ur_device_handle_t *phDevices, bool ownZeContext) - : hContext(hContext), hDevices(phDevices, phDevices + numDevices), - commandListCache(hContext), + : hContext(hContext, ownZeContext), + hDevices(phDevices, phDevices + numDevices), commandListCache(hContext), eventPoolCache(phDevices[0]->Platform->getNumDevices(), [context = this, platform = phDevices[0]->Platform](DeviceId deviceId) { @@ -27,19 +27,7 @@ ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext, return std::make_unique( context, device, v2::EVENT_COUNTER, v2::QUEUE_IMMEDIATE); - }) { - std::ignore = ownZeContext; -} - -ur_context_handle_t_::~ur_context_handle_t_() noexcept(false) { - // ur_context_handle_t_ is only created/destroyed through urContextCreate - // and urContextRelease so it's safe to throw here - ZE2UR_CALL_THROWS(zeContextDestroy, (hContext)); -} - -ze_context_handle_t ur_context_handle_t_::getZeHandle() const { - return hContext; -} + }) {} ur_result_t ur_context_handle_t_::retain() { RefCount.increment(); @@ -72,10 +60,11 @@ bool ur_context_handle_t_::isValidDevice(ur_device_handle_t hDevice) const { return false; } -UR_APIEXPORT ur_result_t UR_APICALL -urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices, - const ur_context_properties_t *pProperties, - ur_context_handle_t *phContext) { +namespace ur::level_zero { +ur_result_t urContextCreate(uint32_t deviceCount, + const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext) { std::ignore = pProperties; 
ur_platform_handle_t hPlatform = phDevices[0]->Platform; @@ -89,23 +78,20 @@ urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urContextRetain(ur_context_handle_t hContext) { +ur_result_t urContextRetain(ur_context_handle_t hContext) { return hContext->retain(); } -UR_APIEXPORT ur_result_t UR_APICALL -urContextRelease(ur_context_handle_t hContext) { +ur_result_t urContextRelease(ur_context_handle_t hContext) { return hContext->release(); } -UR_APIEXPORT ur_result_t UR_APICALL -urContextGetInfo(ur_context_handle_t hContext, - ur_context_info_t contextInfoType, size_t propSize, +ur_result_t urContextGetInfo(ur_context_handle_t hContext, + ur_context_info_t contextInfoType, size_t propSize, - void *pContextInfo, + void *pContextInfo, - size_t *pPropSizeRet) { + size_t *pPropSizeRet) { std::shared_lock Lock(hContext->Mutex); UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet); switch ( @@ -117,7 +103,14 @@ urContextGetInfo(ur_context_handle_t hContext, return ReturnValue(uint32_t(hContext->getDevices().size())); case UR_CONTEXT_INFO_REFERENCE_COUNT: return ReturnValue(uint32_t{hContext->RefCount.load()}); + case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: + // TODO: this is currently not implemented + return ReturnValue(uint8_t{false}); + case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: + // 2D USM fill is not supported. + return ReturnValue(uint8_t{false}); default: return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/context.hpp b/source/adapters/level_zero/v2/context.hpp index 69bf406594..0ed701400d 100644 --- a/source/adapters/level_zero/v2/context.hpp +++ b/source/adapters/level_zero/v2/context.hpp @@ -13,17 +13,17 @@ #include #include "command_list_cache.hpp" +#include "common.hpp" #include "event_pool_cache.hpp" struct ur_context_handle_t_ : _ur_object { ur_context_handle_t_(ze_context_handle_t hContext, uint32_t numDevices, const ur_device_handle_t *phDevices, bool ownZeContext); - ~ur_context_handle_t_() noexcept(false); ur_result_t retain(); ur_result_t release(); - ze_context_handle_t getZeHandle() const; + inline ze_context_handle_t getZeHandle() const { return hContext.get(); } ur_platform_handle_t getPlatform() const; const std::vector &getDevices() const; @@ -31,7 +31,7 @@ struct ur_context_handle_t_ : _ur_object { // For that the Device or its root devices need to be in the context. 
bool isValidDevice(ur_device_handle_t Device) const; - const ze_context_handle_t hContext; + const v2::raii::ze_context_handle_t hContext; const std::vector hDevices; v2::command_list_cache_t commandListCache; v2::event_pool_cache eventPoolCache; diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index 3129e3dd3e..df99c83b53 100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -45,10 +45,47 @@ ur_result_t ur_event_handle_t_::release() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - return hEvent->retain(); -} +namespace ur::level_zero { +ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); } -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { +ur_result_t urEventRelease(ur_event_handle_t hEvent) { return hEvent->release(); } + +ur_result_t urEventWait(uint32_t numEvents, + const ur_event_handle_t *phEventWaitList) { + for (uint32_t i = 0; i < numEvents; ++i) { + ZE2UR_CALL(zeEventHostSynchronize, + (phEventWaitList[i]->getZeEvent(), UINT64_MAX)); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, + size_t propValueSize, void *pPropValue, + size_t *pPropValueSizeRet) { + UrReturnHelper returnValue(propValueSize, pPropValue, pPropValueSizeRet); + + switch (propName) { + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: { + auto zeStatus = ZE_CALL_NOCHECK(zeEventQueryStatus, (hEvent->getZeEvent())); + + if (zeStatus == ZE_RESULT_NOT_READY) { + return returnValue(UR_EVENT_STATUS_SUBMITTED); + } else { + return returnValue(UR_EVENT_STATUS_COMPLETE); + } + } + case UR_EVENT_INFO_REFERENCE_COUNT: { + return returnValue(hEvent->RefCount.load()); + } + default: + logger::error( + "Unsupported ParamName in urEventGetInfo: ParamName=ParamName={}(0x{})", + propName, logger::toHex(propName)); + return UR_RESULT_ERROR_INVALID_VALUE; + } + + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/event_provider_counter.cpp b/source/adapters/level_zero/v2/event_provider_counter.cpp index 5334b2f888..76caea4c58 100644 --- a/source/adapters/level_zero/v2/event_provider_counter.cpp +++ b/source/adapters/level_zero/v2/event_provider_counter.cpp @@ -27,9 +27,9 @@ provider_counter::provider_counter(ur_platform_handle_t platform, ZE2UR_CALL_THROWS(zeDriverGetExtensionFunctionAddress, (platform->ZeDriver, "zexCounterBasedEventCreate", (void **)&this->eventCreateFunc)); - ZE2UR_CALL_THROWS( - zelLoaderTranslateHandle, - (ZEL_HANDLE_CONTEXT, context->hContext, (void **)&translatedContext)); + ZE2UR_CALL_THROWS(zelLoaderTranslateHandle, + (ZEL_HANDLE_CONTEXT, context->getZeHandle(), + (void **)&translatedContext)); ZE2UR_CALL_THROWS( zelLoaderTranslateHandle, (ZEL_HANDLE_DEVICE, device->ZeDevice, (void **)&translatedDevice)); @@ -39,7 +39,7 @@ event_allocation provider_counter::allocate() { if (freelist.empty()) { ZeStruct desc; desc.index = 0; - desc.signal = 0; + desc.signal = ZE_EVENT_SCOPE_FLAG_HOST; desc.wait = 0; ze_event_handle_t handle; diff --git a/source/adapters/level_zero/v2/event_provider_normal.cpp b/source/adapters/level_zero/v2/event_provider_normal.cpp index f5a1c940c6..4df05c12ed 100644 --- a/source/adapters/level_zero/v2/event_provider_normal.cpp +++ b/source/adapters/level_zero/v2/event_provider_normal.cpp @@ -32,7 +32,7 @@ provider_pool::provider_pool(ur_context_handle_t context, 
desc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_pool_counter_based_exp_desc_t counterBasedExt = { - ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr}; + ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr, 0}; if (events == event_type::EVENT_COUNTER) { counterBasedExt.flags = @@ -43,7 +43,7 @@ provider_pool::provider_pool(ur_context_handle_t context, } ZE2UR_CALL_THROWS(zeEventPoolCreate, - (context->hContext, &desc, 1, + (context->getZeHandle(), &desc, 1, const_cast(&device->ZeDevice), pool.ptr())); @@ -51,7 +51,7 @@ provider_pool::provider_pool(ur_context_handle_t context, for (int i = 0; i < EVENTS_BURST; ++i) { ZeStruct desc; desc.index = i; - desc.signal = 0; + desc.signal = ZE_EVENT_SCOPE_FLAG_HOST; desc.wait = 0; ZE2UR_CALL_THROWS(zeEventCreate, (pool.get(), &desc, freelist[i].ptr())); } diff --git a/source/adapters/level_zero/v2/event_provider_normal.hpp b/source/adapters/level_zero/v2/event_provider_normal.hpp index 238ab2f360..1260964a4f 100644 --- a/source/adapters/level_zero/v2/event_provider_normal.hpp +++ b/source/adapters/level_zero/v2/event_provider_normal.hpp @@ -23,6 +23,7 @@ #include "event.hpp" #include "../device.hpp" +#include "../ur_interface_loader.hpp" namespace v2 { @@ -50,10 +51,10 @@ class provider_normal : public event_provider { event_type etype, queue_type qtype) : producedType(etype), queueType(qtype), urContext(context), urDevice(device) { - urDeviceRetain(device); + ur::level_zero::urDeviceRetain(device); } - ~provider_normal() override { urDeviceRelease(urDevice); } + ~provider_normal() override { ur::level_zero::urDeviceRelease(urDevice); } event_allocation allocate() override; ur_device_handle_t device() override; diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index 58e1a10ef1..8bfad2d2ad 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -12,12 +12,14 @@ #include "context.hpp" #include "kernel.hpp" +#include "memory.hpp" #include "../device.hpp" #include "../platform.hpp" #include "../program.hpp" +#include "../ur_interface_loader.hpp" -ur_single_device_kernel_t::ur_single_device_kernel_t(ze_device_handle_t hDevice, +ur_single_device_kernel_t::ur_single_device_kernel_t(ur_device_handle_t hDevice, ze_kernel_handle_t hKernel, bool ownZeHandle) : hDevice(hDevice), hKernel(hKernel, ownZeHandle) { @@ -32,10 +34,12 @@ ur_result_t ur_single_device_kernel_t::release() { return UR_RESULT_SUCCESS; } -ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_shared_handle_t hProgram, +ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *kernelName) : hProgram(hProgram), deviceKernels(hProgram->Context->getPlatform()->getNumDevices()) { + ur::level_zero::urProgramRetain(hProgram); + for (auto [zeDevice, zeModule] : hProgram->ZeModuleMap) { ZeStruct zeKernelDesc; zeKernelDesc.pKernelName = kernelName; @@ -51,7 +55,7 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_shared_handle_t hProgram, assert(urDevice != hProgram->Context->getDevices().end()); auto deviceId = (*urDevice)->Id.value(); - deviceKernels[deviceId].emplace(zeDevice, zeKernel, true); + deviceKernels[deviceId].emplace(*urDevice, zeKernel, true); } completeInitialization(); } @@ -78,7 +82,8 @@ ur_result_t ur_kernel_handle_t_::release() { singleDeviceKernelOpt.value().hKernel.reset(); } } - hProgram.reset(); + + UR_CALL_THROWS(ur::level_zero::urProgramRelease(hProgram)); return UR_RESULT_SUCCESS; } @@ -114,7 +119,7 @@ 
ur_kernel_handle_t_::getZeHandle(ur_device_handle_t hDevice) { auto &kernel = deviceKernels[0].value(); // hDevice is nullptr for native handle - if ((kernel.hDevice != nullptr && kernel.hDevice != hDevice->ZeDevice)) { + if ((kernel.hDevice != nullptr && kernel.hDevice != hDevice)) { throw UR_RESULT_ERROR_INVALID_DEVICE; } @@ -190,25 +195,77 @@ ur_result_t ur_kernel_handle_t_::setArgPointer( } ur_program_handle_t ur_kernel_handle_t_::getProgramHandle() const { - return hProgram.get(); + return hProgram; +} + +ur_result_t ur_kernel_handle_t_::setExecInfo(ur_kernel_exec_info_t propName, + const void *pPropValue) { + std::scoped_lock Guard(Mutex); + + for (auto &kernel : deviceKernels) { + if (!kernel.has_value()) + continue; + if (propName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS && + *(static_cast(pPropValue)) == true) { + // The whole point for users really was to not need to know anything + // about the types of allocations kernel uses. So in DPC++ we always + // just set all 3 modes for each kernel. + ze_kernel_indirect_access_flags_t indirectFlags = + ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | + ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | + ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED; + ZE2UR_CALL(zeKernelSetIndirectAccess, + (kernel->hKernel.get(), indirectFlags)); + } else if (propName == UR_KERNEL_EXEC_INFO_CACHE_CONFIG) { + ze_cache_config_flag_t zeCacheConfig{}; + auto cacheConfig = + *(static_cast(pPropValue)); + if (cacheConfig == UR_KERNEL_CACHE_CONFIG_LARGE_SLM) + zeCacheConfig = ZE_CACHE_CONFIG_FLAG_LARGE_SLM; + else if (cacheConfig == UR_KERNEL_CACHE_CONFIG_LARGE_DATA) + zeCacheConfig = ZE_CACHE_CONFIG_FLAG_LARGE_DATA; + else if (cacheConfig == UR_KERNEL_CACHE_CONFIG_DEFAULT) + zeCacheConfig = static_cast(0); + else + // Unexpected cache configuration value. 
+ return UR_RESULT_ERROR_INVALID_VALUE; + ZE2UR_CALL(zeKernelSetCacheConfig, + (kernel->hKernel.get(), zeCacheConfig);); + } else { + logger::error("urKernelSetExecInfo: unsupported ParamName"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + } + + return UR_RESULT_SUCCESS; +} + +std::vector ur_kernel_handle_t_::getDevices() const { + std::vector devices; + for (size_t i = 0; i < deviceKernels.size(); ++i) { + if (deviceKernels[i].has_value()) { + devices.push_back(deviceKernels[i].value().hDevice); + } + } + return devices; } -UR_APIEXPORT ur_result_t UR_APICALL -urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, - ur_kernel_handle_t *phKernel) { - *phKernel = new ur_kernel_handle_t_( - ur_kernel_handle_t_::ur_program_shared_handle_t(hProgram), pKernelName); +namespace ur::level_zero { +ur_result_t urKernelCreate(ur_program_handle_t hProgram, + const char *pKernelName, + ur_kernel_handle_t *phKernel) { + *phKernel = new ur_kernel_handle_t_(hProgram, pKernelName); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( +ur_result_t urKernelRetain( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to retain ) { hKernel->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( +ur_result_t urKernelRelease( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to release ) { if (!hKernel->RefCount.decrementAndTest()) @@ -220,7 +277,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( +ur_result_t urKernelSetArgValue( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] size_t argSize, ///< [in] size of argument type @@ -233,7 +290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return hKernel->setArgValue(argIndex, argSize, pProperties, pArgValue); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( +ur_result_t urKernelSetArgPointer( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_pointer_properties_t @@ -244,3 +301,162 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgPointer"); return hKernel->setArgPointer(argIndex, pProperties, pArgValue); } + +ur_result_t +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *pProperties, + ur_mem_handle_t hArgValue) { + TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgMemObj"); + + // TODO: support properties + std::ignore = pProperties; + + auto kernelDevices = hKernel->getDevices(); + if (kernelDevices.size() == 1) { + auto zePtr = hArgValue->getPtr(kernelDevices.front()); + return hKernel->setArgPointer(argIndex, nullptr, zePtr); + } else { + // TODO: Implement this for multi-device kernels. + // Do this the same way as in legacy (keep a pending Args vector and + // do actual allocation on kernel submission) or allocate the memory + // immediately (only for small allocations?) 
+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } +} + +ur_result_t +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, + const ur_kernel_arg_local_properties_t *pProperties) { + TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgLocal"); + + std::ignore = pProperties; + + return hKernel->setArgValue(argIndex, argSize, nullptr, nullptr); +} + +ur_result_t urKernelSetExecInfo( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + ur_kernel_exec_info_t propName, ///< [in] name of the execution attribute + size_t propSize, ///< [in] size in byte the attribute value + const ur_kernel_exec_info_properties_t + *pProperties, ///< [in][optional] pointer to execution info properties + const void *pPropValue ///< [in][range(0, propSize)] pointer to memory + ///< location holding the property value. +) { + std::ignore = propSize; + std::ignore = pProperties; + + return hKernel->setExecInfo(propName, pPropValue); +} + +ur_result_t urKernelGetGroupInfo( + ur_kernel_handle_t hKernel, ///< [in] handle of the Kernel object + ur_device_handle_t hDevice, ///< [in] handle of the Device object + ur_kernel_group_info_t + paramName, ///< [in] name of the work Group property to query + size_t + paramValueSize, ///< [in] size of the Kernel Work Group property value + void *pParamValue, ///< [in,out][optional][range(0, propSize)] value of the + ///< Kernel Work Group property. + size_t *pParamValueSizeRet ///< [out][optional] pointer to the actual size + ///< in bytes of data being queried by propName. +) { + UrReturnHelper returnValue(paramValueSize, pParamValue, pParamValueSizeRet); + + std::shared_lock Guard(hKernel->Mutex); + switch (paramName) { + case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { + // TODO: To revisit after level_zero/issues/262 is resolved + struct { + size_t Arr[3]; + } GlobalWorkSize = {{(hDevice->ZeDeviceComputeProperties->maxGroupSizeX * + hDevice->ZeDeviceComputeProperties->maxGroupCountX), + (hDevice->ZeDeviceComputeProperties->maxGroupSizeY * + hDevice->ZeDeviceComputeProperties->maxGroupCountY), + (hDevice->ZeDeviceComputeProperties->maxGroupSizeZ * + hDevice->ZeDeviceComputeProperties->maxGroupCountZ)}}; + return returnValue(GlobalWorkSize); + } + case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { + ZeStruct workGroupProperties; + workGroupProperties.maxGroupSize = 0; + + ZeStruct kernelProperties; + kernelProperties.pNext = &workGroupProperties; + + auto zeDevice = hKernel->getZeHandle(hDevice); + if (zeDevice) { + auto zeResult = + ZE_CALL_NOCHECK(zeKernelGetProperties, (zeDevice, &kernelProperties)); + if (zeResult == ZE_RESULT_SUCCESS && + workGroupProperties.maxGroupSize != 0) { + return returnValue(workGroupProperties.maxGroupSize); + } + return returnValue( + uint64_t{hDevice->ZeDeviceComputeProperties->maxTotalGroupSize}); + } + } + case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { + auto props = hKernel->getProperties(hDevice); + struct { + size_t Arr[3]; + } WgSize = {{props.requiredGroupSizeX, props.requiredGroupSizeY, + props.requiredGroupSizeZ}}; + return returnValue(WgSize); + } + case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { + auto props = hKernel->getProperties(hDevice); + return returnValue(uint32_t{props.localMemSize}); + } + case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { + return returnValue( + size_t{hDevice->ZeDeviceProperties->physicalEUSimdWidth}); + } + case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { + auto props = hKernel->getProperties(hDevice); + return returnValue(uint32_t{props.privateMemSize}); + } + 
default: { + logger::error( + "Unknown ParamName in urKernelGetGroupInfo: ParamName={}(0x{})", + paramName, logger::toHex(paramName)); + return UR_RESULT_ERROR_INVALID_VALUE; + } + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urKernelGetSubGroupInfo( + ur_kernel_handle_t hKernel, ///< [in] handle of the Kernel object + ur_device_handle_t hDevice, ///< [in] handle of the Device object + ur_kernel_sub_group_info_t + propName, ///< [in] name of the SubGroup property to query + size_t propSize, ///< [in] size of the Kernel SubGroup property value + void *pPropValue, ///< [in,out][range(0, propSize)][optional] value of the + ///< Kernel SubGroup property. + size_t *pPropSizeRet ///< [out][optional] pointer to the actual size in + ///< bytes of data being queried by propName. +) { + std::ignore = hDevice; + + UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet); + + auto props = hKernel->getProperties(hDevice); + + std::shared_lock Guard(hKernel->Mutex); + if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { + returnValue(uint32_t{props.maxSubgroupSize}); + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) { + returnValue(uint32_t{props.maxNumSubgroups}); + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS) { + returnValue(uint32_t{props.requiredNumSubGroups}); + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) { + returnValue(uint32_t{props.requiredSubgroupSize}); + } else { + die("urKernelGetSubGroupInfo: parameter not implemented"); + return {}; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/kernel.hpp b/source/adapters/level_zero/v2/kernel.hpp index b6309ab20f..2d3a891826 100644 --- a/source/adapters/level_zero/v2/kernel.hpp +++ b/source/adapters/level_zero/v2/kernel.hpp @@ -15,32 +15,19 @@ #include "common.hpp" struct ur_single_device_kernel_t { - ur_single_device_kernel_t(ze_device_handle_t hDevice, + ur_single_device_kernel_t(ur_device_handle_t hDevice, ze_kernel_handle_t hKernel, bool ownZeHandle); ur_result_t release(); - ze_device_handle_t hDevice; + ur_device_handle_t hDevice; v2::raii::ze_kernel_handle_t hKernel; mutable ZeCache> zeKernelProperties; }; struct ur_kernel_handle_t_ : _ur_object { private: - static inline ur_result_t - internalProgramRelease(ur_program_handle_t hProgram) { - // do a release on the program this kernel was part of without delete of the - // program handle. - hProgram->ur_release_program_resources(false); - return UR_RESULT_SUCCESS; - } - public: - using ur_program_shared_handle_t = - v2::raii::ur_shared_handle; - - ur_kernel_handle_t_(ur_program_shared_handle_t hProgram, - const char *kernelName); + ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *kernelName); // From native handle ur_kernel_handle_t_(ur_native_handle_t hNativeKernel, @@ -53,6 +40,9 @@ struct ur_kernel_handle_t_ : _ur_object { // Get program handle of the kernel. ur_program_handle_t getProgramHandle() const; + // Get devices the kernel is built for. + std::vector getDevices() const; + // Get name of the kernel. const std::string &getName() const; @@ -70,12 +60,16 @@ struct ur_kernel_handle_t_ : _ur_object { const ur_kernel_arg_pointer_properties_t *pProperties, const void *pArgValue); + // Implementation of urKernelSetExecInfo. + ur_result_t setExecInfo(ur_kernel_exec_info_t propName, + const void *pPropValue); + // Perform cleanup. ur_result_t release(); private: // Keep the program of the kernel. 
- ur_program_shared_handle_t hProgram; + ur_program_handle_t hProgram; // Vector of ur_single_device_kernel_t indexed by device->Id std::vector> deviceKernels; diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp new file mode 100644 index 0000000000..fc9a7522a4 --- /dev/null +++ b/source/adapters/level_zero/v2/memory.cpp @@ -0,0 +1,180 @@ +//===--------- memory.cpp - Level Zero Adapter ---------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "memory.hpp" +#include "context.hpp" + +#include "../helpers/memory_helpers.hpp" + +ur_mem_handle_t_::ur_mem_handle_t_(ur_context_handle_t hContext, size_t size) + : hContext(hContext), size(size) {} + +ur_host_mem_handle_t::ur_host_mem_handle_t(ur_context_handle_t hContext, + void *hostPtr, size_t size, + host_ptr_action_t hostPtrAction) + : ur_mem_handle_t_(hContext, size) { + bool hostPtrImported = false; + if (hostPtrAction == host_ptr_action_t::import) { + hostPtrImported = + maybeImportUSM(hContext->getPlatform()->ZeDriverHandleExpTranslated, + hContext->getZeHandle(), hostPtr, size); + } + + if (!hostPtrImported) { + // TODO: use UMF + ZeStruct hostDesc; + ZE2UR_CALL_THROWS(zeMemAllocHost, (hContext->getZeHandle(), &hostDesc, size, + 0, &this->ptr)); + + if (hostPtr) { + std::memcpy(this->ptr, hostPtr, size); + } + } +} + +ur_host_mem_handle_t::~ur_host_mem_handle_t() { + // TODO: use UMF API here + if (ptr) { + ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr)); + } +} + +void *ur_host_mem_handle_t::getPtr(ur_device_handle_t hDevice) { + std::ignore = hDevice; + return ptr; +} + +ur_device_mem_handle_t::ur_device_mem_handle_t(ur_context_handle_t hContext, + void *hostPtr, size_t size) + : ur_mem_handle_t_(hContext, size), + deviceAllocations(hContext->getPlatform()->getNumDevices()) { + // Legacy adapter allocated the memory directly on a device (first on the + // contxt) and if the buffer is used on another device, memory is migrated + // (depending on an env var setting). + // + // TODO: port this behavior or figure out if it makes sense to keep the memory + // in a host buffer (e.g. for smaller sizes). 
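  // `buffer` below is a host-side staging copy of hostPtr's contents; getPtr()
  // further down allocates the per-device memory lazily and copies the staged
  // bytes over a synchronous immediate command list on first access from a
  // given device.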
+ if (hostPtr) { + buffer.assign(reinterpret_cast(hostPtr), + reinterpret_cast(hostPtr) + size); + } +} + +ur_device_mem_handle_t::~ur_device_mem_handle_t() { + // TODO: use UMF API here + for (auto &ptr : deviceAllocations) { + if (ptr) { + ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr)); + } + } +} + +void *ur_device_mem_handle_t::getPtr(ur_device_handle_t hDevice) { + std::lock_guard lock(this->Mutex); + + auto &ptr = deviceAllocations[hDevice->Id.value()]; + if (!ptr) { + ZeStruct deviceDesc; + ZE2UR_CALL_THROWS(zeMemAllocDevice, (hContext->getZeHandle(), &deviceDesc, + size, 0, hDevice->ZeDevice, &ptr)); + + if (!buffer.empty()) { + auto commandList = hContext->commandListCache.getImmediateCommandList( + hDevice->ZeDevice, true, + hDevice + ->QueueGroup + [ur_device_handle_t_::queue_group_info_t::type::Compute] + .ZeOrdinal, + ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + std::nullopt); + ZE2UR_CALL_THROWS( + zeCommandListAppendMemoryCopy, + (commandList.get(), ptr, buffer.data(), size, nullptr, 0, nullptr)); + } + } + return ptr; +} + +namespace ur::level_zero { +ur_result_t urMemBufferCreate(ur_context_handle_t hContext, + ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, + ur_mem_handle_t *phBuffer) { + if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { + // TODO: + // Having PI_MEM_FLAGS_HOST_PTR_ALLOC for buffer requires allocation of + // pinned host memory, see: + // sycl/doc/extensions/supported/sycl_ext_oneapi_use_pinned_host_memory_property.asciidoc + // We are however missing such functionality in Level Zero, so we just + // ignore the flag for now. + } + + void *hostPtr = pProperties ? pProperties->pHost : nullptr; + + // We treat integrated devices (physical memory shared with the CPU) + // differently from discrete devices (those with distinct memories). + // For integrated devices, allocating the buffer in the host memory + // enables automatic access from the device, and makes copying + // unnecessary in the map/unmap operations. This improves performance. + bool useHostBuffer = hContext->getDevices().size() == 1 && + hContext->getDevices()[0]->ZeDeviceProperties->flags & + ZE_DEVICE_PROPERTY_FLAG_INTEGRATED; + + if (useHostBuffer) { + // TODO: assert that if hostPtr is set, either UR_MEM_FLAG_USE_HOST_POINTER + // or UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER is set? + auto hostPtrAction = flags & UR_MEM_FLAG_USE_HOST_POINTER + ? 
ur_host_mem_handle_t::host_ptr_action_t::import + : ur_host_mem_handle_t::host_ptr_action_t::copy; + *phBuffer = + new ur_host_mem_handle_t(hContext, hostPtr, size, hostPtrAction); + } else { + *phBuffer = new ur_device_mem_handle_t(hContext, hostPtr, size); + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, + const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem) { + std::ignore = hBuffer; + std::ignore = flags; + std::ignore = bufferCreateType; + std::ignore = pRegion; + std::ignore = phMem; + logger::error("{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urMemBufferCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { + std::ignore = hNativeMem; + std::ignore = hContext; + std::ignore = pProperties; + std::ignore = phMem; + logger::error("{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urMemRetain(ur_mem_handle_t hMem) { + hMem->RefCount.increment(); + return UR_RESULT_SUCCESS; +} + +ur_result_t urMemRelease(ur_mem_handle_t hMem) { + if (hMem->RefCount.decrementAndTest()) { + delete hMem; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/memory.hpp b/source/adapters/level_zero/v2/memory.hpp new file mode 100644 index 0000000000..be4456d728 --- /dev/null +++ b/source/adapters/level_zero/v2/memory.hpp @@ -0,0 +1,55 @@ +//===--------- memory.hpp - Level Zero Adapter ---------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include "common.hpp" + +struct ur_mem_handle_t_ : _ur_object { + ur_mem_handle_t_(ur_context_handle_t hContext, size_t size); + virtual ~ur_mem_handle_t_() = default; + + virtual void *getPtr(ur_device_handle_t) = 0; + + inline size_t getSize() { return size; } + +protected: + const ur_context_handle_t hContext; + const size_t size; +}; + +struct ur_host_mem_handle_t : public ur_mem_handle_t_ { + enum class host_ptr_action_t { import, copy }; + + ur_host_mem_handle_t(ur_context_handle_t hContext, void *hostPtr, size_t size, + host_ptr_action_t useHostPtr); + ~ur_host_mem_handle_t(); + + void *getPtr(ur_device_handle_t) override; + +private: + void *ptr; +}; + +struct ur_device_mem_handle_t : public ur_mem_handle_t_ { + ur_device_mem_handle_t(ur_context_handle_t hContext, void *hostPtr, + size_t size); + ~ur_device_mem_handle_t(); + + void *getPtr(ur_device_handle_t) override; + +private: + std::vector buffer; + + // Vector of per-device allocations indexed by device->Id + std::vector deviceAllocations; +}; diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index 188f7c3102..ea2e931bfe 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -14,31 +14,30 @@ ur_queue_handle_t_::~ur_queue_handle_t_() {} -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +namespace ur::level_zero { +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { +ur_result_t urQueueRetain(ur_queue_handle_t hQueue) { return hQueue->queueRetain(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { +ur_result_t urQueueRelease(ur_queue_handle_t hQueue) { return hQueue->queueRelease(); } -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) { return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { +ur_result_t urQueueFinish(ur_queue_handle_t hQueue) { return hQueue->queueFinish(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { +ur_result_t urQueueFlush(ur_queue_handle_t hQueue) { return hQueue->queueFlush(); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( +ur_result_t urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -47,27 +46,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t 
numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( +ur_result_t urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( +ur_result_t urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -75,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( +ur_result_t urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -87,7 +89,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( +ur_result_t urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -99,16 +101,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t 
UR_APICALL urEnqueueMemBufferCopyRect( +ur_result_t urEnqueueMemBufferCopyRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -120,16 +124,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( +ur_result_t urEnqueueMemImageRead( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, @@ -138,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( +ur_result_t urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, @@ -147,78 +153,85 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, region, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap) { return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, 
mapFlags, offset, size, numEventsInWaitList, phEventWaitList, phEvent, ppRetMap); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *pMem, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - 
ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( +ur_result_t urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -227,7 +240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( +ur_result_t urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -236,25 +249,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( +ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, @@ -267,7 +284,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, phEventWaitList, 
phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -275,7 +292,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -283,7 +300,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( +ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -292,13 +309,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( +ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( +ur_result_t urEnqueueKernelLaunchCustomExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, @@ -310,7 +327,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( numPropsInLaunchPropList, launchPropList, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( +ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, @@ -321,3 +338,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, numEventsInWaitList, phEventWaitList, phEvent); } +} // namespace ur::level_zero \ No newline at end of file diff --git a/source/adapters/level_zero/v2/queue_create.cpp b/source/adapters/level_zero/v2/queue_create.cpp index 938dd5cb64..c72320842b 100644 --- a/source/adapters/level_zero/v2/queue_create.cpp +++ b/source/adapters/level_zero/v2/queue_create.cpp @@ -17,16 +17,18 @@ #include #include -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { +namespace ur::level_zero { +ur_result_t urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t 
hDevice, + const ur_queue_properties_t *pProperties, + ur_queue_handle_t *phQueue) { // TODO: For now, always use immediate, in-order *phQueue = new v2::ur_queue_immediate_in_order_t(hContext, hDevice, pProperties); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( +ur_result_t urQueueCreateWithNativeHandle( ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { @@ -38,3 +40,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index d6afd2eef3..3882a69135 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -10,6 +10,7 @@ #include "queue_immediate_in_order.hpp" #include "kernel.hpp" +#include "memory.hpp" #include "ur.hpp" #include "../helpers/kernel_helpers.hpp" @@ -146,6 +147,7 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName, // We can exit early if we have in-order queue. if (!lastHandler) return ReturnValue(true); + [[fallthrough]]; } default: logger::error( @@ -251,49 +253,59 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueEventsWait"); + + std::unique_lock lock(this->Mutex); + + auto handler = getCommandListHandlerForCompute(); + auto signalEvent = getSignalEvent(handler, phEvent); + auto [pWaitEvents, numWaitEvents] = + getWaitListView(phEventWaitList, numEventsInWaitList, handler); + + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (handler->commandList.get(), numWaitEvents, pWaitEvents)); + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (handler->commandList.get(), signalEvent)); + + lastHandler = handler; + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + // For in-order queue we don't need a real barrier, just wait for + // requested events in potentially different queues and add a "barrier" + // event signal because it is already guaranteed that previous commands + // in this queue are completed when the signal is started. 
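For reference, the enqueueEventsWait path introduced above reduces to two Level Zero appends on the queue's immediate command list, and the barrier entry point below can simply forward to it because an in-order list already serializes earlier work. A minimal sketch of that call sequence, assuming the ze_* handles already exist (an illustration, not the adapter's code):

#include <level_zero/ze_api.h>

// Block the in-order immediate command list on the wait events, then signal
// completion. Earlier commands are already ordered before the signal, which is
// why no separate barrier command is required on an in-order list.
static ze_result_t waitAndSignal(ze_command_list_handle_t cmdList,
                                 uint32_t numWaitEvents,
                                 ze_event_handle_t *waitEvents,
                                 ze_event_handle_t signalEvent) {
  if (numWaitEvents > 0) {
    ze_result_t res =
        zeCommandListAppendWaitOnEvents(cmdList, numWaitEvents, waitEvents);
    if (res != ZE_RESULT_SUCCESS)
      return res;
  }
  return zeCommandListAppendSignalEvent(cmdList, signalEvent);
}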
+ return enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead( ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hBuffer; - std::ignore = blockingRead; - std::ignore = offset; - std::ignore = size; - std::ignore = pDst; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueMemBufferRead"); + + UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE); + + auto ptr = ur_cast(hBuffer->getPtr(hDevice)); + return enqueueUSMMemcpy(blockingRead, pDst, ptr + offset, size, + numEventsInWaitList, phEventWaitList, phEvent); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hBuffer; - std::ignore = blockingWrite; - std::ignore = offset; - std::ignore = size; - std::ignore = pSrc; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueMemBufferWrite"); + + UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE); + + auto ptr = ur_cast(hBuffer->getPtr(hDevice)); + return enqueueUSMMemcpy(blockingWrite, ptr + offset, pSrc, size, + numEventsInWaitList, phEventWaitList, phEvent); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect( @@ -344,15 +356,18 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hBufferSrc; - std::ignore = hBufferDst; - std::ignore = srcOffset; - std::ignore = dstOffset; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueMemBufferCopy"); + + UR_ASSERT(srcOffset + size <= hBufferSrc->getSize(), + UR_RESULT_ERROR_INVALID_SIZE); + UR_ASSERT(dstOffset + size <= hBufferDst->getSize(), + UR_RESULT_ERROR_INVALID_SIZE); + + auto srcPtr = ur_cast(hBufferSrc->getPtr(hDevice)); + auto dstPtr = ur_cast(hBufferDst->getPtr(hDevice)); + + return enqueueUSMMemcpy(false, dstPtr + dstOffset, srcPtr + srcOffset, size, + numEventsInWaitList, phEventWaitList, phEvent); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( @@ -380,15 +395,13 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill( ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hBuffer; - std::ignore = pPattern; - std::ignore = patternSize; - std::ignore = offset; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + 
TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueMemBufferFill"); + + UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE); + + auto ptr = ur_cast(hBuffer->getPtr(hDevice)); + return enqueueUSMFill(ptr + offset, patternSize, pPattern, size, + numEventsInWaitList, phEventWaitList, phEvent); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageRead( diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index 6ac5f0f3b6..3706fe21e4 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -22,7 +22,8 @@ ur_context_handle_t ur_usm_pool_handle_t_::getContextHandle() const { return hContext; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( +namespace ur::level_zero { +ur_result_t urUSMPoolCreate( ur_context_handle_t hContext, ///< [in] handle of the context object ur_usm_pool_desc_t * pPoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with @@ -34,15 +35,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain( - ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool +ur_result_t +urUSMPoolRetain(ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool ) { hPool->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( - ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool +ur_result_t +urUSMPoolRelease(ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool ) { if (hPool->RefCount.decrementAndTest()) { delete hPool; @@ -50,7 +51,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( +ur_result_t urUSMPoolGetInfo( ur_usm_pool_handle_t hPool, ///< [in] handle of the USM memory pool ur_usm_pool_info_t propName, ///< [in] name of the pool property to query size_t propSize, ///< [in] size in bytes of the pool property value provided @@ -74,7 +75,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( } } -UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( +ur_result_t urUSMDeviceAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object ur_device_handle_t hDevice, ///< [in] handle of the device object const ur_usm_desc_t @@ -96,7 +97,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object ur_device_handle_t hDevice, ///< [in] handle of the device object const ur_usm_desc_t @@ -121,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( +ur_result_t urUSMHostAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object const ur_usm_desc_t *pUSMDesc, ///< [in][optional] USM memory allocation descriptor @@ -142,9 +143,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( - ur_context_handle_t hContext, ///< [in] handle of the context object - void *pMem ///< [in] pointer to USM memory object +ur_result_t +urUSMFree(ur_context_handle_t hContext, ///< [in] handle of the context object + void *pMem ///< [in] pointer to USM memory object ) { std::ignore = hContext; @@ -152,7 +153,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( 
return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( +ur_result_t urUSMGetMemAllocInfo( ur_context_handle_t hContext, ///< [in] handle of the context object const void *ptr, ///< [in] pointer to USM memory object ur_usm_alloc_info_t @@ -223,3 +224,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( } return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index e3b90121a1..e89899ded7 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -15,7 +15,9 @@ #include "physical_mem.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( +namespace ur::level_zero { + +ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -39,24 +41,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, - size_t size, void **ppStart) { +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart) { ZE2UR_CALL(zeVirtualMemReserve, (hContext->ZeContext, pStart, size, ppStart)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree( - ur_context_handle_t hContext, const void *pStart, size_t size) { +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemFree, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, - size_t size, ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -69,10 +71,10 @@ urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, - ur_physical_mem_handle_t hPhysicalMem, size_t offset, - ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -86,17 +88,18 @@ urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap( - ur_context_handle_t hContext, const void *pStart, size_t size) { +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemUnmap, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( - ur_context_handle_t hContext, const void *pStart, 
- [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, - size_t propSize, void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, + [[maybe_unused]] size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { @@ -119,3 +122,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index c5652398e3..0061fae907 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -160,9 +160,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_VERSION: return ReturnValue("0.1"); case UR_DEVICE_INFO_COMPILER_AVAILABLE: - return ReturnValue(bool{false}); + return ReturnValue(bool{true}); case UR_DEVICE_INFO_LINKER_AVAILABLE: - return ReturnValue(bool{false}); + return ReturnValue(bool{true}); case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: return ReturnValue(static_cast(hDevice->tp.num_threads())); case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: @@ -364,11 +364,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: return UR_RESULT_ERROR_INVALID_VALUE; + case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { + // Currently for Native CPU fences are implemented using OCK + // builtins, so we have different capabilities than atomic operations + ur_memory_order_capability_flags_t Capabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + return ReturnValue(Capabilities); + } case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { ur_memory_order_capability_flags_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED; return ReturnValue(Capabilities); } + case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { uint64_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | diff --git a/source/adapters/native_cpu/memory.cpp b/source/adapters/native_cpu/memory.cpp index 1f8a927c67..ddf93e44bc 100644 --- a/source/adapters/native_cpu/memory.cpp +++ b/source/adapters/native_cpu/memory.cpp @@ -46,7 +46,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_BUFFER_SIZE); const bool useHostPtr = flags & UR_MEM_FLAG_USE_HOST_POINTER; - const bool copyHostPtr = flags & UR_MEM_FLAG_USE_HOST_POINTER; + const bool copyHostPtr = flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; ur_mem_handle_t_ *retMem; diff --git a/source/adapters/native_cpu/platform.cpp b/source/adapters/native_cpu/platform.cpp index 8d650764c1..840f18f8b3 100644 --- a/source/adapters/native_cpu/platform.cpp +++ b/source/adapters/native_cpu/platform.cpp @@ -92,7 +92,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( std::ignore = pFrontendOption; std::ignore = ppPlatformOption; - CONTINUE_NO_IMPLEMENTATION; + std::ignore = hPlatform; + using namespace std::literals; + if (pFrontendOption == nullptr) + return 
UR_RESULT_ERROR_INVALID_NULL_POINTER; + if (pFrontendOption == "-O0"sv || pFrontendOption == "-O1"sv || + pFrontendOption == "-O2"sv || pFrontendOption == "-O3"sv || + pFrontendOption == ""sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index fe197849b6..b2db1f5bcd 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -54,7 +54,7 @@ if(UR_OPENCL_INCLUDE_DIR) else() FetchContent_Declare(OpenCL-Headers GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-Headers.git" - GIT_TAG main + GIT_TAG 1e193332d02e27e15812d24ff2a3a7a908eb92a3 ) FetchContent_MakeAvailable(OpenCL-Headers) FetchContent_GetProperties(OpenCL-Headers diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index a31d6580a0..6cdfb3a97d 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -32,6 +32,17 @@ ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, return UR_RESULT_SUCCESS; } +static bool isIntelFPGAEmuDevice(cl_device_id Dev) { + size_t NameSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_NAME, 0, nullptr, &NameSize)); + std::string NameStr(NameSize, '\0'); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_NAME, NameSize, NameStr.data(), nullptr)); + + return NameStr.find("Intel(R) FPGA Emulation Device") != std::string::npos; +} + ur_result_t cl_adapter::checkDeviceExtensions( cl_device_id Dev, const std::vector &Exts, bool &Supported) { size_t ExtSize = 0; @@ -46,6 +57,14 @@ ur_result_t cl_adapter::checkDeviceExtensions( Supported = true; for (const std::string &Ext : Exts) { if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + // The Intel FPGA emulation device does actually support these, even if it + // doesn't report them. 
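The isIntelFPGAEmuDevice helper above relies on the standard two-step clGetDeviceInfo pattern: query the size of CL_DEVICE_NAME first, then read the string into a buffer of that size. A self-contained sketch of the same pattern (an illustrative helper, not part of the adapter):

#include <CL/cl.h>
#include <string>

// Returns true when the device name contains the given substring; any OpenCL
// error is treated as "no match".
static bool deviceNameContains(cl_device_id dev, const char *needle) {
  size_t size = 0;
  if (clGetDeviceInfo(dev, CL_DEVICE_NAME, 0, nullptr, &size) != CL_SUCCESS)
    return false;
  std::string name(size, '\0');
  if (clGetDeviceInfo(dev, CL_DEVICE_NAME, size, name.data(), nullptr) !=
      CL_SUCCESS)
    return false;
  return name.find(needle) != std::string::npos;
}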
+ if (isIntelFPGAEmuDevice(Dev) && + (Ext == "cl_intel_device_attribute_query" || + Ext == "cl_intel_required_subgroup_size")) { + Supported = true; + continue; + } break; } } @@ -431,15 +450,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, URValue[i].type = static_cast(CLValue[0]); switch (URValue[i].type) { case UR_DEVICE_PARTITION_EQUALLY: { - URValue[i].value.equally = CLValue[i + 1]; + URValue[i].value.equally = static_cast(CLValue[i + 1]); break; } case UR_DEVICE_PARTITION_BY_COUNTS: { - URValue[i].value.count = CLValue[i + 1]; + URValue[i].value.count = static_cast(CLValue[i + 1]); break; } case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { - URValue[i].value.affinity_domain = CLValue[i + 1]; + URValue[i].value.affinity_domain = + static_cast(CLValue[i + 1]); break; } default: { @@ -825,12 +845,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: case UR_DEVICE_INFO_LOCAL_MEM_TYPE: case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: - case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: - case UR_DEVICE_INFO_USM_HOST_SUPPORT: - case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: - case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: - case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: - case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ @@ -844,6 +859,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * types are uint32_t */ return ReturnValue(static_cast(CLValue)); } + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + /* CL type: cl_bitfield / enum + * UR type: ur_flags_t (uint32_t) */ + bool Supported = false; + UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_unified_shared_memory"}, Supported)); + if (Supported) { + cl_bitfield CLValue = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bitfield), &CLValue, nullptr)); + return ReturnValue(static_cast(CLValue)); + } else { + return ReturnValue(0); + } + } case UR_DEVICE_INFO_IMAGE_SUPPORTED: case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: @@ -918,8 +954,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_VERSION: case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: case UR_DEVICE_INFO_BUILT_IN_KERNELS: - case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - case UR_DEVICE_INFO_IP_VERSION: { + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { /* We can just use the OpenCL outputs because the sizes of OpenCL types * are the same as UR. 
* | CL | UR | Size | @@ -937,7 +972,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_IP_VERSION: { + bool Supported; + UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_device_attribute_query"}, Supported)); + if (!Supported) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { + bool isExtensionSupported; + if (cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_required_subgroup_size"}, + isExtensionSupported) != UR_RESULT_SUCCESS || + !isExtensionSupported) { + std::vector aThreadIsItsOwnSubGroup({1}); + return ReturnValue(aThreadIsItsOwnSubGroup.data(), + aThreadIsItsOwnSubGroup.size()); + } + // Have to convert size_t to uint32_t size_t SubGroupSizesSize = 0; CL_RETURN_ON_FAILURE( diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 9735abefbf..ec1fe43423 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -302,7 +302,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( switch (propName) { case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: { - if (*(static_cast(pPropValue)) == true) { + if (*(static_cast(pPropValue))) { UR_RETURN_ON_FAILURE(usmSetIndirectAccess(hKernel)); } return UR_RESULT_SUCCESS; diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 01f6a34325..0cb3777601 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -36,6 +36,13 @@ FetchContent_Declare(unified-memory-framework GIT_TAG ${UMF_TAG} ) +if (UR_STATIC_ADAPTER_L0) + if (UMF_BUILD_SHARED_LIBRARY) + message(STATUS "Static adapter is not compatible with shared UMF, switching to fully statically linked UMF") + set(UMF_BUILD_SHARED_LIBRARY OFF) + endif() +endif() + set(UMF_BUILD_TESTS OFF CACHE INTERNAL "Build UMF tests") set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "Build UMF examples") # TODO: L0 provider not ready yet @@ -85,9 +92,9 @@ install(TARGETS ur_common add_library(ur_umf INTERFACE) target_sources(ur_umf INTERFACE - umf_helpers.hpp - umf_pools/disjoint_pool_config_parser.cpp - ur_pool_manager.hpp + $ + $ + $ ) add_library(${PROJECT_NAME}::umf ALIAS ur_umf) diff --git a/source/common/latency_tracker.hpp b/source/common/latency_tracker.hpp index 03ed6b6443..bf20e3819f 100644 --- a/source/common/latency_tracker.hpp +++ b/source/common/latency_tracker.hpp @@ -42,6 +42,7 @@ static constexpr double percentiles[numPercentiles] = { 50.0, 90.0, 99.0, 99.9, 99.99, 99.999, 99.9999}; struct latencyValues { + int64_t count; int64_t min; int64_t max; int64_t mean; @@ -54,6 +55,7 @@ using histogram_ptr = static inline latencyValues getValues(const struct hdr_histogram *histogram) { latencyValues values; + values.count = histogram->total_count; values.max = hdr_max(histogram); values.min = hdr_min(histogram); values.mean = static_cast(hdr_mean(histogram)); @@ -92,13 +94,16 @@ class latency_printer { for (auto &[name, histogram] : values) { auto value = getValues(histogram.get()); - logger.log(logger::Level::INFO, - "{},{},{},{},{},{},{},{},{},{},{},{},ns", name, - value.min, value.max, value.mean, value.stddev, - value.percentileValues[0], value.percentileValues[1], - value.percentileValues[2], value.percentileValues[3], - 
value.percentileValues[4], value.percentileValues[5], - value.percentileValues[6]); + auto f = groupDigits; + logger.log( + logger::Level::INFO, + "{},{},{},{},{},{},{},{},{},{},{},{},{},{},ns", name, + f(value.mean), f(value.percentileValues[0]), + f(value.percentileValues[1]), f(value.percentileValues[2]), + f(value.percentileValues[3]), f(value.percentileValues[4]), + f(value.percentileValues[5]), f(value.percentileValues[6]), + f(value.count), f(value.count * value.mean), f(value.min), + f(value.max), value.stddev); } } @@ -106,7 +111,8 @@ class latency_printer { inline void printHeader() { logger.log(logger::Level::INFO, "Latency histogram:"); logger.log(logger::Level::INFO, - "name,min,max,mean,stdev,p{},p{},p{},p{},p{},p{},p{},unit", + "name,mean,p{},p{},p{},p{},p{},p{}" + ",p{},count,sum,min,max,stdev,unit", percentiles[0], percentiles[1], percentiles[2], percentiles[3], percentiles[4], percentiles[5], percentiles[6]); diff --git a/source/common/umf_helpers.hpp b/source/common/umf_helpers.hpp index e2e5b9a467..39f4a4b579 100644 --- a/source/common/umf_helpers.hpp +++ b/source/common/umf_helpers.hpp @@ -50,7 +50,7 @@ using provider_unique_handle_t = typename std::enable_if::value, umf_result_t>::type \ CALL_OP_##op(T *t, Args &&...args) { \ return t->op(std::forward(args)...); \ - }; \ + } \ \ static inline umf_result_t CALL_OP_##op(...) { \ return UMF_RESULT_ERROR_NOT_SUPPORTED; \ diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 0475cf31e4..0ede3c93dc 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -60,12 +60,14 @@ int ur_duplicate_fd(int pid, int fd_in); /////////////////////////////////////////////////////////////////////////////// #if defined(_WIN32) #define MAKE_LIBRARY_NAME(NAME, VERSION) NAME ".dll" +#define STATIC_LIBRARY_EXTENSION ".lib" #else #if defined(__APPLE__) #define MAKE_LIBRARY_NAME(NAME, VERSION) "lib" NAME "." VERSION ".dylib" #else #define MAKE_LIBRARY_NAME(NAME, VERSION) "lib" NAME ".so." VERSION #endif +#define STATIC_LIBRARY_EXTENSION ".a" #endif inline std::string create_library_path(const char *name, const char *path) { @@ -478,6 +480,25 @@ template class AtomicSingleton { } }; +template +static inline std::string groupDigits(Numeric numeric) { + auto number = std::to_string(numeric); + std::string sign = numeric >= 0 ? "" : "-"; + auto digits = number.substr(sign.size(), number.size() - sign.size()); + + std::string separated; + + for (size_t i = 0; i < digits.size(); i++) { + separated.push_back(digits[i]); + + if (i != digits.size() - 1 && (digits.size() - i - 1) % 3 == 0) { + separated.push_back('\''); + } + } + + return sign + separated; +} + template Spinlock> AtomicSingleton::instance; #endif /* UR_UTIL_H */ diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index af05c81767..48329cfb37 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -47,6 +47,7 @@ add_library(${PROJECT_NAME}::loader ALIAS ur_loader) target_include_directories(ur_loader PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/layers ) @@ -60,6 +61,13 @@ target_link_libraries(ur_loader PRIVATE ${PROJECT_NAME}::headers ) +if(UR_STATIC_ADAPTER_L0) + target_link_libraries(ur_loader PRIVATE + ur_adapter_level_zero + ) + target_compile_definitions(ur_loader PRIVATE UR_STATIC_ADAPTER_LEVEL_ZERO) +endif() + if(UR_ENABLE_TRACING) target_link_libraries(ur_loader PRIVATE ${TARGET_XPTI}) target_include_directories(ur_loader PRIVATE ${xpti_SOURCE_DIR}/include) diff --git a/source/loader/layers/sanitizer/asan_options.hpp b/source/loader/layers/sanitizer/asan_options.hpp index 298639b73c..eb3f6bb03d 100644 --- a/source/loader/layers/sanitizer/asan_options.hpp +++ b/source/loader/layers/sanitizer/asan_options.hpp @@ -42,7 +42,17 @@ struct AsanOptions { private: AsanOptions(logger::Logger &logger) { - auto OptionsEnvMap = getenv_to_map("UR_LAYER_ASAN_OPTIONS"); + std::optional OptionsEnvMap; + try { + OptionsEnvMap = getenv_to_map("UR_LAYER_ASAN_OPTIONS"); + } catch (const std::invalid_argument &e) { + std::stringstream SS; + SS << "[ERROR]: "; + SS << e.what(); + logger.always(SS.str().c_str()); + die("Sanitizer failed to parse options.\n"); + } + if (!OptionsEnvMap.has_value()) { return; } @@ -74,8 +84,8 @@ struct AsanOptions { Opt = false; } else { std::stringstream SS; - SS << "[ERROR]: \"" << Name << "\" is set to \"" - << Value << "\", which is not an valid setting. "; + SS << "\"" << Name << "\" is set to \"" << Value + << "\", which is not an valid setting. "; SS << "Acceptable input are: for enable, use:"; for (auto &S : TrueStrings) { SS << " \"" << S << "\""; @@ -86,7 +96,8 @@ struct AsanOptions { SS << " \"" << S << "\""; } SS << "."; - die(SS.str().c_str()); + logger.error(SS.str().c_str()); + die("Sanitizer failed to parse options.\n"); } } }; @@ -106,9 +117,10 @@ struct AsanOptions { } MaxQuarantineSizeMB = temp_long; } catch (...) { - die("[ERROR]: \"quarantine_size_mb\" should be " - "an positive integer that smaller than or equal to " - "4294967295."); + logger.error("\"quarantine_size_mb\" should be " + "an integer in range[0, {}].", + UINT32_MAX); + die("Sanitizer failed to parse options.\n"); } } @@ -120,10 +132,12 @@ struct AsanOptions { if (MinRZSize < 16) { MinRZSize = 16; logger.warning("Trying to set redzone size to a " - "value less than 16 is ignored"); + "value less than 16 is ignored."); } } catch (...) { - die("[ERROR]: \"redzone\" should be an integer"); + logger.error( + "\"redzone\" should be an integer in range[0, 16]."); + die("Sanitizer failed to parse options.\n"); } } @@ -135,10 +149,12 @@ struct AsanOptions { if (MaxRZSize > 2048) { MaxRZSize = 2048; logger.warning("Trying to set max redzone size to a " - "value greater than 2048 is ignored"); + "value greater than 2048 is ignored."); } } catch (...) 
{ - die("[ERROR]: \"max_redzone\" should be an integer"); + logger.error( + "\"max_redzone\" should be an integer in range[0, 2048]."); + die("Sanitizer failed to parse options.\n"); } } } diff --git a/source/loader/layers/tracing/ur_tracing_layer.cpp b/source/loader/layers/tracing/ur_tracing_layer.cpp index 722ee77faa..c6fd4ca40d 100644 --- a/source/loader/layers/tracing/ur_tracing_layer.cpp +++ b/source/loader/layers/tracing/ur_tracing_layer.cpp @@ -34,14 +34,15 @@ struct XptiContextManager { ~XptiContextManager() { xptiFrameworkFinalize(); } }; -static std::shared_ptr xptiContextManagerGlobal = [] { - return std::make_shared(); -}(); +static std::shared_ptr xptiContextManagerGet() { + static auto contextManager = std::make_shared(); + return contextManager; +}; static thread_local xpti_td *activeEvent; /////////////////////////////////////////////////////////////////////////////// context_t::context_t() : logger(logger::create_logger("tracing", true, true)) { - this->xptiContextManager = xptiContextManagerGlobal; + this->xptiContextManager = xptiContextManagerGet(); call_stream_id = xptiRegisterStream(CALL_STREAM_NAME); std::ostringstream streamv; diff --git a/source/loader/ur_adapter_registry.hpp b/source/loader/ur_adapter_registry.hpp index 25cd9a9fff..7df799ab1e 100644 --- a/source/loader/ur_adapter_registry.hpp +++ b/source/loader/ur_adapter_registry.hpp @@ -33,6 +33,14 @@ class AdapterRegistry { if (forceLoadedAdaptersOpt.has_value()) { for (const auto &s : forceLoadedAdaptersOpt.value()) { auto path = fs::path(s); + if (path.filename().extension() == STATIC_LIBRARY_EXTENSION) { + logger::warning( + "UR_ADAPTERS_FORCE_LOAD contains a path to a static" + "library {}, it will be skipped", + s); + continue; + } + bool exists = false; try { exists = fs::exists(path); @@ -41,11 +49,12 @@ class AdapterRegistry { } if (exists) { + forceLoaded = true; adaptersLoadPaths.emplace_back( std::vector{std::move(path)}); } else { logger::warning( - "Detected nonexistent path {} in environmental " + "Detected nonexistent path {} in environment " "variable UR_ADAPTERS_FORCE_LOAD", s); } @@ -92,6 +101,8 @@ class AdapterRegistry { size_t size() const noexcept { return adaptersLoadPaths.size(); } + bool adaptersForceLoaded() { return forceLoaded; } + std::vector>::const_iterator begin() const noexcept { return adaptersLoadPaths.begin(); } @@ -152,10 +163,123 @@ class AdapterRegistry { return paths.empty() ? std::nullopt : std::optional(paths); } + ur_result_t readPreFilterODS(std::string platformBackendName) { + // TODO: Refactor this to the common code such that both the prefilter and urDeviceGetSelected use the same functionality. + bool acceptLibrary = true; + std::optional odsEnvMap; + try { + odsEnvMap = getenv_to_map("ONEAPI_DEVICE_SELECTOR", false); + + } catch (...) { + // If the selector is malformed, then we ignore selector and return success. + logger::error("ERROR: missing backend, format of filter = " + "'[!]backend:filterStrings'"); + return UR_RESULT_SUCCESS; + } + logger::debug( + "getenv_to_map parsed env var and {} a map", + (odsEnvMap.has_value() ? "produced" : "failed to produce")); + + // if the ODS env var is not set at all, then pretend it was set to the default + using EnvVarMap = std::map>; + EnvVarMap mapODS = + odsEnvMap.has_value() ? odsEnvMap.value() : EnvVarMap{{"*", {"*"}}}; + for (auto &termPair : mapODS) { + std::string backend = termPair.first; + // TODO: Figure out how to process all ODS errors rather than returning + // on the first error. 
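Each ONEAPI_DEVICE_SELECTOR term has the form [!]backend:filterStrings, as the error messages above spell out; a leading '!' turns the term into a discard filter. An illustrative splitter for a single term, independent of the getenv_to_map helper the loader actually uses:

#include <string>
#include <utility>

// Splits "[!]backend:filterStrings" into the backend name and an accept flag.
// Hypothetical helper for illustration only; a term with no ':' yields the
// whole term as the backend.
static std::pair<std::string, bool> parseSelectorTerm(const std::string &term) {
  std::string backend = term.substr(0, term.find(':'));
  bool accept = true;
  if (!backend.empty() && backend.front() == '!') {
    accept = false;
    backend.erase(backend.begin());
  }
  return {backend, accept};
}

With ONEAPI_DEVICE_SELECTOR="level_zero:*;!cuda:*", the pre-filter above keeps the Level Zero adapter (the accept term matches its library name) and drops the CUDA adapter (the discard term matches), so the discarded library is never loaded.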
+ if (backend.empty()) { + // FIXME: never true because getenv_to_map rejects this case + // malformed term: missing backend -- output ERROR, then continue + logger::error("ERROR: missing backend, format of filter = " + "'[!]backend:filterStrings'"); + continue; + } + logger::debug("ONEAPI_DEVICE_SELECTOR Pre-Filter with backend '{}' " + "and platform library name '{}'", + backend, platformBackendName); + enum FilterType { + AcceptFilter, + DiscardFilter, + } termType = + (backend.front() != '!') ? AcceptFilter : DiscardFilter; + logger::debug( + "termType is {}", + (termType != AcceptFilter ? "DiscardFilter" : "AcceptFilter")); + if (termType != AcceptFilter) { + logger::debug("DEBUG: backend was '{}'", backend); + backend.erase(backend.cbegin()); + logger::debug("DEBUG: backend now '{}'", backend); + } + + // Verify that the backend string is valid, otherwise ignore the backend. + if ((strcmp(backend.c_str(), "*") != 0) && + (strcmp(backend.c_str(), "level_zero") != 0) && + (strcmp(backend.c_str(), "opencl") != 0) && + (strcmp(backend.c_str(), "cuda") != 0) && + (strcmp(backend.c_str(), "hip") != 0)) { + logger::debug("ONEAPI_DEVICE_SELECTOR Pre-Filter with illegal " + "backend '{}' ", + backend); + continue; + } + + // case-insensitive comparison by converting both tolower + std::transform(platformBackendName.begin(), + platformBackendName.end(), + platformBackendName.begin(), + [](unsigned char c) { return std::tolower(c); }); + std::transform(backend.begin(), backend.end(), backend.begin(), + [](unsigned char c) { return std::tolower(c); }); + std::size_t nameFound = platformBackendName.find(backend); + + bool backendFound = nameFound != std::string::npos; + if (termType == AcceptFilter) { + if (backend.front() != '*' && !backendFound) { + logger::debug( + "The ONEAPI_DEVICE_SELECTOR backend name '{}' was not " + "found in the platform library name '{}'", + backend, platformBackendName); + acceptLibrary = false; + continue; + } else if (backend.front() == '*' || backendFound) { + return UR_RESULT_SUCCESS; + } + } else { + if (backendFound || backend.front() == '*') { + acceptLibrary = false; + logger::debug( + "The ONEAPI_DEVICE_SELECTOR backend name for discard " + "'{}' was found in the platform library name '{}'", + backend, platformBackendName); + continue; + } + } + } + if (acceptLibrary) { + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; + } + void discoverKnownAdapters() { auto searchPathsEnvOpt = getEnvAdapterSearchPaths(); auto loaderLibPathOpt = getLoaderLibPath(); +#if defined(_WIN32) + bool loaderPreFilter = getenv_tobool("UR_LOADER_PRELOAD_FILTER", false); +#else + bool loaderPreFilter = getenv_tobool("UR_LOADER_PRELOAD_FILTER", true); +#endif for (const auto &adapterName : knownAdapterNames) { + + if (loaderPreFilter) { + if (readPreFilterODS(adapterName) != UR_RESULT_SUCCESS) { + logger::debug("The adapter '{}' was removed based on the " + "pre-filter from ONEAPI_DEVICE_SELECTOR.", + adapterName); + continue; + } + } std::vector loadPaths; // Adapter search order: @@ -183,6 +307,8 @@ class AdapterRegistry { } } + bool forceLoaded = false; + public: void enableMock() { adaptersLoadPaths.clear(); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 56e16b769d..26f55c071f 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -8631,6 +8631,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { 
+ // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8689,6 +8694,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8781,6 +8791,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8875,6 +8890,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8936,6 +8956,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9019,6 +9044,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9081,6 +9111,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9142,6 +9177,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9213,6 +9253,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9269,6 +9314,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9333,6 +9383,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked 
adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9390,6 +9445,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9451,6 +9511,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9522,6 +9587,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9579,6 +9649,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9640,6 +9715,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9699,6 +9779,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9760,6 +9845,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9815,6 +9905,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9875,6 +9970,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9937,6 +10037,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if 
(platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 9aad7159c3..7f2d1baa13 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -560,19 +560,20 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, const auto thirdDeviceId = getDeviceId(thirdPart); deviceList.push_back(DeviceSpec{ DevicePartLevel::SUBSUB, hardwareType, firstDeviceId, - secondDeviceId, thirdDeviceId}); + secondDeviceId, thirdDeviceId, nullptr}); } else { // second dot not found, this is a subdevice - deviceList.push_back(DeviceSpec{DevicePartLevel::SUB, - hardwareType, firstDeviceId, - secondDeviceId}); + deviceList.push_back( + DeviceSpec{DevicePartLevel::SUB, hardwareType, + firstDeviceId, secondDeviceId, 0, nullptr}); } } else { // first dot not found, this is a root device const auto hardwareType = getRootHardwareType(filterString); const auto firstDeviceId = getDeviceId(filterString); deviceList.push_back(DeviceSpec{DevicePartLevel::ROOT, - hardwareType, firstDeviceId}); + hardwareType, firstDeviceId, 0, + 0, nullptr}); } } } @@ -587,8 +588,9 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, // for example, we pretend that "garbage:0;!cuda:*" was just "!cuda:*" // so we add an implicit accept-all term (equivalent to prepending "*:*;") // as we would have done if the user had given us the corrected string - acceptDeviceList.push_back(DeviceSpec{ - DevicePartLevel::ROOT, ::UR_DEVICE_TYPE_ALL, DeviceIdTypeALL}); + acceptDeviceList.push_back(DeviceSpec{DevicePartLevel::ROOT, + ::UR_DEVICE_TYPE_ALL, + DeviceIdTypeALL, 0, 0, nullptr}); } logger::debug("DEBUG: size of acceptDeviceList = {}", diff --git a/source/loader/ur_loader.cpp b/source/loader/ur_loader.cpp index bfc9da3e50..e5a2bdb34e 100644 --- a/source/loader/ur_loader.cpp +++ b/source/loader/ur_loader.cpp @@ -8,13 +8,36 @@ * */ #include "ur_loader.hpp" +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +#include "adapters/level_zero/ur_interface_loader.hpp" +#endif namespace ur_loader { /////////////////////////////////////////////////////////////////////////////// context_t *getContext() { return context_t::get_direct(); } -/////////////////////////////////////////////////////////////////////////////// ur_result_t context_t::init() { +#ifdef _WIN32 + // Suppress system errors. + // Tells the system to not display the critical-error-handler message box. + // Instead, the system sends the error to the calling process. + // This is crucial for graceful handling of adapters that couldn't be + // loaded, e.g. due to missing native run-times. + // TODO: add reporting in case of an error. + // NOTE: we restore the old mode to not affect user app behavior. + // See https://github.com/intel/llvm/blob/sycl/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp (preloadLibraries()) + UINT SavedMode = SetErrorMode(SEM_FAILCRITICALERRORS); +#endif + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO + // If the adapters were force loaded, it means the user wants to use + // a specific adapter library. Don't load any static adapters. 
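Linking the Level Zero adapter into the loader statically is driven by the UR_STATIC_ADAPTER_L0 option referenced in the CMake changes above; a typical configure invocation (directory names are illustrative) might look like:

cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON
cmake --build build -j

When UR_ADAPTERS_FORCE_LOAD points at specific adapter libraries, the adaptersForceLoaded() check below skips registering the built-in static adapter so the forced libraries take precedence.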
+ if (!adapter_registry.adaptersForceLoaded()) { + auto &level_zero = platforms.emplace_back(nullptr); + ur::level_zero::urAdapterGetDdiTables(&level_zero.dditable.ur); + } +#endif + for (const auto &adapterPaths : adapter_registry) { for (const auto &path : adapterPaths) { auto handle = LibLoader::loadAdapterLibrary(path.string().c_str()); @@ -24,6 +47,10 @@ ur_result_t context_t::init() { } } } +#ifdef _WIN32 + // Restore system error handling. + (void)SetErrorMode(SavedMode); +#endif forceIntercept = getenv_tobool("UR_ENABLE_LOADER_INTERCEPT"); diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt index f372dc655d..b1c34b8916 100644 --- a/test/adapters/level_zero/CMakeLists.txt +++ b/test/adapters/level_zero/CMakeLists.txt @@ -40,11 +40,12 @@ if(UR_BUILD_ADAPTER_L0) generate_device_binaries kernel_names_header) endif() - if(NOT WIN32) + if(NOT WIN32 AND NOT UR_STATIC_ADAPTER_L0) # Make L0 use CallMap from a seprate shared lib so that we can access the map # from the tests. This only seems to work on linux add_library(zeCallMap SHARED zeCallMap.cpp) target_compile_definitions(ur_adapter_level_zero PRIVATE UR_L0_CALL_COUNT_IN_TESTS) + # TODO: stop exporting internals like this for tests... target_link_libraries(ur_adapter_level_zero PRIVATE zeCallMap) add_adapter_test(level_zero_ze_calls diff --git a/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp b/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp index 6ee49dbbfb..b3918c7818 100644 --- a/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp +++ b/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp @@ -24,7 +24,8 @@ TEST_P(urLevelZeroKernelNativeHandleTest, OwnedHandleRelease) { auto kernel_name = uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; - ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; + ze_module_desc_t moduleDesc{}; + moduleDesc.stype = ZE_STRUCTURE_TYPE_MODULE_DESC; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.inputSize = il_binary->size(); moduleDesc.pInputModule = @@ -36,7 +37,8 @@ TEST_P(urLevelZeroKernelNativeHandleTest, OwnedHandleRelease) { &module, NULL), ZE_RESULT_SUCCESS); - ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; + ze_kernel_desc_t kernelDesc{}; + kernelDesc.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC; kernelDesc.pKernelName = kernel_name.c_str(); ze_kernel_handle_t native_kernel; @@ -75,7 +77,8 @@ TEST_P(urLevelZeroKernelNativeHandleTest, NullProgram) { auto kernel_name = uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; - ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; + ze_module_desc_t moduleDesc{}; + moduleDesc.stype = ZE_STRUCTURE_TYPE_MODULE_DESC; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.inputSize = il_binary->size(); moduleDesc.pInputModule = @@ -87,7 +90,8 @@ TEST_P(urLevelZeroKernelNativeHandleTest, NullProgram) { &module, NULL), ZE_RESULT_SUCCESS); - ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; + ze_kernel_desc_t kernelDesc{}; + kernelDesc.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC; kernelDesc.pKernelName = kernel_name.c_str(); ze_kernel_handle_t native_kernel; diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index 95f1f40902..f1c88a35ee 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -35,6 +35,10 @@ add_unittest(level_zero_command_list_cache add_unittest(level_zero_event_pool event_pool_test.cpp + 
${PROJECT_SOURCE_DIR}/source/ur/ur.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/adapter.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/device.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/platform.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool_cache.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_normal.cpp diff --git a/test/adapters/level_zero/v2/command_list_cache_test.cpp b/test/adapters/level_zero/v2/command_list_cache_test.cpp index 74bcbf4634..44755b699e 100644 --- a/test/adapters/level_zero/v2/command_list_cache_test.cpp +++ b/test/adapters/level_zero/v2/command_list_cache_test.cpp @@ -23,7 +23,7 @@ struct CommandListCacheTest : public uur::urContextTest {}; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(CommandListCacheTest); TEST_P(CommandListCacheTest, CanStoreAndRetriveImmediateAndRegularCmdLists) { - v2::command_list_cache_t cache(context->hContext); + v2::command_list_cache_t cache(context->getZeHandle()); bool IsInOrder = false; uint32_t Ordinal = 0; @@ -75,7 +75,7 @@ TEST_P(CommandListCacheTest, CanStoreAndRetriveImmediateAndRegularCmdLists) { } TEST_P(CommandListCacheTest, ImmediateCommandListsHaveProperAttributes) { - v2::command_list_cache_t cache(context->hContext); + v2::command_list_cache_t cache(context->getZeHandle()); uint32_t numQueueGroups = 0; ASSERT_EQ(zeDeviceGetCommandQueueGroupProperties(device->ZeDevice, diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index ff22f32c07..b63c838ac8 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -122,6 +122,19 @@ add_subdirectory(queue) add_subdirectory(sampler) add_subdirectory(virtual_memory) +set(TEST_SUBDIRECTORIES_DPCXX + "device_code" + "kernel" + "program" + "enqueue" + "integration" + "exp_command_buffer" + "exp_enqueue_native" + "exp_usm_p2p" + "exp_launch_properties" + "memory-migrate" +) + if(UR_DPCXX) add_custom_target(generate_device_binaries) @@ -143,18 +156,20 @@ if(UR_DPCXX) string(REPLACE "," ";" TARGET_TRIPLES ${UR_CONFORMANCE_TARGET_TRIPLES}) endif() - add_subdirectory(device_code) - add_subdirectory(kernel) - add_subdirectory(program) - add_subdirectory(enqueue) - add_subdirectory(integration) - add_subdirectory(exp_command_buffer) - add_subdirectory(exp_enqueue_native) - add_subdirectory(exp_usm_p2p) - add_subdirectory(exp_launch_properties) - add_subdirectory(memory-migrate) + foreach(dir ${TEST_SUBDIRECTORIES_DPCXX}) + add_subdirectory(${dir}) + endforeach() else() + set(DISABLED_TESTS "") + foreach(dir ${TEST_SUBDIRECTORIES_DPCXX}) + if(NOT dir STREQUAL "device_code") + list(APPEND DISABLED_TESTS "test-${dir}") + endif() + endforeach() + + string(REPLACE ";" ", " DISABLED_TESTS_STR "${DISABLED_TESTS}") + message(WARNING "UR_DPCXX is not defined, the following conformance test executables \ - are disabled: test-program, test-kernel, test-enqueue") + are disabled: ${DISABLED_TESTS_STR}") endif() diff --git a/test/conformance/context/context_adapter_level_zero_v2.match b/test/conformance/context/context_adapter_level_zero_v2.match index fc2d1b8324..3dea8da6e5 100644 --- a/test/conformance/context/context_adapter_level_zero_v2.match +++ b/test/conformance/context/context_adapter_level_zero_v2.match @@ -1,5 +1,3 @@ urContextCreateWithNativeHandleTest.InvalidNullHandleAdapter/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ 
urContextCreateWithNativeHandleTest.InvalidNullPointerContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urContextGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT -urContextGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_CONTEXT_INFO_USM_FILL2D_SUPPORT urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ diff --git a/test/conformance/context/urContextCreate.cpp b/test/conformance/context/urContextCreate.cpp index d5fb59389f..0f268a3992 100644 --- a/test/conformance/context/urContextCreate.cpp +++ b/test/conformance/context/urContextCreate.cpp @@ -17,7 +17,8 @@ TEST_P(urContextCreateTest, Success) { } TEST_P(urContextCreateTest, SuccessWithProperties) { - ur_context_properties_t properties{UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES}; + ur_context_properties_t properties{UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES, + nullptr, 0}; uur::raii::Context context = nullptr; ASSERT_SUCCESS(urContextCreate(1, &device, &properties, context.ptr())); ASSERT_NE(nullptr, context); diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index af0bc83d8a..5445531961 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -144,6 +144,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/inc.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/increment.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/cpy_and_mult.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/cpy_and_mult_usm.cpp) diff --git a/test/conformance/device_code/increment.cpp b/test/conformance/device_code/increment.cpp new file mode 100644 index 0000000000..14094c4963 --- /dev/null +++ b/test/conformance/device_code/increment.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <sycl/sycl.hpp>
+
+int main() {
+
+    const size_t inputSize = 1;
+    sycl::queue sycl_queue;
+    uint32_t *inputArray = sycl::malloc_shared<uint32_t>(inputSize, sycl_queue);
+
+    sycl_queue.submit([&](sycl::handler &cgh) {
+        cgh.parallel_for(
+            sycl::range<1>(inputSize),
+            [=](sycl::id<1> itemID) { inputArray[itemID] += 1; });
+    });
+    return 0;
+}
diff --git a/test/conformance/device_code/indexers_usm.cpp b/test/conformance/device_code/indexers_usm.cpp
index e055fa47cc..cd3b56bf0c 100644
--- a/test/conformance/device_code/indexers_usm.cpp
+++ b/test/conformance/device_code/indexers_usm.cpp
@@ -3,6 +3,9 @@
 // See LICENSE.TXT
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+// Offsets are deprecated, but we should still test that they work
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+
 #include <sycl/sycl.hpp>
 
 int main() {
diff --git a/test/conformance/device_code/linker_error.cpp b/test/conformance/device_code/linker_error.cpp
index 5fc7eebf6f..8afa369bb2 100644
--- a/test/conformance/device_code/linker_error.cpp
+++ b/test/conformance/device_code/linker_error.cpp
@@ -3,17 +3,17 @@
 // See LICENSE.TXT
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 
 SYCL_EXTERNAL void this_function_does_not_exist();
 
 int main() {
-    cl::sycl::queue deviceQueue;
-    cl::sycl::range<1> numOfItems{1};
+    sycl::queue deviceQueue;
+    sycl::range<1> numOfItems{1};
 
     try {
-        deviceQueue.submit([&](cl::sycl::handler &cgh) {
-            auto kern = [=](cl::sycl::id<1>) {
+        deviceQueue.submit([&](sycl::handler &cgh) {
+            auto kern = [=](sycl::id<1>) {
 #ifdef __SYCL_DEVICE_ONLY__
                 this_function_does_not_exist();
 #endif
diff --git a/test/conformance/enqueue/CMakeLists.txt b/test/conformance/enqueue/CMakeLists.txt
index 7cc68203a0..1e19658dac 100644
--- a/test/conformance/enqueue/CMakeLists.txt
+++ b/test/conformance/enqueue/CMakeLists.txt
@@ -9,6 +9,7 @@ add_conformance_test_with_kernels_environment(enqueue
     urEnqueueEventsWait.cpp
     urEnqueueEventsWaitWithBarrier.cpp
     urEnqueueKernelLaunch.cpp
+    urEnqueueKernelLaunchAndMemcpyInOrder.cpp
     urEnqueueMemBufferCopyRect.cpp
     urEnqueueMemBufferCopy.cpp
     urEnqueueMemBufferFill.cpp
diff --git a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match
index bf20fe2e0f..e48c5175b4 100644
--- a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match
+++ b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match
@@ -1,37 +1,14 @@
 urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueEventsWaitTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueEventsWaitTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueEventsWaitTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueEventsWaitWithBarrierTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueEventsWaitWithBarrierTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueEventsWaitWithBarrierTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueKernelLaunchTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-urEnqueueKernelLaunchTest.InvalidWorkGroupSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueKernelLaunchTest.InvalidKernelArgs/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueKernelLaunchKernelStandardTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D_1 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D_31 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D_1027 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D_32 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D_256 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D_1_1 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D_31_7 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D_1027_1 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D_1_32 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D_256_79 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D_1_1_1 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D_31_7_1 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D_1027_1_19 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D_1_53_19 -urEnqueueKernelLaunchTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D_256_79_8 urEnqueueKernelLaunchWithVirtualMemory.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled +{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UseEventsEnabled +{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UseEventsDisabled +{{OPT}}urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/UseEventsNoQueuePerThread +{{OPT}}urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/NoUseEventsNoQueuePerThread urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_whole_buffer_2D urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_non_zero_offsets_2D 
urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_different_buffer_sizes_2D @@ -41,61 +18,7 @@ urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_ urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_with_offsets urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_2d_3d urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_2d -urEnqueueMemBufferCopyRectTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferCopyRectTest.InvalidNullHandleBufferSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferCopyRectTest.InvalidNullHandleBufferDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferCopyRectTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueMemBufferCopyRectTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferCopyTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urEnqueueMemBufferCopyTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500 -urEnqueueMemBufferCopyTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096 -urEnqueueMemBufferCopyTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096 -urEnqueueMemBufferCopyTestWithParam.InvalidNullHandleBufferDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000 -urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500 -urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096 
-urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000 -urEnqueueMemBufferCopyTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urEnqueueMemBufferCopyTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500 -urEnqueueMemBufferCopyTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096 -urEnqueueMemBufferCopyTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000 -urEnqueueMemBufferFillTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urEnqueueMemBufferFillTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urEnqueueMemBufferFillTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urEnqueueMemBufferFillTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urEnqueueMemBufferFillTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urEnqueueMemBufferFillTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urEnqueueMemBufferFillTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urEnqueueMemBufferFillTest.SuccessPartialFill/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urEnqueueMemBufferFillTest.SuccessPartialFill/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urEnqueueMemBufferFillTest.SuccessPartialFill/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urEnqueueMemBufferFillTest.SuccessPartialFill/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urEnqueueMemBufferFillTest.SuccessPartialFill/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urEnqueueMemBufferFillTest.SuccessPartialFill/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urEnqueueMemBufferFillTest.SuccessPartialFill/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urEnqueueMemBufferFillTest.SuccessOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urEnqueueMemBufferFillTest.SuccessOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urEnqueueMemBufferFillTest.SuccessOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urEnqueueMemBufferFillTest.SuccessOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urEnqueueMemBufferFillTest.SuccessOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urEnqueueMemBufferFillTest.SuccessOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urEnqueueMemBufferFillTest.SuccessOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urEnqueueMemBufferFillNegativeTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferFillNegativeTest.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urEnqueueMemBufferFillNegativeTest.InvalidNullHandlePointerPattern/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferFillNegativeTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferFillNegativeTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueMemBufferMapTestWithParam.SuccessRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE urEnqueueMemBufferMapTestWithParam.SuccessRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE urEnqueueMemBufferMapTestWithParam.SuccessRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE @@ -192,86 +115,6 @@ urEnqueueMemBufferMapTestWithParam.SuccessMultiMaps/Intel_R__oneAPI_Unified_Runt urEnqueueMemBufferMapTestWithParam.SuccessMultiMaps/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemBufferMapTestWithParam.SuccessMultiMaps/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemBufferMapTestWithParam.SuccessMultiMaps/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-urEnqueueMemBufferMapTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY 
-urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidEnumerationMapFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullPointerRetMap/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemBufferMapTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE urEnqueueMemBufferMapTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE urEnqueueMemBufferMapTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE @@ -290,134 +133,6 @@ urEnqueueMemBufferMapTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_o 
urEnqueueMemBufferMapTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemBufferMapTestWithWriteFlagParam.SuccessWrite/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MAP_FLAG_WRITE urEnqueueMemBufferMapTestWithWriteFlagParam.SuccessWrite/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MAP_FLAG_WRITE_INVALIDATE_REGION -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY 
-urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE 
-urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY 
-urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY 
-urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferReadTestWithParam.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_whole_buffer_2D urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D @@ -427,123 +142,7 @@ urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_ urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -urEnqueueMemBufferReadRectTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferReadRectTest.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferReadRectTest.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferReadRectTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueMemBufferReadRectTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY 
-urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.SuccessWriteRead/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY 
-urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY 
-urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_WRITE 
-urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_WRITE -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_WRITE_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_READ_ONLY -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueMemBufferWriteTestWithParam.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_whole_buffer_2D urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D @@ -553,10 +152,6 @@ urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -urEnqueueMemBufferWriteRectTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferWriteRectTest.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferWriteRectTest.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueMemBufferWriteRectTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueMemBufferWriteRectTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D @@ -691,159 +286,11 @@ urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unifi 
urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2500_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4096_UR_MEM_FLAG_ALLOC_HOST_POINTER urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -urEnqueueUSMFillTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urEnqueueUSMFillTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urEnqueueUSMFillTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urEnqueueUSMFillTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urEnqueueUSMFillTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urEnqueueUSMFillTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urEnqueueUSMFillTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urEnqueueUSMFillNegativeTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__patternSize__1 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__patternSize__256 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__patternSize__4 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__57__height__1__patternSize__1 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__patternSize__256 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__patternSize__1024 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__patternSize__1 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__patternSize__256 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__patternSize__65536 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__patternSize__1 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__35__patternSize__1 -urEnqueueUSMFill2DTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__35__patternSize__128 -urEnqueueUSMFill2DNegativeTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueUSMAdviseWithParamTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ADVICE_FLAG_DEFAULT 
urEnqueueUSMAdviseTest.MultipleParamsSuccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMAdviseTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMAdviseTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMAdviseTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMAdviseTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueUSMAdviseTest.NonCoherentDeviceMemorySuccessOrWarning/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMMemcpyTest.Blocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMMemcpyTest.BlockingWithEvent/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMMemcpyTest.NonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMMemcpyTest.WaitForDependencies/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE urEnqueueUSMPrefetchWithParamTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueUSMPrefetchTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMPrefetchTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMPrefetchTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMPrefetchTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMPrefetchTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueReadHostPipeTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueReadHostPipeTest.InvalidNullHandleProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ @@ -856,4 +303,3 @@ urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/Intel_R__oneAPI_Unified_Runt urEnqueueWriteHostPipeTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEnqueueTimestampRecordingExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
urEnqueueTimestampRecordingExpTest.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueTimestampRecordingExpTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/enqueue/enqueue_adapter_native_cpu.match b/test/conformance/enqueue/enqueue_adapter_native_cpu.match index 83e9f2391e..fc3cf2d975 100644 --- a/test/conformance/enqueue/enqueue_adapter_native_cpu.match +++ b/test/conformance/enqueue/enqueue_adapter_native_cpu.match @@ -45,6 +45,7 @@ {{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_1_53_19 {{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_256_79_8 {{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urEnqueueKernelLaunchWithUSM.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled {{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index dded6a67e4..c34240c057 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -3,6 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include struct urEnqueueKernelLaunchTest : uur::urKernelExecutionTest { diff --git a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp new file mode 100644 index 0000000000..2aee087c73 --- /dev/null +++ b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp @@ -0,0 +1,392 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include
+#include
+
+#include
+#include
+
+// There was a bug in previous L0 drivers that caused the test to fail
+std::tuple minL0DriverVersion = {1, 3, 29534};
+
+template <typename T>
+struct urMultiQueueLaunchMemcpyTest : uur::urMultiDeviceContextTestTemplate<1>,
+                                      testing::WithParamInterface<T> {
+    std::string KernelName;
+    std::vector<ur_program_handle_t> programs;
+    std::vector<ur_kernel_handle_t> kernels;
+    std::vector<void *> SharedMem;
+
+    std::vector<ur_queue_handle_t> queues;
+    std::vector<ur_device_handle_t> devices;
+
+    std::function<void()> createQueues;
+
+    static constexpr char ProgramName[] = "increment";
+    static constexpr size_t ArraySize = 100;
+    static constexpr size_t InitialValue = 1;
+
+    void SetUp() override {
+        UUR_RETURN_ON_FATAL_FAILURE(
+            uur::urMultiDeviceContextTestTemplate<1>::SetUp());
+
+        createQueues();
+
+        for (auto &device : devices) {
+            SKIP_IF_DRIVER_TOO_OLD("Level-Zero", minL0DriverVersion, platform,
+                                   device);
+        }
+
+        programs.resize(devices.size());
+        kernels.resize(devices.size());
+        SharedMem.resize(devices.size());
+
+        KernelName = uur::KernelsEnvironment::instance->GetEntryPointNames(
+            ProgramName)[0];
+
+        std::shared_ptr> il_binary;
+        std::vector<ur_program_metadata_t> metadatas{};
+
+        uur::KernelsEnvironment::instance->LoadSource(ProgramName, il_binary);
+
+        for (size_t i = 0; i < devices.size(); i++) {
+            const ur_program_properties_t properties = {
+                UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr,
+                static_cast<uint32_t>(metadatas.size()),
+                metadatas.empty() ? nullptr : metadatas.data()};
+
+            uur::raii::Program program;
+            ASSERT_SUCCESS(uur::KernelsEnvironment::instance->CreateProgram(
+                platform, context, devices[i], *il_binary, &properties,
+                &programs[i]));
+
+            UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(
+                urProgramBuild(context, programs[i], nullptr));
+            ASSERT_SUCCESS(
+                urKernelCreate(programs[i], KernelName.data(), &kernels[i]));
+
+            ASSERT_SUCCESS(
+                urUSMSharedAlloc(context, devices[i], nullptr, nullptr,
+                                 ArraySize * sizeof(uint32_t), &SharedMem[i]));
+
+            ASSERT_SUCCESS(urEnqueueUSMFill(queues[i], SharedMem[i],
+                                            sizeof(uint32_t), &InitialValue,
+                                            ArraySize * sizeof(uint32_t), 0,
+                                            nullptr, nullptr /* &Event */));
+            ASSERT_SUCCESS(urQueueFinish(queues[i]));
+
+            ASSERT_SUCCESS(
+                urKernelSetArgPointer(kernels[i], 0, nullptr, SharedMem[i]));
+        }
+    }
+
+    void TearDown() override {
+        for (auto &Ptr : SharedMem) {
+            urUSMFree(context, Ptr);
+        }
+        for (const auto &queue : queues) {
+            EXPECT_SUCCESS(urQueueRelease(queue));
+        }
+        for (const auto &kernel : kernels) {
+            urKernelRelease(kernel);
+        }
+        for (const auto &program : programs) {
+            urProgramRelease(program);
+        }
+        UUR_RETURN_ON_FATAL_FAILURE(
+            uur::urMultiDeviceContextTestTemplate<1>::TearDown());
+    }
+};
+
+template <typename T>
+struct urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam
+    : public urMultiQueueLaunchMemcpyTest<T> {
+    static constexpr size_t duplicateDevices = 8;
+
+    using urMultiQueueLaunchMemcpyTest<T>::context;
+    using urMultiQueueLaunchMemcpyTest<T>::queues;
+    using urMultiQueueLaunchMemcpyTest<T>::devices;
+    using urMultiQueueLaunchMemcpyTest<T>::kernels;
+    using urMultiQueueLaunchMemcpyTest<T>::SharedMem;
+
+    void SetUp() override {
+        this->createQueues = [&] {
+            for (size_t i = 0; i < duplicateDevices; i++) {
+                devices.insert(
+                    devices.end(),
+                    uur::KernelsEnvironment::instance->devices.begin(),
+                    uur::KernelsEnvironment::instance->devices.end());
+            }
+
+            for (auto &device : devices) {
+                ur_queue_handle_t queue = nullptr;
+                ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue));
+                queues.push_back(queue);
+            }
+        };
+
+        UUR_RETURN_ON_FATAL_FAILURE( +
urMultiQueueLaunchMemcpyTest::SetUp()); + } + + void TearDown() override { + UUR_RETURN_ON_FATAL_FAILURE( + urMultiQueueLaunchMemcpyTest::TearDown()); + } +}; + +struct urEnqueueKernelLaunchIncrementTest + : urMultiQueueLaunchMemcpyTest< + std::tuple> { + static constexpr size_t numOps = 50; + + ur_queue_handle_t queue; + + using Param = std::tuple; + using urMultiQueueLaunchMemcpyTest::context; + using urMultiQueueLaunchMemcpyTest::queues; + using urMultiQueueLaunchMemcpyTest::devices; + using urMultiQueueLaunchMemcpyTest::kernels; + using urMultiQueueLaunchMemcpyTest::SharedMem; + + void SetUp() override { + auto device = std::get<0>(GetParam()); + + this->createQueues = [&] { + ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue)); + + // use the same queue and device for all operations + for (size_t i = 0; i < numOps; i++) { + urQueueRetain(queue); + + queues.push_back(queue); + devices.push_back(device); + } + }; + + UUR_RETURN_ON_FATAL_FAILURE( + urMultiQueueLaunchMemcpyTest::SetUp()); + } + + void TearDown() override { + urQueueRelease(queue); + UUR_RETURN_ON_FATAL_FAILURE( + urMultiQueueLaunchMemcpyTest::TearDown()); + } +}; + +UUR_TEST_SUITE_P( + urEnqueueKernelLaunchIncrementTest, + testing::ValuesIn(uur::BoolTestParam::makeBoolParam("UseEvents")), + uur::deviceTestWithParamPrinter); + +TEST_P(urEnqueueKernelLaunchIncrementTest, Success) { + constexpr size_t global_offset = 0; + constexpr size_t n_dimensions = 1; + + auto useEvents = std::get<1>(GetParam()).value; + + std::vector Events(numOps * 2); + for (size_t i = 0; i < numOps; i++) { + size_t waitNum = 0; + ur_event_handle_t *lastEvent = nullptr; + ur_event_handle_t *kernelEvent = nullptr; + ur_event_handle_t *memcpyEvent = nullptr; + + if (useEvents) { + // Events are: kernelEvent0, memcpyEvent0, kernelEvent1, ... + waitNum = i > 0 ? 1 : 0; + lastEvent = i > 0 ? Events[i * 2 - 1].ptr() : nullptr; + + kernelEvent = Events[i * 2].ptr(); + memcpyEvent = Events[i * 2 + 1].ptr(); + } + + // execute kernel that increments each element by 1 + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queue, kernels[i], n_dimensions, &global_offset, &ArraySize, + nullptr, waitNum, lastEvent, kernelEvent)); + + // copy the memory (input for the next kernel) + if (i < numOps - 1) { + ASSERT_SUCCESS( + urEnqueueUSMMemcpy(queue, false, SharedMem[i + 1], SharedMem[i], + ArraySize * sizeof(uint32_t), useEvents, + kernelEvent, memcpyEvent)); + } + } + + if (useEvents) { + // TODO: just wait on the last event, once urEventWait is implemented + // by V2 L0 adapter + urQueueFinish(queue); + } else { + urQueueFinish(queue); + } + + size_t ExpectedValue = InitialValue; + for (size_t i = 0; i < numOps; i++) { + ExpectedValue++; + for (uint32_t j = 0; j < ArraySize; ++j) { + ASSERT_EQ(reinterpret_cast(SharedMem[i])[j], + ExpectedValue); + } + } +} + +struct VoidParam {}; +using urEnqueueKernelLaunchIncrementMultiDeviceTest = + urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam; + +// Do a chain of kernelLaunch(dev0) -> memcpy(dev0, dev1) -> kernelLaunch(dev1) ... 
ops +TEST_F(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) { + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(devices[0], UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(devices[0], UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + const bool usm_p2p_support = + extensions_string.find(UR_USM_P2P_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!usm_p2p_support) { + GTEST_SKIP() << "EXP usm p2p feature is not supported."; + } + + constexpr size_t global_offset = 0; + constexpr size_t n_dimensions = 1; + + std::vector Events(devices.size() * 2); + for (size_t i = 0; i < devices.size(); i++) { + // Events are: kernelEvent0, memcpyEvent0, kernelEvent1, ... + size_t waitNum = i > 0 ? 1 : 0; + ur_event_handle_t *lastEvent = + i > 0 ? Events[i * 2 - 1].ptr() : nullptr; + ur_event_handle_t *kernelEvent = Events[i * 2].ptr(); + ur_event_handle_t *memcpyEvent = Events[i * 2 + 1].ptr(); + + // execute kernel that increments each element by 1 + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queues[i], kernels[i], n_dimensions, &global_offset, &ArraySize, + nullptr, waitNum, lastEvent, kernelEvent)); + + // copy the memory to next device + if (i < devices.size() - 1) { + ASSERT_SUCCESS(urEnqueueUSMMemcpy( + queues[i], false, SharedMem[i + 1], SharedMem[i], + ArraySize * sizeof(uint32_t), 1, kernelEvent, memcpyEvent)); + } + } + + // synchronize on the last queue only, this has to ensure all the operations + // are completed + urQueueFinish(queues.back()); + + size_t ExpectedValue = InitialValue; + for (size_t i = 0; i < devices.size(); i++) { + ExpectedValue++; + for (uint32_t j = 0; j < ArraySize; ++j) { + ASSERT_EQ(reinterpret_cast(SharedMem[i])[j], + ExpectedValue); + } + } +} + +template +inline std::string +printParams(const testing::TestParamInfo &info) { + std::stringstream ss; + + auto param1 = std::get<0>(info.param); + auto param2 = std::get<1>(info.param); + + ss << (param1.value ? "" : "No") << param1.name; + ss << (param2.value ? "" : "No") << param2.name; + + return ss.str(); +} + +using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest = + urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam< + std::tuple>; + +INSTANTIATE_TEST_SUITE_P( + , urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, + testing::Combine( + testing::ValuesIn(uur::BoolTestParam::makeBoolParam("UseEvents")), + testing::ValuesIn(uur::BoolTestParam::makeBoolParam("QueuePerThread"))), + printParams); + +// Enqueue kernelLaunch concurrently from multiple threads +// With !queuePerThread this becomes a test on a single device +TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) { + size_t numThreads = devices.size(); + std::vector threads; + + static constexpr size_t numOpsPerThread = 6; + + auto useEvents = std::get<0>(GetParam()).value; + auto queuePerThread = std::get<1>(GetParam()).value; + + for (size_t i = 0; i < numThreads; i++) { + threads.emplace_back([this, i, queuePerThread, useEvents]() { + constexpr size_t global_offset = 0; + constexpr size_t n_dimensions = 1; + + auto queue = queuePerThread ? 
queues[i] : queues.back(); + auto kernel = kernels[i]; + auto sharedPtr = SharedMem[i]; + + std::vector Events(numOpsPerThread + 1); + for (size_t j = 0; j < numOpsPerThread; j++) { + size_t waitNum = 0; + ur_event_handle_t *lastEvent = nullptr; + ur_event_handle_t *signalEvent = nullptr; + + if (useEvents) { + waitNum = j > 0 ? 1 : 0; + lastEvent = j > 0 ? Events[j - 1].ptr() : nullptr; + signalEvent = Events[j].ptr(); + } + + // execute kernel that increments each element by 1 + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queue, kernel, n_dimensions, &global_offset, &ArraySize, + nullptr, waitNum, lastEvent, signalEvent)); + } + + std::vector data(ArraySize); + + auto lastEvent = + useEvents ? Events[numOpsPerThread - 1].ptr() : nullptr; + auto signalEvent = useEvents ? Events.back().ptr() : nullptr; + ASSERT_SUCCESS( + urEnqueueUSMMemcpy(queue, false, data.data(), sharedPtr, + ArraySize * sizeof(uint32_t), useEvents, + lastEvent, signalEvent)); + + urQueueFinish(queue); + // TODO: when useEvents is implemented for L0 v2 adapter + // wait on event instead + + size_t ExpectedValue = InitialValue; + ExpectedValue += numOpsPerThread; + for (uint32_t j = 0; j < ArraySize; ++j) { + ASSERT_EQ(data[j], ExpectedValue); + } + }); + } + + for (auto &thread : threads) { + thread.join(); + } +} diff --git a/test/conformance/event/event_adapter_level_zero_v2.match b/test/conformance/event/event_adapter_level_zero_v2.match index a9d97d5044..e3f93c54c3 100644 --- a/test/conformance/event/event_adapter_level_zero_v2.match +++ b/test/conformance/event/event_adapter_level_zero_v2.match @@ -1,15 +1,9 @@ - urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_QUEUE urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_CONTEXT urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_TYPE -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_EXECUTION_STATUS -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_REFERENCE_COUNT urEventGetInfoNegativeTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetInfoNegativeTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetInfoNegativeTest.InvalidSizePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetInfoNegativeTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetInfoNegativeTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_START @@ -17,22 +11,8 @@ urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Z urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_COMPLETE 
urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetProfilingInfoNegativeTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetProfilingInfoNegativeTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetProfilingInfoNegativeTest.InvalidValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventWaitTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventRetainTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventReleaseTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetNativeHandleTest.InvalidNullHandleEvent/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetNativeHandleTest.InvalidNullPointerNativeEvent/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventSetCallbackTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventSetCallbackTest.ValidateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventSetCallbackTest.AllStates/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventSetCallbackTest.EventAlreadyCompleted/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackNegativeTest.InvalidNullHandleEvent/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackNegativeTest.InvalidNullPointerCallback/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackNegativeTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}{{Segmentation fault|Aborted}} diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp index 55e6773cb7..5a80be188f 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" +#include // Test that updating a command-buffer with a single kernel command // taking buffer & scalar arguments works correctly. diff --git a/test/conformance/exp_command_buffer/commands.cpp b/test/conformance/exp_command_buffer/commands.cpp index 412e4ab6de..53898ce60a 100644 --- a/test/conformance/exp_command_buffer/commands.cpp +++ b/test/conformance/exp_command_buffer/commands.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" +#include struct urCommandBufferCommandsTest : uur::command_buffer::urCommandBufferExpTest { diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 85457bea97..9a38772eb7 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -154,42 +154,23 @@ struct urUpdatableCommandBufferExpExecutionTest // Create a command-buffer with update enabled. 
ur_exp_command_buffer_desc_t desc{ - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true, false, + false}; ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, &updatable_cmd_buf_handle)); ASSERT_NE(updatable_cmd_buf_handle, nullptr); - - // Currently there are synchronization issue with immediate submission when used for command buffers. - // So, create queue with batched submission for this test suite if the backend is Level Zero. - if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { - ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; - ur_queue_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, - /*.pNext =*/nullptr, - /*.flags =*/flags, - }; - ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); - ASSERT_NE(queue, nullptr); - } else { - queue = urCommandBufferExpExecutionTest::queue; - } } void TearDown() override { if (updatable_cmd_buf_handle) { EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); } - if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) { - ASSERT_SUCCESS(urQueueRelease(queue)); - } - UUR_RETURN_ON_FATAL_FAILURE( urCommandBufferExpExecutionTest::TearDown()); } ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; - ur_queue_handle_t queue = nullptr; }; struct urCommandBufferCommandExpTest diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp index 3c053fe4b9..5cbfffa9cd 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" +#include #include // Test that updating a command-buffer with a single kernel command diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp index 606744cd86..2bf9755c21 100644 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" +#include #include // Test that updating a command-buffer with a single kernel command diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp index 0cb50cb3f1..7f9b5d7f69 100644 --- a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" +#include #include // Test that updating a command-buffer with a single kernel command diff --git a/test/conformance/kernel/kernel_adapter_level_zero_v2.match b/test/conformance/kernel/kernel_adapter_level_zero_v2.match index dca885cc4d..074a58720a 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero_v2.match +++ b/test/conformance/kernel/kernel_adapter_level_zero_v2.match @@ -1,85 +1,63 @@ -urKernelGetGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE 
-urKernelGetGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE -urKernelGetGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoSingleTest.CompileWorkGroupSizeEmpty/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_FUNCTION_NAME 
-urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoSingleTest.KernelNameCorrect/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelGetInfoSingleTest.KernelContextCorrect/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelGetSubGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgLocalTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgLocalTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgMemObjTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgMemObjTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -{{OPT}}urKernelSetArgPointerTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -{{OPT}}urKernelSetArgPointerTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -{{OPT}}urKernelSetArgPointerTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR 
-urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTest.SuccessWithProps/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgSamplerTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgSamplerTest.InvalidNullHandleArgValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetArgValueTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ 
-urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urKernelSetExecInfoCacheConfigTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_CACHE_CONFIG_DEFAULT -urKernelSetExecInfoCacheConfigTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_CACHE_CONFIG_LARGE_SLM -urKernelSetExecInfoCacheConfigTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_KERNEL_CACHE_CONFIG_LARGE_DATA +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_ARGS +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_REFERENCE_COUNT +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_CONTEXT +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_PROGRAM +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_ATTRIBUTES +urKernelGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_REGS +urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME +urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_ARGS +urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_REFERENCE_COUNT +urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_CONTEXT +urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_PROGRAM +urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_ATTRIBUTES +urKernelGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_REGS +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_ARGS +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_REFERENCE_COUNT +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_CONTEXT +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_PROGRAM +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_ATTRIBUTES +urKernelGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_REGS 
+urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME +urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_ARGS +urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_REFERENCE_COUNT +urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_CONTEXT +urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_PROGRAM +urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_ATTRIBUTES +urKernelGetInfoTest.InvalidNullPointerPropValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_KERNEL_INFO_NUM_REGS +urKernelGetInfoSingleTest.KernelNameCorrect/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelGetInfoSingleTest.KernelContextCorrect/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgLocalTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST 
+urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST +urKernelSetArgSamplerTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR +urKernelSetArgSamplerTest.SuccessWithProps/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgSamplerTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgSamplerTest.InvalidNullHandleArgValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/kernel/kernel_adapter_native_cpu.match b/test/conformance/kernel/kernel_adapter_native_cpu.match index 4d3b506fcf..6e5db6f70f 100644 --- a/test/conformance/kernel/kernel_adapter_native_cpu.match +++ b/test/conformance/kernel/kernel_adapter_native_cpu.match @@ -104,6 +104,7 @@ urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urKernelReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urKernelReleaseTest.KernelReleaseAfterProgramRelease/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
urKernelReleaseTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urKernelRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urKernelRetainTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/kernel/urKernelGetGroupInfo.cpp b/test/conformance/kernel/urKernelGetGroupInfo.cpp index b91001a07f..cd48cdc0c2 100644 --- a/test/conformance/kernel/urKernelGetGroupInfo.cpp +++ b/test/conformance/kernel/urKernelGetGroupInfo.cpp @@ -3,6 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include using urKernelGetGroupInfoTest = diff --git a/test/conformance/kernel/urKernelRelease.cpp b/test/conformance/kernel/urKernelRelease.cpp index 051c9d4954..3e2078d98c 100644 --- a/test/conformance/kernel/urKernelRelease.cpp +++ b/test/conformance/kernel/urKernelRelease.cpp @@ -13,6 +13,13 @@ TEST_P(urKernelReleaseTest, Success) { ASSERT_SUCCESS(urKernelRelease(kernel)); } +TEST_P(urKernelReleaseTest, KernelReleaseAfterProgramRelease) { + ASSERT_SUCCESS(urKernelRetain(kernel)); + ASSERT_SUCCESS(urProgramRelease(program)); + program = nullptr; + ASSERT_SUCCESS(urKernelRelease(kernel)); +} + TEST_P(urKernelReleaseTest, InvalidNullHandleKernel) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urKernelRelease(nullptr)); diff --git a/test/conformance/memory/memory_adapter_level_zero_v2.match b/test/conformance/memory/memory_adapter_level_zero_v2.match index 7e92763def..e6639680ed 100644 --- a/test/conformance/memory/memory_adapter_level_zero_v2.match +++ b/test/conformance/memory/memory_adapter_level_zero_v2.match @@ -1,38 +1,12 @@ -urMemBufferCreateWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_WRITE -urMemBufferCreateWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_WRITE_ONLY -urMemBufferCreateWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_ONLY -urMemBufferCreateWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_ALLOC_HOST_POINTER -urMemBufferCreateWithHostPtrFlagsTest.SUCCESS/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER -urMemBufferCreateWithHostPtrFlagsTest.SUCCESS/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_USE_HOST_POINTER -urMemBufferCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemBufferPartitionTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidNullHandleBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidEnumerationFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidEnumerationBufferCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidNullPointerBufferCreateInfo/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidBufferSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
urMemGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE urMemGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidNullHandleMemory/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullHandleMemory/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidEnumerationMemInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidEnumerationMemInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT urMemGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE urMemGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -urMemGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemGetNativeHandleTest.InvalidNullHandleMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemGetNativeHandleTest.InvalidNullPointerNativeMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 @@ -307,7 +281,3 @@ urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runt urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -urMemReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemReleaseTest.InvalidNullHandleMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemRetainTest.InvalidNullHandleMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git 
a/test/conformance/platform/platform_adapter_native_cpu.match b/test/conformance/platform/platform_adapter_native_cpu.match index 257822d30b..b459b89bbe 100644 --- a/test/conformance/platform/platform_adapter_native_cpu.match +++ b/test/conformance/platform/platform_adapter_native_cpu.match @@ -1,6 +1 @@ urPlatformCreateWithNativeHandleTest.InvalidNullPointerPlatform -urPlatfromGetBackendOptionTest.InvalidValueFrontendOption -urPlatfromGetBackendOptionTestWithParam.Success/_O0 -urPlatfromGetBackendOptionTestWithParam.Success/_O1 -urPlatfromGetBackendOptionTestWithParam.Success/_O2 -urPlatfromGetBackendOptionTestWithParam.Success/_O3 diff --git a/test/conformance/program/program_adapter_level_zero_v2.match b/test/conformance/program/program_adapter_level_zero_v2.match index 7c53e24502..70e0a12609 100644 --- a/test/conformance/program/program_adapter_level_zero_v2.match +++ b/test/conformance/program/program_adapter_level_zero_v2.match @@ -1,12 +1,8 @@ -urProgramCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urProgramGetBuildInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_PROGRAM_BUILD_INFO_STATUS -urProgramGetFunctionPointerTest.InvalidKernelName/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urProgramGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ +urProgramCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramGetBuildInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROGRAM_BUILD_INFO_STATUS +urProgramGetFunctionPointerTest.InvalidKernelName/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramGetNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{OPT}}urProgramLinkErrorTest.LinkFailure/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramSetSpecializationConstantsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urProgramSetSpecializationConstantsTest.UseDefaultValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urProgramSetMultipleSpecializationConstantsTest.SingleCall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ diff --git a/test/conformance/program/urProgramCreateWithIL.cpp b/test/conformance/program/urProgramCreateWithIL.cpp index 3d81d14104..7c02c3c7b9 100644 --- a/test/conformance/program/urProgramCreateWithIL.cpp +++ b/test/conformance/program/urProgramCreateWithIL.cpp @@ -37,7 +37,8 @@ TEST_P(urProgramCreateWithILTest, Success) { } TEST_P(urProgramCreateWithILTest, SuccessWithProperties) { - ur_program_properties_t properties{UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES}; + ur_program_properties_t properties{UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, + nullptr, 0, nullptr}; 
ur_program_handle_t program = nullptr; ASSERT_SUCCESS(urProgramCreateWithIL( context, il_binary->data(), il_binary->size(), &properties, &program)); diff --git a/test/conformance/queue/queue_adapter_level_zero_v2.match b/test/conformance/queue/queue_adapter_level_zero_v2.match index 524e04fa1d..e69de29bb2 100644 --- a/test/conformance/queue/queue_adapter_level_zero_v2.match +++ b/test/conformance/queue/queue_adapter_level_zero_v2.match @@ -1,2 +0,0 @@ -urQueueFinishTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urQueueFlushTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index e57a31584a..568f700da1 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -513,11 +513,12 @@ struct urMultiQueueTest : urContextTest { ur_queue_handle_t queue2 = nullptr; }; -struct urMultiDeviceContextTest : urPlatformTest { +template <size_t MinDevices = 2> +struct urMultiDeviceContextTestTemplate : urPlatformTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urPlatformTest::SetUp()); auto &devices = DevicesEnvironment::instance->devices; - if (devices.size() <= 1) { + if (devices.size() < MinDevices) { GTEST_SKIP(); } ASSERT_SUCCESS(urContextCreate(static_cast<uint32_t>(devices.size()), @@ -534,6 +535,10 @@ struct urMultiDeviceContextTest : urPlatformTest { ur_context_handle_t context = nullptr; }; +struct urMultiDeviceContextTest : urMultiDeviceContextTestTemplate<> { + using urMultiDeviceContextTestTemplate::context; +}; + struct urMultiDeviceMemBufferTest : urMultiDeviceContextTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceContextTest::SetUp()); diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h index 7e23e55843..a1febf9769 100644 --- a/test/conformance/testing/include/uur/utils.h +++ b/test/conformance/testing/include/uur/utils.h @@ -424,6 +424,65 @@ ur_result_t MakeUSMAllocationByType(ur_usm_type_t type, ur_usm_pool_handle_t hPool, size_t size, void **ppMem); +inline std::tuple<int, int, int> +decodeSemVersion(std::string version) { + auto posMajor = version.find('.'); + auto posMinor = version.find('.', posMajor + 1); + auto major = std::stoi(version.substr(0, posMajor)); + auto minor = + std::stoi(version.substr(posMajor + 1, posMinor - posMajor - 1)); + auto patch = std::stoi(version.substr(posMinor + 1)); + return std::make_tuple(major, minor, patch); +} + +inline bool isGivenAdapter(ur_platform_handle_t hPlatform, + std::string adapterName) { + size_t psize; + EXPECT_EQ( + urPlatformGetInfo(hPlatform, UR_PLATFORM_INFO_NAME, 0, nullptr, &psize), + UR_RESULT_SUCCESS); + std::string platform(psize, '\0'); + EXPECT_EQ(urPlatformGetInfo(hPlatform, UR_PLATFORM_INFO_NAME, psize, + platform.data(), nullptr), + UR_RESULT_SUCCESS); + + return platform.find(adapterName) != std::string::npos; +} + +inline std::tuple<int, int, int> +getDriverVersion(ur_device_handle_t hDevice) { + size_t driverVersionSize = 0; + EXPECT_EQ(urDeviceGetInfo(hDevice, UR_DEVICE_INFO_DRIVER_VERSION, 0, + nullptr, &driverVersionSize), + UR_RESULT_SUCCESS); + std::string driver(driverVersionSize, '\0'); + EXPECT_EQ(urDeviceGetInfo(hDevice, UR_DEVICE_INFO_DRIVER_VERSION, + driverVersionSize, driver.data(), + &driverVersionSize), + UR_RESULT_SUCCESS); + + return decodeSemVersion(driver); +} + +#define SKIP_IF_DRIVER_TOO_OLD(adapterName, minDriverVersion, hPlatform, \ hDevice) \ do { \ if 
(uur::isGivenAdapter(hPlatform, adapterName)) { \ + auto [major, minor, patch] = uur::getDriverVersion(hDevice); \ + auto [minMajor, minMinor, minPatch] = minL0DriverVersion; \ + if (major < minMajor || (major == minMajor && minor < minMinor) || \ + (major == minMajor && minor == minMinor && \ + patch < minPatch)) { \ + GTEST_SKIP() \ + << "Skipping test because driver version is too old for " \ + << adapterName << ". " \ + << "Driver version: " << major << "." << minor << "." \ + << patch << " Minimum required version: " << minMajor \ + << "." << minMinor << "." << minPatch; \ + } \ + } \ + } while (0) + } // namespace uur #endif // UR_CONFORMANCE_INCLUDE_UTILS_H_INCLUDED diff --git a/test/conformance/usm/usm_adapter_level_zero_v2.match b/test/conformance/usm/usm_adapter_level_zero_v2.match index 88501eea9d..0908da40da 100644 --- a/test/conformance/usm/usm_adapter_level_zero_v2.match +++ b/test/conformance/usm/usm_adapter_level_zero_v2.match @@ -1,122 +1,7 @@ -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_8 
-urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_2048 -urUSMFreeTest.SuccessDeviceAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urUSMFreeTest.SuccessHostAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urUSMFreeTest.SuccessSharedAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urUSMGetMemAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_USM_ALLOC_INFO_TYPE -urUSMGetMemAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_USM_ALLOC_INFO_BASE_PTR -urUSMGetMemAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_USM_ALLOC_INFO_SIZE -urUSMGetMemAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_USM_ALLOC_INFO_DEVICE urUSMGetMemAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_USM_ALLOC_INFO_POOL -urUSMGetMemAllocInfoNegativeTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urUSMGetMemAllocInfoNegativeTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ 
-urUSMGetMemAllocInfoNegativeTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urUSMGetMemAllocInfoNegativeTest.InvalidValuePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_8 
-urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_2048 -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_4_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_8_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_512 
-urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_16_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_32_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled_64_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_4_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_8_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_16_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_32_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled_64_2048 diff --git a/test/layers/tracing/CMakeLists.txt b/test/layers/tracing/CMakeLists.txt index 969e4318b1..c09f2eafb1 100644 --- a/test/layers/tracing/CMakeLists.txt +++ b/test/layers/tracing/CMakeLists.txt @@ -15,9 +15,9 @@ target_link_libraries(test_collector PRIVATE ${TARGET_XPTI}) target_include_directories(test_collector PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(test_collector PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + 
target_compile_definitions(test_collector PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(test_collector PRIVATE XPTI_CALLBACK_API_EXPORTS) function(set_tracing_test_props target_name collector_name) set_tests_properties(${target_name} PROPERTIES diff --git a/test/loader/adapter_registry/CMakeLists.txt b/test/loader/adapter_registry/CMakeLists.txt index 2778ad5c40..6d80430e6c 100644 --- a/test/loader/adapter_registry/CMakeLists.txt +++ b/test/loader/adapter_registry/CMakeLists.txt @@ -51,3 +51,7 @@ add_adapter_reg_search_test(search-order SEARCH_PATH ${TEST_SEARCH_PATH} ENVS "TEST_ADAPTER_SEARCH_PATH=\"${TEST_SEARCH_PATH}\"" "TEST_CUR_SEARCH_PATH=\"${TEST_BIN_PATH}\"" SOURCES search_order.cpp) + +add_adapter_reg_search_test(prefilter + SEARCH_PATH "" + SOURCES prefilter.cpp) diff --git a/test/loader/adapter_registry/fixtures.hpp b/test/loader/adapter_registry/fixtures.hpp index 79a831d40f..da5c963e8a 100644 --- a/test/loader/adapter_registry/fixtures.hpp +++ b/test/loader/adapter_registry/fixtures.hpp @@ -74,5 +74,49 @@ struct adapterRegSearchTest : ::testing::Test { } } }; +#ifndef _WIN32 +struct adapterPreFilterTest : ::testing::Test { + ur_loader::AdapterRegistry *registry; + const fs::path levelzeroLibName = + MAKE_LIBRARY_NAME("ur_adapter_level_zero", "0"); + std::function<bool(const fs::path &)> islevelzeroLibName = + [this](const fs::path &path) { return path == levelzeroLibName; }; + + std::function<bool(const std::vector<fs::path> &)> haslevelzeroLibName = + [this](const std::vector<fs::path> &paths) { + return std::any_of(paths.cbegin(), paths.cend(), + islevelzeroLibName); + }; + + const fs::path openclLibName = MAKE_LIBRARY_NAME("ur_adapter_opencl", "0"); + std::function<bool(const fs::path &)> isOpenclLibName = + [this](const fs::path &path) { return path == openclLibName; }; + + std::function<bool(const std::vector<fs::path> &)> hasOpenclLibName = + [this](const std::vector<fs::path> &paths) { + return std::any_of(paths.cbegin(), paths.cend(), isOpenclLibName); + }; + + const fs::path cudaLibName = MAKE_LIBRARY_NAME("ur_adapter_cuda", "0"); + std::function<bool(const fs::path &)> isCudaLibName = + [this](const fs::path &path) { return path == cudaLibName; }; + + std::function<bool(const std::vector<fs::path> &)> hasCudaLibName = + [this](const std::vector<fs::path> &paths) { + return std::any_of(paths.cbegin(), paths.cend(), isCudaLibName); + }; + + void SetUp(std::string filter) { + try { + setenv("ONEAPI_DEVICE_SELECTOR", filter.c_str(), 1); + registry = new ur_loader::AdapterRegistry; + } catch (const std::invalid_argument &e) { + FAIL() << e.what(); + } + } + void SetUp() override {} + void TearDown() override { delete registry; } +}; +#endif #endif // UR_ADAPTER_REG_TEST_HELPERS_H diff --git a/test/loader/adapter_registry/prefilter.cpp b/test/loader/adapter_registry/prefilter.cpp new file mode 100644 index 0000000000..1d2b095da3 --- /dev/null +++ b/test/loader/adapter_registry/prefilter.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.hpp" + +#ifndef _WIN32 + +TEST_F(adapterPreFilterTest, testPrefilterAcceptFilterSingleBackend) { + SetUp("level_zero:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_FALSE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterAcceptFilterMultipleBackends) { + SetUp("level_zero:*;opencl:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterDiscardFilterSingleBackend) { + SetUp("!level_zero:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_FALSE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_TRUE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterDiscardFilterMultipleBackends) { + SetUp("!level_zero:*;!cuda:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_FALSE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterAcceptAndDiscardFilter) { + SetUp("!cuda:*;level_zero:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_FALSE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterDiscardFilterAll) { + SetUp("*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_TRUE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterWithInvalidMissingBackend) { + SetUp(":garbage"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_TRUE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterWithInvalidBackend) { + SetUp("garbage:0"); + auto levelZeroExists = + 
+        std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName);
+    EXPECT_TRUE(levelZeroExists);
+    auto openclExists =
+        std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName);
+    EXPECT_TRUE(openclExists);
+    auto cudaExists =
+        std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName);
+    EXPECT_TRUE(cudaExists);
+}
+
+TEST_F(adapterPreFilterTest, testPrefilterWithNotAllAndAcceptFilter) {
+    SetUp("!*;level_zero");
+    auto levelZeroExists =
+        std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName);
+    EXPECT_TRUE(levelZeroExists);
+    auto openclExists =
+        std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName);
+    EXPECT_FALSE(openclExists);
+    auto cudaExists =
+        std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName);
+    EXPECT_FALSE(cudaExists);
+}
+
+TEST_F(adapterPreFilterTest, testPrefilterWithNotAllFilter) {
+    SetUp("!*");
+    auto levelZeroExists =
+        std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName);
+    EXPECT_FALSE(levelZeroExists);
+    auto openclExists =
+        std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName);
+    EXPECT_FALSE(openclExists);
+    auto cudaExists =
+        std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName);
+    EXPECT_FALSE(cudaExists);
+}
+
+#endif
diff --git a/test/unit/utils/CMakeLists.txt b/test/unit/utils/CMakeLists.txt
index a0e0fd3ef7..62681b1032 100644
--- a/test/unit/utils/CMakeLists.txt
+++ b/test/unit/utils/CMakeLists.txt
@@ -13,3 +13,6 @@ add_unit_test(params
 
 add_unit_test(print
     print.cpp)
+
+add_unit_test(helpers
+    helpers.cpp)
diff --git a/test/unit/utils/helpers.cpp b/test/unit/utils/helpers.cpp
new file mode 100644
index 0000000000..87223b21cc
--- /dev/null
+++ b/test/unit/utils/helpers.cpp
@@ -0,0 +1,30 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <gtest/gtest.h>
+#include <string>
+
+#include "ur_util.hpp"
+
+TEST(groupDigits, Success) {
+    EXPECT_EQ(groupDigits(-1), "-1");
+    EXPECT_EQ(groupDigits(-12), "-12");
+    EXPECT_EQ(groupDigits(-123), "-123");
+    EXPECT_EQ(groupDigits(-1234), "-1'234");
+    EXPECT_EQ(groupDigits(-12345), "-12'345");
+    EXPECT_EQ(groupDigits(-123456), "-123'456");
+    EXPECT_EQ(groupDigits(-1234567), "-1'234'567");
+    EXPECT_EQ(groupDigits(-12345678), "-12'345'678");
+
+    EXPECT_EQ(groupDigits(0), "0");
+    EXPECT_EQ(groupDigits(1), "1");
+    EXPECT_EQ(groupDigits(12), "12");
+    EXPECT_EQ(groupDigits(123), "123");
+    EXPECT_EQ(groupDigits(1234), "1'234");
+    EXPECT_EQ(groupDigits(12345), "12'345");
+    EXPECT_EQ(groupDigits(123456), "123'456");
+    EXPECT_EQ(groupDigits(1234567), "1'234'567");
+    EXPECT_EQ(groupDigits(12345678), "12'345'678");
+}
diff --git a/test/unit/utils/params.cpp b/test/unit/utils/params.cpp
index c456f69795..e86181344c 100644
--- a/test/unit/utils/params.cpp
+++ b/test/unit/utils/params.cpp
@@ -27,3 +27,17 @@ TEST(PrintPtr, nested_void_ptrs) {
     ur::details::printPtr(out, pppreal);
     EXPECT_THAT(out.str(), MatchesRegex(".+ \\(.+ \\(.+ \\(.+\\)\\)\\)"));
 }
+
+TEST(PrintBool, False) {
+    ur_bool_t value = false;
+    std::ostringstream out;
+    out << value;
+    EXPECT_STREQ(out.str().data(), "false");
+}
+
+TEST(PrintBool, True) {
+    ur_bool_t value = 1;
+    std::ostringstream out;
+    out << value;
+    EXPECT_STREQ(out.str().data(), "true");
+}
diff --git a/third_party/deps.yml b/third_party/deps.yml
index 80c2fa109c..69e2222bcf 100644
--- a/third_party/deps.yml
+++ b/third_party/deps.yml
@@ -25,7 +25,7 @@ dependencies:
   - libssh2=1.11.0
   - libstdcxx-ng=13.1.0
   - libuv=1.44.2
-  - libzlib=1.2.13
+  - libzlib=1.3.1
   - llvm-spirv=14.0.0
   - llvm-tools=14.0.6
   - llvmdev=14.0.6
@@ -35,5 +35,5 @@ dependencies:
   - rhash=1.4.3
   # don't upgrade xz utils due to CVE-2024-3094
   - xz=5.2.6
-  - zlib=1.2.13
+  - zlib=1.3.1
   - zstd=1.5.2
diff --git a/third_party/requirements.txt b/third_party/requirements.txt
index 330cbd023b..9975d59353 100644
--- a/third_party/requirements.txt
+++ b/third_party/requirements.txt
@@ -4,7 +4,7 @@ bandit==1.6.2
 beautifulsoup4==4.11.1
 breathe==4.33.1
 bs4==0.0.1
-certifi==2023.07.22
+certifi==2024.07.04
 chardet==3.0.4
 clang-format==15.0.7
 colorama==0.4.1
@@ -22,7 +22,7 @@ pyparsing==2.4.5
 pytest>=7.0
 pytz==2019.3
 PyYAML==6.0.1
-requests==2.31.0
+requests==2.32.2
 rst2pdf==0.98
 six==1.13.0
 snowballstemmer==2.0.0
@@ -37,5 +37,5 @@ sphinxcontrib-qthelp==1.0.3
 sphinxcontrib-serializinghtml==1.1.5
 sphinxcontrib-websupport==1.2.4
 sphinx-rtd-theme==1.0.0
-urllib3==2.1.0
+urllib3==2.2.2
 dataclasses-json==0.6.7
diff --git a/tools/urtrace/CMakeLists.txt b/tools/urtrace/CMakeLists.txt
index 085f361223..9b385606ea 100644
--- a/tools/urtrace/CMakeLists.txt
+++ b/tools/urtrace/CMakeLists.txt
@@ -17,9 +17,9 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI} ${PROJECT_NAME}::common)
 target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include)
 
 if(MSVC)
-    target_compile_definitions(${TARGET_NAME} PRIVATE
-        XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS)
+    target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_STATIC_LIBRARY)
 endif()
+target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_CALLBACK_API_EXPORTS)
 
 set(UR_TRACE_CLI_BIN ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/urtrace)
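
Note (editor's sketch, not part of the patch above): the new test/unit/utils/helpers.cpp only exercises a groupDigits() helper declared in ur_util.hpp; the helper's implementation is not shown in this diff. As an illustration of the behaviour those assertions expect, a minimal, hypothetical implementation could look like the following; the real helper in ur_util.hpp may be written differently.

    #include <string>

    // Hypothetical sketch: render an integral value with apostrophes between
    // groups of three digits, counted from the least-significant digit, while
    // keeping a leading minus sign attached to the first group.
    template <typename T> std::string groupDigits(T value) {
        const std::string digits = std::to_string(value);
        // Skip a leading '-' so it is never separated from the digits.
        const size_t sign = (!digits.empty() && digits[0] == '-') ? 1 : 0;
        std::string result = digits.substr(0, sign);
        const size_t len = digits.size() - sign;
        for (size_t i = 0; i < len; ++i) {
            // A separator goes before every complete group of three digits,
            // except at the very start of the number.
            if (i != 0 && (len - i) % 3 == 0) {
                result += '\'';
            }
            result += digits[sign + i];
        }
        return result;
    }

With this sketch, groupDigits(-1234567) yields "-1'234'567" and groupDigits(0) yields "0", matching the assertions added in helpers.cpp.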