Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Candidate for the v0.11.3 release tag #2526

Merged
merged 14 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

cmake_minimum_required(VERSION 3.20.0 FATAL_ERROR)
project(unified-runtime VERSION 0.11.2)
project(unified-runtime VERSION 0.11.3)

# Check if unified runtime is built as a standalone project.
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR UR_STANDALONE_BUILD)
Expand Down
19 changes: 19 additions & 0 deletions source/adapters/cuda/tensor_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,24 @@

#include "context.hpp"

#if CUDA_VERSION < 12000
// Fallback compiled only when CUDA_VERSION < 12000. The CUtensorMap-based
// implementation lives in the #else branch, so this variant ignores every
// argument and reports the feature as unsupported.
UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp(
    ur_device_handle_t,
    ur_exp_tensor_map_data_type_flags_t,
    uint32_t,
    void *,
    const uint64_t *,
    const uint64_t *,
    const int *,
    const int *,
    uint32_t,
    uint32_t,
    const uint32_t *,
    ur_exp_tensor_map_interleave_flags_t,
    ur_exp_tensor_map_swizzle_flags_t,
    ur_exp_tensor_map_l2_promotion_flags_t,
    ur_exp_tensor_map_oob_fill_flags_t,
    ur_exp_tensor_map_handle_t *) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Fallback compiled only when CUDA_VERSION < 12000. The CUtensorMap-based
// implementation lives in the #else branch, so this variant ignores every
// argument and reports the feature as unsupported.
UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp(
    ur_device_handle_t,
    ur_exp_tensor_map_data_type_flags_t,
    uint32_t,
    void *,
    const uint64_t *,
    const uint64_t *,
    const uint32_t *,
    const uint32_t *,
    ur_exp_tensor_map_interleave_flags_t,
    ur_exp_tensor_map_swizzle_flags_t,
    ur_exp_tensor_map_l2_promotion_flags_t,
    ur_exp_tensor_map_oob_fill_flags_t,
    ur_exp_tensor_map_handle_t *) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
#else
// Defined only in the CUDA_VERSION >= 12000 branch of this file; wraps a
// single CUtensorMap value.
struct ur_exp_tensor_map_handle_t_ {
CUtensorMap Map;
};
Expand Down Expand Up @@ -140,3 +158,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp(
}
return UR_RESULT_SUCCESS;
}
#endif
88 changes: 55 additions & 33 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,9 @@ namespace {
// given Context and Device.
bool checkImmediateAppendSupport(ur_context_handle_t Context,
ur_device_handle_t Device) {
// TODO The L0 driver is not reporting this extension yet. Once it does,
// switch to using the variable zeDriverImmediateCommandListAppendFound.

// Minimum version that supports zeCommandListImmediateAppendCommandListsExp.
constexpr uint32_t MinDriverVersion = 30898;
bool DriverSupportsImmediateAppend =
Context->getPlatform()->isDriverVersionNewerOrSimilar(1, 3,
MinDriverVersion);
Context->getPlatform()->ZeCommandListImmediateAppendExt.Supported;

// If this environment variable is:
// - Set to 1: the immediate append path will always be enabled as long the
Expand All @@ -58,10 +53,8 @@ bool checkImmediateAppendSupport(ur_context_handle_t Context,
if (EnableAppendPath && !DriverSupportsImmediateAppend) {
logger::error("{} is set but "
"the current driver does not support the "
"zeCommandListImmediateAppendCommandListsExp entrypoint. A "
"driver version of at least {} is required to use the "
"immediate append path.",
AppendEnvVarName, MinDriverVersion);
"zeCommandListImmediateAppendCommandListsExp entrypoint.",
AppendEnvVarName);
std::abort();
}

Expand Down Expand Up @@ -955,41 +948,53 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer,

auto Platform = CommandBuffer->Context->getPlatform();
auto ZeDevice = CommandBuffer->Device->ZeDevice;
ze_command_list_handle_t ZeCommandList =
CommandBuffer->ZeComputeCommandListTranslated;
if (Platform->ZeMutableCmdListExt.LoaderExtension) {
ZeCommandList = CommandBuffer->ZeComputeCommandList;
}

if (NumKernelAlternatives > 0) {
ZeMutableCommandDesc.flags |=
ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION;

std::vector<ze_kernel_handle_t> TranslatedKernelHandles(
NumKernelAlternatives + 1, nullptr);
std::vector<ze_kernel_handle_t> KernelHandles(NumKernelAlternatives + 1,
nullptr);

ze_kernel_handle_t ZeMainKernel{};
UR_CALL(getZeKernel(ZeDevice, Kernel, &ZeMainKernel));

// Translate main kernel first
ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, ZeMainKernel,
(void **)&TranslatedKernelHandles[0]));
if (Platform->ZeMutableCmdListExt.LoaderExtension) {
KernelHandles[0] = ZeMainKernel;
} else {
// If the L0 loader is not aware of the MCL extension, the main kernel
// handle needs to be translated.
ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, ZeMainKernel, (void **)&KernelHandles[0]));
}

for (size_t i = 0; i < NumKernelAlternatives; i++) {
ze_kernel_handle_t ZeAltKernel{};
UR_CALL(getZeKernel(ZeDevice, KernelAlternatives[i], &ZeAltKernel));

ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, ZeAltKernel,
(void **)&TranslatedKernelHandles[i + 1]));
if (Platform->ZeMutableCmdListExt.LoaderExtension) {
KernelHandles[i + 1] = ZeAltKernel;
} else {
// If the L0 loader is not aware of the MCL extension, the kernel
// alternatives need to be translated.
ZE2UR_CALL(zelLoaderTranslateHandle, (ZEL_HANDLE_KERNEL, ZeAltKernel,
(void **)&KernelHandles[i + 1]));
}
}

ZE2UR_CALL(Platform->ZeMutableCmdListExt
.zexCommandListGetNextCommandIdWithKernelsExp,
(CommandBuffer->ZeComputeCommandListTranslated,
&ZeMutableCommandDesc, NumKernelAlternatives + 1,
TranslatedKernelHandles.data(), &CommandId));
(ZeCommandList, &ZeMutableCommandDesc, NumKernelAlternatives + 1,
KernelHandles.data(), &CommandId));

} else {
ZE2UR_CALL(Platform->ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp,
(CommandBuffer->ZeComputeCommandListTranslated,
&ZeMutableCommandDesc, &CommandId));
(ZeCommandList, &ZeMutableCommandDesc, &CommandId));
}
DEBUG_LOG(CommandId);

Expand Down Expand Up @@ -1568,7 +1573,10 @@ ur_result_t enqueueImmediateAppendPath(
ur_event_handle_t *Event, ur_command_list_ptr_t CommandListHelper,
bool DoProfiling) {

ur_platform_handle_t Platform = CommandBuffer->Context->getPlatform();

assert(CommandListHelper->second.IsImmediate);
assert(Platform->ZeCommandListImmediateAppendExt.Supported);

_ur_ze_event_list_t UrZeEventList;
if (NumEventsInWaitList) {
Expand All @@ -1586,7 +1594,8 @@ ur_result_t enqueueImmediateAppendPath(
nullptr /*ForcedCmdQueue*/));
assert(ZeCopyEngineImmediateListHelper->second.IsImmediate);

ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp,
ZE2UR_CALL(Platform->ZeCommandListImmediateAppendExt
.zeCommandListImmediateAppendCommandListsExp,
(ZeCopyEngineImmediateListHelper->first, 1,
&CommandBuffer->ZeCopyCommandList, nullptr,
UrZeEventList.Length, UrZeEventList.ZeEventList));
Expand All @@ -1598,7 +1607,8 @@ ur_result_t enqueueImmediateAppendPath(
ze_event_handle_t &EventToSignal =
DoProfiling ? CommandBuffer->ComputeFinishedEvent->ZeEvent
: (*Event)->ZeEvent;
ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp,
ZE2UR_CALL(Platform->ZeCommandListImmediateAppendExt
.zeCommandListImmediateAppendCommandListsExp,
(CommandListHelper->first, 1, &CommandBuffer->ZeComputeCommandList,
EventToSignal, WaitList.Length, WaitList.ZeEventList));

Expand All @@ -1615,7 +1625,8 @@ ur_result_t enqueueImmediateAppendPath(
(CommandListHelper->first,
CommandBuffer->ExecutionFinishedEvent->ZeEvent, 0, nullptr));

ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp,
ZE2UR_CALL(Platform->ZeCommandListImmediateAppendExt
.zeCommandListImmediateAppendCommandListsExp,
(CommandListHelper->first, 1,
&CommandBuffer->ZeCommandListResetEvents, nullptr, 0, nullptr));
}
Expand Down Expand Up @@ -1863,17 +1874,22 @@ ur_result_t updateKernelCommand(
ur_kernel_handle_t NewKernel = CommandDesc->hNewKernel;

if (NewKernel && Command->Kernel != NewKernel) {
ze_kernel_handle_t KernelHandle{};
ze_kernel_handle_t ZeNewKernel{};
UR_CALL(getZeKernel(ZeDevice, NewKernel, &ZeNewKernel));

ze_kernel_handle_t ZeKernelTranslated = nullptr;
ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, ZeNewKernel, (void **)&ZeKernelTranslated));
ze_command_list_handle_t ZeCommandList =
CommandBuffer->ZeComputeCommandList;
KernelHandle = ZeNewKernel;
if (!Platform->ZeMutableCmdListExt.LoaderExtension) {
ZeCommandList = CommandBuffer->ZeComputeCommandListTranslated;
ZE2UR_CALL(zelLoaderTranslateHandle,
(ZEL_HANDLE_KERNEL, ZeNewKernel, (void **)&KernelHandle));
}

ZE2UR_CALL(Platform->ZeMutableCmdListExt
.zexCommandListUpdateMutableCommandKernelsExp,
(CommandBuffer->ZeComputeCommandListTranslated, 1,
&Command->CommandId, &ZeKernelTranslated));
(ZeCommandList, 1, &Command->CommandId, &KernelHandle));
// Set current kernel to be the new kernel
Command->Kernel = NewKernel;
}
Expand Down Expand Up @@ -2079,9 +2095,15 @@ ur_result_t updateKernelCommand(
MutableCommandDesc.pNext = NextDesc;
MutableCommandDesc.flags = 0;

ze_command_list_handle_t ZeCommandList =
CommandBuffer->ZeComputeCommandListTranslated;
if (Platform->ZeMutableCmdListExt.LoaderExtension) {
ZeCommandList = CommandBuffer->ZeComputeCommandList;
}

ZE2UR_CALL(
Platform->ZeMutableCmdListExt.zexCommandListUpdateMutableCommandsExp,
(CommandBuffer->ZeComputeCommandListTranslated, &MutableCommandDesc));
(ZeCommandList, &MutableCommandDesc));

return UR_RESULT_SUCCESS;
}
Expand Down
18 changes: 11 additions & 7 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,13 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
if (*ZePool == nullptr) {
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr, 0};

ze_intel_event_sync_mode_exp_desc_t eventSyncMode = {
ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, nullptr, 0};
eventSyncMode.syncModeFlags =
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT |
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT;

ZeStruct<ze_event_pool_desc_t> ZeEventPoolDesc;
ZeEventPoolDesc.count = MaxNumEventsPerPool;
ZeEventPoolDesc.flags = 0;
Expand All @@ -552,14 +559,11 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
}
logger::debug("ze_event_pool_desc_t counter based flags set to: {}",
counterBasedExt.flags);
if (InterruptBasedEventEnabled) {
counterBasedExt.pNext = &eventSyncMode;
}
ZeEventPoolDesc.pNext = &counterBasedExt;
}
if (InterruptBasedEventEnabled) {
ze_intel_event_sync_mode_exp_desc_t eventSyncMode = {
ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, nullptr, 0};
eventSyncMode.syncModeFlags =
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT |
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT;
} else if (InterruptBasedEventEnabled) {
ZeEventPoolDesc.pNext = &eventSyncMode;
}

Expand Down
6 changes: 1 addition & 5 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,6 @@ ur_result_t urEnqueueEventsWait(
std::unique_lock<ur_shared_mutex> Lock(Queue->Mutex);
resetCommandLists(Queue);
}
if (OutEvent && (*OutEvent)->Completed) {
UR_CALL(CleanupCompletedEvent((*OutEvent), false, false));
UR_CALL(urEventReleaseInternal((*OutEvent)));
}

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -795,7 +791,7 @@ urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list
//
ur_event_handle_t_ *Event = ur_cast<ur_event_handle_t_ *>(e);
if (!Event->hasExternalRefs())
die("urEventsWait must not be called for an internal event");
die("urEventWait must not be called for an internal event");

ze_event_handle_t ZeHostVisibleEvent;
if (auto Res = Event->getOrCreateHostVisibleEvent(ZeHostVisibleEvent))
Expand Down
25 changes: 25 additions & 0 deletions source/adapters/level_zero/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ ur_result_t ur_platform_handle_t_::initialize() {

bool MutableCommandListSpecExtensionSupported = false;
bool ZeIntelExternalSemaphoreExtensionSupported = false;
bool ZeImmediateCommandListAppendExtensionFound = false;
for (auto &extension : ZeExtensions) {
// Check if global offset extension is available
if (strncmp(extension.name, ZE_GLOBAL_OFFSET_EXP_NAME,
Expand All @@ -246,6 +247,14 @@ ur_result_t ur_platform_handle_t_::initialize() {
ZeDriverEventPoolCountingEventsExtensionFound = true;
}
}
// Check if the ImmediateAppendCommandLists extension is available.
if (strncmp(extension.name, ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME,
strlen(ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME) + 1) == 0) {
if (extension.version ==
ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_CURRENT) {
ZeImmediateCommandListAppendExtensionFound = true;
}
}
// Check if extension is available for Mutable Command List v1.1.
if (strncmp(extension.name, ZE_MUTABLE_COMMAND_LIST_EXP_NAME,
strlen(ZE_MUTABLE_COMMAND_LIST_EXP_NAME) + 1) == 0) {
Expand Down Expand Up @@ -375,6 +384,7 @@ ur_result_t ur_platform_handle_t_::initialize() {
ZeMutableCmdListExt.Supported |=
ZeMutableCmdListExt.zexCommandListGetNextCommandIdWithKernelsExp !=
nullptr;
ZeMutableCmdListExt.LoaderExtension = true;
} else {
ZeMutableCmdListExt.Supported |=
(ZE_CALL_NOCHECK(
Expand Down Expand Up @@ -425,6 +435,21 @@ ur_result_t ur_platform_handle_t_::initialize() {
&ZeMutableCmdListExt
.zexCommandListGetNextCommandIdWithKernelsExp))) == 0);
}

// Check if ImmediateAppendCommandList is supported and initialize the
// function pointer.
if (ZeImmediateCommandListAppendExtensionFound) {
ZeCommandListImmediateAppendExt
.zeCommandListImmediateAppendCommandListsExp =
(ze_pfnCommandListImmediateAppendCommandListsExp_t)
ur_loader::LibLoader::getFunctionPtr(
GlobalAdapter->processHandle,
"zeCommandListImmediateAppendCommandListsExp");
ZeCommandListImmediateAppendExt.Supported =
ZeCommandListImmediateAppendExt
.zeCommandListImmediateAppendCommandListsExp != nullptr;
}

return UR_RESULT_SUCCESS;
}

Expand Down
15 changes: 14 additions & 1 deletion source/adapters/level_zero/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ struct ur_platform_handle_t_ : public _ur_platform {
// associated with particular Level Zero driver, store this extension here.
struct ZeMutableCmdListExtension {
bool Supported = false;
// If LoaderExtension is true, the L0 loader is aware of the MCL extension.
// If it is false, the extension has to be loaded directly from the driver
// using zeDriverGetExtensionFunctionAddress. If it is loaded directly from
// the driver, any handles passed to it must be translated using
// zelLoaderTranslateHandle.
bool LoaderExtension = false;
ze_result_t (*zexCommandListGetNextCommandIdExp)(
ze_command_list_handle_t, const ze_mutable_command_id_exp_desc_t *,
uint64_t *) = nullptr;
Expand Down Expand Up @@ -134,4 +140,11 @@ struct ur_platform_handle_t_ : public _ur_platform {
ze_result_t (*zexDeviceReleaseExternalSemaphoreExp)(
ze_intel_external_semaphore_exp_handle_t);
} ZeExternalSemaphoreExt;
};

struct ZeCommandListImmediateAppendExtension {
bool Supported = false;
ze_result_t (*zeCommandListImmediateAppendCommandListsExp)(
ze_command_list_handle_t, uint32_t, ze_command_list_handle_t *,
ze_event_handle_t, uint32_t, ze_event_handle_t *);
} ZeCommandListImmediateAppendExt;
};
7 changes: 3 additions & 4 deletions source/common/logger/ur_logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,15 @@ inline Logger create_logger(std::string logger_name, bool skip_prefix,
logger::Level default_log_level) {
std::transform(logger_name.begin(), logger_name.end(), logger_name.begin(),
::toupper);
std::stringstream env_var_name;
const auto default_flush_level = logger::Level::ERR;
const std::string default_output = "stderr";
auto level = default_log_level;
auto flush_level = default_flush_level;
std::unique_ptr<logger::Sink> sink;

env_var_name << "UR_LOG_" << logger_name;
auto env_var_name = "UR_LOG_" + logger_name;
try {
auto map = getenv_to_map(env_var_name.str().c_str());
auto map = getenv_to_map(env_var_name.c_str());
if (!map.has_value()) {
return Logger(
default_log_level,
Expand Down Expand Up @@ -173,7 +172,7 @@ inline Logger create_logger(std::string logger_name, bool skip_prefix,
skip_linebreak);
} catch (const std::invalid_argument &e) {
std::cerr << "Error when creating a logger instance from the '"
<< env_var_name.str() << "' environment variable:\n"
<< env_var_name << "' environment variable:\n"
<< e.what() << std::endl;
return Logger(default_log_level,
std::make_unique<logger::StderrSink>(
Expand Down
3 changes: 2 additions & 1 deletion source/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ if(UR_ENABLE_SANITIZER)
if(NOT EXISTS ${LIBCXX_PATH} OR NOT EXISTS ${LIBCXX_ABI_PATH})
message(FATAL_ERROR "libc++ is required but can't find the libraries")
endif()
target_link_libraries(ur_loader PRIVATE ${LIBCXX_PATH} ${LIBCXX_ABI_PATH})
# Link with gcc_s first to avoid some symbols resolving to the ones in libc++/libc++abi/libunwind
target_link_libraries(ur_loader PRIVATE gcc_s ${LIBCXX_PATH} ${LIBCXX_ABI_PATH})
endif()
endif()

Expand Down
Loading
Loading